linux-headers-5.4.0-2.11

This commit is contained in:
Alibek Omarov 2021-07-14 01:51:52 +03:00
parent f99e7af53c
commit f830966167
50 changed files with 3410 additions and 1963 deletions

View File

@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 91
EXTRAVERSION = -2.9
EXTRAVERSION = -2.11
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*

View File

@ -77,6 +77,15 @@ extern void epic_send_IPI_mask_allbutself(const struct cpumask *mask,
extern void epic_wait_icr_idle(void);
extern void clear_cepic(void);
extern bool pcsm_adjust_enable;
struct pcs_handle {
void (*pcs_interrupt)(void);
};
extern void register_pcs_handle(const struct pcs_handle *handle);
extern void unregister_pcs_handle(void);
extern __visible void epic_smp_timer_interrupt(struct pt_regs *regs);
extern __visible void epic_smp_spurious_interrupt(struct pt_regs *regs);
extern __visible void epic_smp_error_interrupt(struct pt_regs *regs);

View File

@ -0,0 +1,852 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* include/asm-e2k/copy-hw-stacks.h
*
* Copyright 2021 mcst.ru
*/
#ifndef _E2K_COPY_HW_STACKS_H
#define _E2K_COPY_HW_STACKS_H
#include <linux/types.h>
#include <asm/mman.h>
#include <asm/pv_info.h>
#include <asm/process.h>
#include <asm/kvm/trace-hw-stacks.h>
#undef DEBUG_PV_UST_MODE
#undef DebugUST
#define DEBUG_PV_UST_MODE 0 /* guest user stacks debug */
#define DebugUST(fmt, args...) \
({ \
if (debug_guest_ust) \
pr_info("%s(): " fmt, __func__, ##args); \
})
#undef DEBUG_PV_SYSCALL_MODE
#define DEBUG_PV_SYSCALL_MODE 0 /* syscall injection debugging */
#if DEBUG_PV_UST_MODE || DEBUG_PV_SYSCALL_MODE
extern bool debug_guest_ust;
#else
#define debug_guest_ust false
#endif /* DEBUG_PV_UST_MODE || DEBUG_PV_SYSCALL_MODE */
#ifndef CONFIG_VIRTUALIZATION
/* it native kernel without virtualization support */
#else /* CONFIG_VIRTUALIZATION */
/* It is native host kernel with virtualization support */
/* or paravirtualized host and guest */
/* or native guest kernel
#include <asm/kvm/process.h>
*/
#endif /* ! CONFIG_VIRTUALIZATION */
typedef void (*trace_ps_frame_func_t)(kernel_mem_ps_t *base, kernel_mem_ps_t *frame);
typedef void (*trace_pcs_frame_func_t)(e2k_mem_crs_t *base, e2k_mem_crs_t *frame);
static inline void trace_proc_stack_frames(kernel_mem_ps_t *dst_ps_base,
kernel_mem_ps_t *src_ps_base, u64 ps_size,
trace_ps_frame_func_t trace_func)
{
int qreg, qreg_num;
kernel_mem_ps_t *dst_ps_frame, *src_ps_frame;
kernel_mem_ps_t rw;
qreg_num = ps_size / EXT_4_NR_SZ;
for (qreg = qreg_num - 1; qreg >= 0; qreg--) {
dst_ps_frame = &dst_ps_base[qreg];
src_ps_frame = &src_ps_base[qreg];
rw.word_lo = src_ps_frame->word_lo;
if (machine.native_iset_ver < E2K_ISET_V5) {
rw.word_hi = src_ps_frame->word_hi;
rw.ext_lo = src_ps_frame->ext_lo;
rw.ext_hi = src_ps_frame->ext_hi;
} else {
rw.word_hi = src_ps_frame->ext_lo;
rw.ext_lo = src_ps_frame->word_hi;
rw.ext_hi = src_ps_frame->ext_hi;
}
trace_func(dst_ps_frame, &rw);
}
}
static inline void trace_chain_stack_frames(e2k_mem_crs_t *dst_pcs_base,
e2k_mem_crs_t *src_pcs_base, u64 pcs_size,
trace_pcs_frame_func_t trace_func)
{
int crs_no, crs_num;
e2k_mem_crs_t *dst_pcs_frame, *src_pcs_frame;
e2k_mem_crs_t crs;
unsigned long flags;
crs_num = pcs_size / sizeof(crs);
raw_all_irq_save(flags);
for (crs_no = crs_num - 1; crs_no >= 0; crs_no--) {
dst_pcs_frame = &dst_pcs_base[crs_no];
src_pcs_frame = &src_pcs_base[crs_no];
crs = *src_pcs_frame;
trace_func(dst_pcs_frame, &crs);
}
raw_all_irq_restore(flags);
}
static inline void trace_host_hva_area(u64 *hva_base, u64 hva_size)
{
int line_no, line_num;
u64 *dst_hva_line;
unsigned long flags;
line_num = hva_size / (sizeof(u64) * 4);
raw_all_irq_save(flags);
for (line_no = line_num - 1; line_no >= 0; line_no--) {
dst_hva_line = &hva_base[line_no * 4];
trace_host_hva_area_line(dst_hva_line, (sizeof(u64) * 4));
}
if (line_num * (sizeof(u64) * 4) < hva_size) {
dst_hva_line = &hva_base[line_no * 4];
trace_host_hva_area_line(dst_hva_line,
hva_size - line_num * (sizeof(u64) * 4));
}
raw_all_irq_restore(flags);
}
static __always_inline void
native_kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
void *dst_tail;
const void *src_tail;
u64 copied;
int i;
/*
* Kernel does not use FP registers so do not copy them.
* This only applies to CPUs before V5 instruction set
* (since V5 FP registers become general-purpose QP registers).
*/
if (cpu_has(CPU_FEAT_QPREG)) {
#pragma loop count (10)
for (i = 0; i < size / 64; i++)
E2K_TAGGED_MEMMOVE_64(&dst[8 * i], &src[8 * i]);
copied = round_down(size, 64);
dst_tail = (void *) dst + copied;
src_tail = (void *) src + copied;
} else {
#pragma loop count (5)
for (i = 0; i < size / 128; i++)
E2K_TAGGED_MEMMOVE_128_RF_V2(&dst[16 * i],
&src[16 * i]);
copied = round_down(size, 128);
dst_tail = (void *) dst + copied;
src_tail = (void *) src + copied;
if (size & 64) {
E2K_TAGGED_MEMMOVE_64(dst_tail, src_tail);
dst_tail += 64;
src_tail += 64;
}
}
if (size & 32)
E2K_TAGGED_MEMMOVE_32(dst_tail, src_tail);
}
static __always_inline void
native_collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
e2k_pcsp_hi_t k_pcsp_hi;
u64 size;
int i;
DebugUST("current host chain stack index 0x%x, PCSHTP 0x%llx\n",
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
NATIVE_READ_PCSHTP_REG_SVALUE());
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
size = k_pcsp_hi.PCSP_hi_ind - spilled_size;
BUG_ON(!IS_ALIGNED(size, ALIGN_PCSTACK_TOP_SIZE) || (s64) size < 0);
#pragma loop count (2)
for (i = 0; i < size / 32; i++) {
u64 v0, v1, v2, v3;
v0 = src[4 * i];
v1 = src[4 * i + 1];
v2 = src[4 * i + 2];
v3 = src[4 * i + 3];
dst[4 * i] = v0;
dst[4 * i + 1] = v1;
dst[4 * i + 2] = v2;
dst[4 * i + 3] = v3;
}
k_pcsp_hi.PCSP_hi_ind -= spilled_size;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
DebugUST("move spilled chain part from host top %px to "
"bottom %px, size 0x%llx\n",
src, dst, size);
DebugUST("host kernel chain stack index is now 0x%x, "
"guest user PCSHTP 0x%llx\n",
k_pcsp_hi.PCSP_hi_ind, spilled_size);
}
static __always_inline void
native_collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
e2k_psp_hi_t k_psp_hi;
u64 size;
DebugUST("current host procedure stack index 0x%x, PSHTP 0x%x\n",
NATIVE_NV_READ_PSP_HI_REG().PSP_hi_ind,
NATIVE_NV_READ_PSHTP_REG().PSHTP_ind);
NATIVE_FLUSHR;
k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
size = k_psp_hi.PSP_hi_ind - spilled_size;
BUG_ON(!IS_ALIGNED(size, ALIGN_PSTACK_TOP_SIZE) || (s64) size < 0);
prefetchw_range(src, size);
native_kernel_hw_stack_frames_copy(dst, src, size);
k_psp_hi.PSP_hi_ind -= spilled_size;
NATIVE_NV_NOIRQ_WRITE_PSP_HI_REG(k_psp_hi);
DebugUST("move spilled procedure part from host top %px to "
"bottom %px, size 0x%llx\n",
src, dst, size);
DebugUST("host kernel procedure stack index is now 0x%x, "
"guest user PSHTP 0x%llx\n",
k_psp_hi.PSP_hi_ind, spilled_size);
}
#if defined(CONFIG_PARAVIRT_GUEST)
/* paravirtualized kernel (host and guest) */
#include <asm/paravirt/copy-hw-stacks.h>
#elif defined(CONFIG_KVM_GUEST_KERNEL)
/* It is native guest kernel (without paravirtualization) */
#include <asm/kvm/guest/copy-hw-stacks.h>
#elif defined(CONFIG_VIRTUALIZATION) || !defined(CONFIG_VIRTUALIZATION)
/* native kernel with virtualization support */
/* native kernel without virtualization support */
static __always_inline void
kernel_hw_stack_frames_copy(u64 *dst, const u64 *src, unsigned long size)
{
native_kernel_hw_stack_frames_copy(dst, src, size);
}
static __always_inline void
collapse_kernel_pcs(u64 *dst, const u64 *src, u64 spilled_size)
{
native_collapse_kernel_pcs(dst, src, spilled_size);
}
static __always_inline void
collapse_kernel_ps(u64 *dst, const u64 *src, u64 spilled_size)
{
native_collapse_kernel_ps(dst, src, spilled_size);
}
#else /* ??? */
#error "Undefined virtualization mode"
#endif /* CONFIG_PARAVIRT_GUEST */
static __always_inline u64 get_wsz(enum restore_caller from)
{
return NATIVE_READ_WD_REG().size >> 4;
}
static __always_inline u64 get_ps_clear_size(u64 cur_window_q,
e2k_pshtp_t pshtp)
{
s64 u_pshtp_size_q;
u_pshtp_size_q = GET_PSHTP_Q_INDEX(pshtp);
if (u_pshtp_size_q > E2K_MAXSR - cur_window_q)
u_pshtp_size_q = E2K_MAXSR - cur_window_q;
return E2K_MAXSR - (cur_window_q + u_pshtp_size_q);
}
static __always_inline s64 get_ps_copy_size(u64 cur_window_q, s64 u_pshtp_size)
{
return u_pshtp_size - (E2K_MAXSR - cur_window_q) * EXT_4_NR_SZ;
}
#ifdef CONFIG_CPU_HAS_FILL_INSTRUCTION
# define E2K_CF_MAX_FILL (E2K_CF_MAX_FILL_FILLC_q * 0x10)
#else
extern int cf_max_fill_return;
# define E2K_CF_MAX_FILL cf_max_fill_return
#endif
static __always_inline s64 get_pcs_copy_size(s64 u_pcshtp_size)
{
/* Before v6 it was possible to fill no more than 16 registers.
* Since E2K_MAXCR_q is much bigger than 16 we can be sure that
* there is enough space in CF for the FILL, so there is no
* need to take into account space taken by current window. */
return u_pcshtp_size - E2K_CF_MAX_FILL;
}
/*
* Copy hardware stack from user to *current* kernel stack.
* One has to be careful to avoid hardware FILL of this stack.
*/
static inline int __copy_user_to_current_hw_stack(void *dst, void __user *src,
unsigned long size, const pt_regs_t *regs, bool chain)
{
unsigned long min_flt, maj_flt, ts_flag;
if (likely(!host_test_intc_emul_mode(regs))) {
if (!__range_ok((unsigned long __force) src, size,
PAGE_OFFSET))
return -EFAULT;
}
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
/*
* Every page fault here has a chance of FILL'ing the frame
* that is being copied, in which case we repeat the copy.
*/
do {
min_flt = READ_ONCE(current->min_flt);
maj_flt = READ_ONCE(current->maj_flt);
if (chain)
E2K_FLUSHC;
else
E2K_FLUSHR;
SET_USR_PFAULT("$.recovery_memcpy_fault");
fast_tagged_memory_copy_from_user(dst, src, size, regs,
TAGGED_MEM_STORE_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
TAGGED_MEM_LOAD_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
true);
if (RESTORE_USR_PFAULT) {
clear_ts_flag(ts_flag);
return -EFAULT;
}
} while (unlikely(min_flt != READ_ONCE(current->min_flt) ||
maj_flt != READ_ONCE(current->maj_flt)));
clear_ts_flag(ts_flag);
return 0;
}
static inline int copy_user_to_current_hw_stack(void *dst, void __user *src,
unsigned long size, pt_regs_t *regs, bool chain)
{
unsigned long flags;
int ret;
raw_all_irq_save(flags);
ret = __copy_user_to_current_hw_stack(dst, src, size, regs, chain);
raw_all_irq_restore(flags);
return ret;
}
static inline int copy_e2k_stack_from_user(void *dst, void __user *src,
unsigned long size, pt_regs_t *regs)
{
unsigned long ts_flag;
int ret;
if (likely(!host_test_intc_emul_mode(regs))) {
if (!__range_ok((unsigned long __force) src, size, PAGE_OFFSET))
return -EFAULT;
}
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
ret = host_copy_from_user_with_tags(dst, src, size, regs);
clear_ts_flag(ts_flag);
return (ret) ? -EFAULT : 0;
}
static inline int copy_e2k_stack_to_user(void __user *dst, void *src,
unsigned long size, pt_regs_t *regs)
{
unsigned long ts_flag;
int ret;
if (likely(!host_test_intc_emul_mode(regs))) {
if (!__range_ok((unsigned long __force) dst, size, PAGE_OFFSET))
return -EFAULT;
}
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
ret = host_copy_to_user_with_tags(dst, src, size, regs);
clear_ts_flag(ts_flag);
return (ret) ? -EFAULT : 0;
}
static __always_inline int
user_hw_stack_frames_copy(void __user *dst, void *src, unsigned long copy_size,
const pt_regs_t *regs, unsigned long hw_stack_ind, bool is_pcsp)
{
unsigned long ts_flag;
if (unlikely(hw_stack_ind < copy_size)) {
unsigned long flags;
raw_all_irq_save(flags);
if (is_pcsp) {
E2K_FLUSHC;
} else {
E2K_FLUSHR;
}
raw_all_irq_restore(flags);
}
SET_USR_PFAULT("$.recovery_memcpy_fault");
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
fast_tagged_memory_copy_to_user(dst, src, copy_size, regs,
TAGGED_MEM_STORE_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT,
TAGGED_MEM_LOAD_REC_OPC |
MAS_BYPASS_L1_CACHE << LDST_REC_OPC_MAS_SHIFT, true);
clear_ts_flag(ts_flag);
if (RESTORE_USR_PFAULT) {
pr_err("process %s (%d) %s stack could not be copied "
"from %px to %px size 0x%lx (out of memory?)\n",
current->comm, current->pid,
(is_pcsp) ? "chain" : "procedure",
src, dst, copy_size);
return -EFAULT;
}
DebugUST("copying guest %s stack spilled to host from %px "
"to guest kernel stack from %px, size 0x%lx\n",
(is_pcsp) ? "chain" : "procedure", src, dst, copy_size);
return 0;
}
static __always_inline int
user_crs_frames_copy(e2k_mem_crs_t __user *u_frame, pt_regs_t *regs,
e2k_mem_crs_t *crs)
{
unsigned long ts_flag;
int ret;
ts_flag = set_ts_flag(TS_KERNEL_SYSCALL);
ret = host_copy_to_user(u_frame, crs, sizeof(*crs), regs);
clear_ts_flag(ts_flag);
if (unlikely(ret))
return -EFAULT;
return 0;
}
static __always_inline int user_psp_stack_copy(e2k_psp_lo_t u_psp_lo,
e2k_psp_hi_t u_psp_hi, s64 u_pshtp_size,
e2k_psp_lo_t k_psp_lo, e2k_psp_hi_t k_psp_hi,
unsigned long copy_size, const pt_regs_t *regs)
{
void __user *dst;
void *src;
int ret;
dst = (void __user *) (AS(u_psp_lo).base + AS(u_psp_hi).ind -
u_pshtp_size);
src = (void *) AS(k_psp_lo).base;
if (host_test_intc_emul_mode(regs) && trace_host_copy_hw_stack_enabled())
trace_host_copy_hw_stack(dst, src, copy_size, false);
ret = user_hw_stack_frames_copy(dst, src, copy_size,
regs, k_psp_hi.PSP_hi_ind, false);
if (host_test_intc_emul_mode(regs) && trace_host_proc_stack_frame_enabled())
trace_proc_stack_frames((kernel_mem_ps_t *)dst,
(kernel_mem_ps_t *)src, copy_size,
trace_host_proc_stack_frame);
return ret;
}
static __always_inline int user_pcsp_stack_copy(e2k_pcsp_lo_t u_pcsp_lo,
e2k_pcsp_hi_t u_pcsp_hi, s64 u_pcshtp_size,
e2k_pcsp_lo_t k_pcsp_lo, e2k_pcsp_hi_t k_pcsp_hi,
unsigned long copy_size, const pt_regs_t *regs)
{
void __user *dst;
void *src;
int ret;
dst = (void __user *)(AS(u_pcsp_lo).base + AS(u_pcsp_hi).ind -
u_pcshtp_size);
src = (void *) AS(k_pcsp_lo).base;
if (host_test_intc_emul_mode(regs) && trace_host_copy_hw_stack_enabled())
trace_host_copy_hw_stack(dst, src, copy_size, true);
ret = user_hw_stack_frames_copy(dst, src, copy_size,
regs, k_pcsp_hi.PCSP_hi_ind, true);
if (host_test_intc_emul_mode(regs) && trace_host_chain_stack_frame_enabled())
trace_chain_stack_frames((e2k_mem_crs_t *)dst,
(e2k_mem_crs_t *)src, copy_size,
trace_host_chain_stack_frame);
return ret;
}
/**
* user_hw_stacks_copy - copy user hardware stacks that have been
* SPILLed to kernel back to user space
* @stacks - saved user stack registers
* @cur_window_q - size of current window in procedure stack,
* needed only if @copy_full is not set
* @copy_full - set if want to copy _all_ of SPILLed stacks
*
* This does not update stacks->pshtp and stacks->pcshtp. Main reason is
* signals: if a signal arrives after copying then it must see a coherent
* state where saved stacks->pshtp and stacks->pcshtp values show how much
* data from user space is spilled to kernel space.
*/
static __always_inline int
native_user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
trap_pt_regs_t *trap = regs->trap;
e2k_psp_lo_t u_psp_lo = stacks->psp_lo,
k_psp_lo = current_thread_info()->k_psp_lo;
e2k_psp_hi_t u_psp_hi = stacks->psp_hi;
e2k_pcsp_lo_t u_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t u_pcsp_hi = stacks->pcsp_hi;
s64 u_pshtp_size, u_pcshtp_size, ps_copy_size, pcs_copy_size;
int ret;
u_pshtp_size = GET_PSHTP_MEM_INDEX(stacks->pshtp);
u_pcshtp_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
/*
* Copy user's part from kernel stacks into user stacks
* Update user's stack registers
*/
if (copy_full) {
pcs_copy_size = u_pcshtp_size;
ps_copy_size = u_pshtp_size;
} else {
pcs_copy_size = get_pcs_copy_size(u_pcshtp_size);
ps_copy_size = get_ps_copy_size(cur_window_q, u_pshtp_size);
/* Make sure there is enough space in CF for the FILL */
BUG_ON((E2K_MAXCR_q - 4) * 16 < E2K_CF_MAX_FILL);
}
if (likely(pcs_copy_size <= 0 && ps_copy_size <= 0))
return 0;
if (unlikely(pcs_copy_size > 0)) {
e2k_pcsp_hi_t k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
/* Since not all user data has been SPILL'ed it is possible
* that we have already overflown user's hardware stack. */
if (unlikely(AS(u_pcsp_hi).ind > AS(u_pcsp_hi).size)) {
ret = handle_chain_stack_bounds(stacks, trap);
if (unlikely(ret)) {
pr_warning("process %s (%d) chain stack overflow (out of memory?)\n",
current->comm, current->pid);
return ret;
}
u_pcsp_lo = stacks->pcsp_lo;
u_pcsp_hi = stacks->pcsp_hi;
}
ret = user_pcsp_stack_copy(u_pcsp_lo, u_pcsp_hi, u_pcshtp_size,
k_pcsp_lo, k_pcsp_hi, pcs_copy_size, regs);
if (ret)
return ret;
}
if (unlikely(ps_copy_size > 0)) {
e2k_psp_hi_t k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
/* Since not all user data has been SPILL'ed it is possible
* that we have already overflowed user's hardware stack. */
if (unlikely(AS(u_psp_hi).ind > AS(u_psp_hi).size)) {
ret = handle_proc_stack_bounds(stacks, trap);
if (unlikely(ret)) {
pr_warning("process %s (%d) procedure stack overflow (out of memory?)\n",
current->comm, current->pid);
return ret;
}
u_psp_lo = stacks->psp_lo;
u_psp_hi = stacks->psp_hi;
}
ret = user_psp_stack_copy(u_psp_lo, u_psp_hi, u_pshtp_size,
k_psp_lo, k_psp_hi, ps_copy_size, regs);
if (ret)
return ret;
}
return 0;
}
static inline void collapse_kernel_hw_stacks(struct e2k_stacks *stacks)
{
e2k_pcsp_lo_t k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_psp_lo_t k_psp_lo = current_thread_info()->k_psp_lo;
unsigned long flags, spilled_pc_size, spilled_p_size;
e2k_pshtp_t pshtp = stacks->pshtp;
u64 *dst;
const u64 *src;
spilled_pc_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
spilled_p_size = GET_PSHTP_MEM_INDEX(pshtp);
DebugUST("guest user spilled to host kernel stack part: chain 0x%lx "
"procedure 0x%lx\n",
spilled_pc_size, spilled_p_size);
/* When user tries to return from the last user frame
* we will have pcshtp = pcsp_hi.ind = 0. But situation
* with pcsp_hi.ind != 0 and pcshtp = 0 is impossible. */
if (WARN_ON_ONCE(spilled_pc_size < SZ_OF_CR &&
AS(stacks->pcsp_hi).ind != 0))
do_exit(SIGKILL);
/* Keep the last user frame (see user_hw_stacks_copy_full()) */
if (spilled_pc_size >= SZ_OF_CR) {
spilled_pc_size -= SZ_OF_CR;
DebugUST("Keep the prev user chain frame, so spilled chain "
"size is now 0x%lx\n",
spilled_pc_size);
}
raw_all_irq_save(flags);
if (spilled_pc_size) {
dst = (u64 *) AS(k_pcsp_lo).base;
src = (u64 *) (AS(k_pcsp_lo).base + spilled_pc_size);
collapse_kernel_pcs(dst, src, spilled_pc_size);
stacks->pcshtp = SZ_OF_CR;
apply_graph_tracer_delta(-spilled_pc_size);
}
if (spilled_p_size) {
dst = (u64 *) AS(k_psp_lo).base;
src = (u64 *) (AS(k_psp_lo).base + spilled_p_size);
collapse_kernel_ps(dst, src, spilled_p_size);
AS(pshtp).ind = 0;
stacks->pshtp = pshtp;
}
raw_all_irq_restore(flags);
}
/**
* user_hw_stacks_prepare - prepare user hardware stacks that have been
* SPILLed to kernel back to user space
* @stacks - saved user stack registers
* @cur_window_q - size of current window in procedure stack,
* needed only if @copy_full is not set
* @syscall - true if called upon direct system call exit (no signal handlers)
*
* This does two things:
*
* 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
* - user signal handler had pcshtp==0x20 before return to sigreturn()
* - user context had pcshtp==0x20 before return to makecontext_trampoline()
* - chain stack underflow happened
* So it is possible in sigreturn() and traps, but not in system calls.
* If we are using the trick with return to FILL user hardware stacks than
* we must have frame in chain stack to return to. So in this case kernel's
* chain stack is moved up by one frame (0x20 bytes).
* We also fill the new frame with actual user data and update stacks->pcshtp,
* this is needed to keep the coherent state where saved stacks->pcshtp values
* shows how much data from user space has been spilled to kernel space.
*
* 2) It is not possible to always FILL all of user data that have been
* SPILLed to kernel stacks. So we manually copy the leftovers that can
* not be FILLed to user space.
* This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
* is signals: if a signal arrives after copying then it must see a coherent
* state where saved stacks->pshtp and stacks->pcshtp values show how much
* data from user space has been spilled to kernel space.
*/
static __always_inline void native_user_hw_stacks_prepare(
struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
e2k_pcshtp_t u_pcshtp = stacks->pcshtp;
int ret;
BUG_ON(from & FROM_PV_VCPU_MODE);
/*
* 1) Make sure there is free space in kernel chain stack to return to
*/
if (!syscall && u_pcshtp == 0) {
unsigned long flags;
e2k_pcsp_lo_t u_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t u_pcsp_hi = stacks->pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *u_cframe;
e2k_mem_crs_t *k_crs;
u64 u_cbase;
int ret = -EINVAL;
raw_all_irq_save(flags);
E2K_FLUSHC;
k_pcsp_hi = READ_PCSP_HI_REG();
BUG_ON(AS(k_pcsp_hi).ind);
AS(k_pcsp_hi).ind += SZ_OF_CR;
WRITE_PCSP_HI_REG(k_pcsp_hi);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
u_cframe = (e2k_mem_crs_t __user *) (AS(u_pcsp_lo).base +
AS(u_pcsp_hi).ind);
u_cbase = ((from & FROM_RETURN_PV_VCPU_TRAP) ||
host_test_intc_emul_mode(regs)) ?
u_pcsp_lo.PCSP_lo_base :
(u64) CURRENT_PCS_BASE();
if ((u64) u_cframe > u_cbase) {
ret = __copy_user_to_current_hw_stack(k_crs,
u_cframe - 1, sizeof(*k_crs), regs, true);
}
raw_all_irq_restore(flags);
/* Can happen if application returns until runs out of
* chain stack or there is no free memory for stacks.
* There is no user stack to return to - die. */
if (ret) {
SIGDEBUG_PRINT("SIGKILL. %s\n",
(ret == -EINVAL) ? "tried to return to kernel" :
"ran into Out-of-Memory on user stacks");
force_sig(SIGKILL);
return;
}
if (AS(u_pcsp_hi).ind < SZ_OF_CR) {
update_pcsp_regs(AS(u_pcsp_lo).base,
&u_pcsp_lo, &u_pcsp_hi);
stacks->pcsp_lo = u_pcsp_lo;
stacks->pcsp_hi = u_pcsp_hi;
BUG_ON(AS(u_pcsp_hi).ind < SZ_OF_CR);
}
u_pcshtp = SZ_OF_CR;
stacks->pcshtp = u_pcshtp;
}
/*
* 2) Copy user data that cannot be FILLed
*/
ret = native_user_hw_stacks_copy(stacks, regs, cur_window_q, false);
if (unlikely(ret))
do_exit(SIGKILL);
}
#ifndef CONFIG_VIRTUALIZATION
/* native kernel without virtualization support */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
return native_user_hw_stacks_copy(stacks, regs, cur_window_q, copy_full);
}
static __always_inline void
host_user_hw_stacks_prepare(struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
native_user_hw_stacks_prepare(stacks, regs, cur_window_q,
from, syscall);
}
#elif defined(CONFIG_KVM_GUEST_KERNEL)
/* It is native guest kernel (without paravirtualization) */
#include <asm/kvm/guest/copy-hw-stacks.h>
#elif defined(CONFIG_PARAVIRT_GUEST)
/* It is paravirtualized kernel (host and guest) */
#include <asm/paravirt/copy-hw-stacks.h>
#elif defined(CONFIG_KVM_HOST_MODE)
/* It is host kernel with virtualization support */
#include <asm/kvm/copy-hw-stacks.h>
#else /* unknow mode */
#error "unknown virtualization mode"
#endif /* !CONFIG_VIRTUALIZATION */
/**
* user_hw_stacks_copy_full - copy part of user stacks that was SPILLed
* into kernel back to user stacks.
* @stacks - saved user stack registers
* @regs - pt_regs pointer
* @crs - last frame to copy
*
* If @crs is not NULL then the frame pointed to by it will also be copied
* to userspace. Note that 'stacks->pcsp_hi.ind' is _not_ updated after
* copying since it would leave stack in inconsistent state (with two
* copies of the same @crs frame), this is left to the caller. *
*
* Inlining this reduces the amount of memory to copy in
* collapse_kernel_hw_stacks().
*/
static inline int do_user_hw_stacks_copy_full(struct e2k_stacks *stacks,
pt_regs_t *regs, e2k_mem_crs_t *crs)
{
int ret;
/*
* Copy part of user stacks that were SPILLed into kernel stacks
*/
ret = user_hw_stacks_copy(stacks, regs, 0, true);
if (unlikely(ret))
return ret;
/*
* Nothing to FILL so remove the resulting hole from kernel stacks.
*
* IMPORTANT: there is always at least one user frame at the top of
* kernel stack - the one that issued a system call (in case of an
* exception we uphold this rule manually, see user_hw_stacks_prepare())
* We keep this ABI and _always_ leave space for one user frame,
* this way we can later FILL using return trick (otherwise there
* would be no space in chain stack for the trick).
*/
collapse_kernel_hw_stacks(stacks);
/*
* Copy saved %cr registers
*
* Caller must take care of filling of resulting hole
* (last user frame from pcshtp == SZ_OF_CR).
*/
if (crs) {
e2k_mem_crs_t __user *u_frame;
int ret;
u_frame = (void __user *) (AS(stacks->pcsp_lo).base +
AS(stacks->pcsp_hi).ind);
ret = user_crs_frames_copy(u_frame, regs, &regs->crs);
if (unlikely(ret))
return ret;
}
return 0;
}
#endif /* _E2K_COPY_HW_STACKS_H */

View File

@ -2241,6 +2241,7 @@ typedef union e2k_fpsr {
#define FPSR_reg word
typedef union {
u32 half_word[2];
struct {
u32 user : 1;
u32 system : 1;

View File

@ -1038,19 +1038,26 @@ _Pragma("no_asm_inline") \
: clobbers); \
})
#define NATIVE_EXIT_HANDLE_SYSCALL(sbr, usd_hi, usd_lo, upsr) \
({ \
asm volatile ("{rwd %0, %%sbr}" \
"{rwd %1, %%usd.hi}" \
asm volatile (ALTERNATIVE_1_ALTINSTR \
/* CPU_HWBUG_USD_ALIGNMENT version */ \
"{rwd %0, %%sbr;" \
" nop}" \
ALTERNATIVE_2_OLDINSTR \
/* Default version */ \
"{rwd %0, %%sbr}" \
ALTERNATIVE_3_FEATURE(%[facility]) \
"{rwd %2, %%usd.lo}" \
"{rwd %1, %%usd.hi}" \
"{rws %3, %%upsr;" \
" nop 4}\n" \
: \
: "ri" ((__e2k_u64_t) (sbr)), \
"ri" ((__e2k_u64_t) (usd_hi)), \
"ri" ((__e2k_u64_t) (usd_lo)), \
"ri" ((__e2k_u32_t) (upsr))); \
"ri" ((__e2k_u32_t) (upsr)), \
[facility] "i" (CPU_HWBUG_USD_ALIGNMENT)); \
})
@ -1093,6 +1100,15 @@ _Pragma("no_asm_inline") \
: "r" ((__e2k_u64_t) (val))); \
})
#define NATIVE_SET_MMUREG_CLOSED(reg_mnemonic, val, nop) \
({ \
asm volatile ("{nop " #nop "\n" \
" mmurw %0, %%" #reg_mnemonic "}" \
: \
: "r" ((u64) (val))); \
})
#define NATIVE_TAGGED_LOAD_TO_MMUREG(reg_mnemonic, _addr) \
do { \
unsigned long long _tmp; \

View File

@ -23,8 +23,6 @@
#include <asm/e2k.h>
#include <asm/pgtable_def.h>
#define CHK_DEBUGGER(trapnr, signr, error_code, address, regs, after)
#define IS_KERNEL_THREAD(task, mm) \
({ \
e2k_addr_t ps_base; \
@ -248,6 +246,8 @@ host_ftrace_dump(void)
{
return;
}
static const bool kvm_debug = false;
#else /* CONFIG_VIRTUALIZATION */
/* it is native host kernel with virtualization support */
/* or it is paravirtualized host/guest kernel */

View File

@ -11,6 +11,8 @@
#ifndef __ASSEMBLY__
void do_sic_error_interrupt(void);
static inline bool cpu_has_epic(void)
{
if (cpu_has(CPU_FEAT_EPIC))

View File

@ -35,18 +35,12 @@ extern int hw_breakpoint_exceptions_notify(
extern void hw_breakpoint_pmu_read(struct perf_event *bp);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
extern int bp_data_overflow_handle(struct pt_regs *regs);
extern int bp_instr_overflow_handle(struct pt_regs *regs);
extern void bp_data_overflow_handle(struct pt_regs *regs);
extern void bp_instr_overflow_handle(struct pt_regs *regs);
extern void clear_ptrace_hw_breakpoint(struct task_struct *tsk);
#else /* ! CONFIG_HAVE_HW_BREAKPOINT */
static inline int bp_data_overflow_handle(struct pt_regs *regs)
{
return 0;
}
static inline int bp_instr_overflow_handle(struct pt_regs *regs)
{
return 0;
}
static inline void bp_data_overflow_handle(struct pt_regs *regs) { }
static inline void bp_instr_overflow_handle(struct pt_regs *regs) { }
static inline void clear_ptrace_hw_breakpoint(struct task_struct *tsk) {}
#endif /* CONFIG_HAVE_HW_BREAKPOINT */

View File

@ -61,18 +61,14 @@ static inline int is_kprobe_break1_trap(struct pt_regs *regs)
return *instr == KPROBE_BREAK_1;
}
extern int kprobe_instr_debug_handle(struct pt_regs *);
extern void kprobe_instr_debug_handle(struct pt_regs *);
#else
static inline int is_kprobe_break1_trap(struct pt_regs *regs)
{
return false;
}
static inline int kprobe_instr_debug_handle(struct pt_regs *regs)
{
return 0;
}
static inline void kprobe_instr_debug_handle(struct pt_regs *regs) { }
#endif /* #ifdef CONFIG_KPROBES */
#ifdef CONFIG_KRETPROBES

View File

@ -0,0 +1,462 @@
/*
* KVM guest kernel processes support
* Copyright 2011 Salavat S. Guiliazov (atic@mcst.ru)
*/
#ifndef _E2K_KVM_COPY_HW_STACKS_H
#define _E2K_KVM_COPY_HW_STACKS_H
#include <linux/kvm_host.h>
#include <asm/cpu_regs.h>
#include <asm/regs_state.h>
#include <asm/kvm/thread_info.h>
#include <asm/kvm/mmu.h>
#include <asm/kvm/page.h>
#include <asm/kvm/switch.h>
#undef DEBUG_KVM_GUEST_STACKS_MODE
#undef DebugGUST
#define DEBUG_KVM_GUEST_STACKS_MODE 0 /* guest user stacks */
/* copy debug */
#define DebugGUST(fmt, args...) \
({ \
if (DEBUG_KVM_GUEST_STACKS_MODE) \
pr_info("%s(): " fmt, __func__, ##args); \
})
#ifdef CONFIG_KVM_HOST_MODE
static inline void
prepare_pv_vcpu_inject_stacks(struct kvm_vcpu *vcpu, pt_regs_t *regs)
{
e2k_stacks_t *stacks, *g_stacks;
gthread_info_t *gti = pv_vcpu_get_gti(vcpu);
if (regs->g_stacks_valid) {
/* already prepared */
return;
}
/* all stacks at empty state, because of guest user recursion */
/* of trap/system calls can not be */
g_stacks = &regs->g_stacks;
g_stacks->usd_lo = gti->g_usd_lo;
g_stacks->usd_hi = gti->g_usd_hi;
g_stacks->top = gti->g_sbr.SBR_base;
g_stacks->psp_lo = gti->g_psp_lo;
g_stacks->psp_hi = gti->g_psp_hi;
g_stacks->pcsp_lo = gti->g_pcsp_lo;
g_stacks->pcsp_hi = gti->g_pcsp_hi;
/* pshtp & pcshtp from guest user stack real state upon trap/syscall */
stacks = &regs->stacks;
g_stacks->pshtp = stacks->pshtp;
g_stacks->pcshtp = stacks->pcshtp;
regs->g_stacks_valid = true;
regs->g_stacks_active = false;
regs->need_inject = false;
}
#undef EMULATE_EMPTY_CHAIN_STACK /* only to debug */
#ifdef EMULATE_EMPTY_CHAIN_STACK
static __always_inline void
pv_vcpu_emulate_empty_chain_staks(struct kvm_vcpu *vcpu, pt_regs_t *regs,
e2k_stacks_t *stacks, bool guest_user)
{
e2k_pcshtp_t pcshtp;
unsigned long flags;
e2k_pcsp_lo_t g_pcsp_lo, k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *g_cframe;
e2k_mem_crs_t *k_crs;
int ret;
pcshtp = stacks->pcshtp;
if (!(guest_user && pcshtp <= 0x40))
return;
g_pcsp_lo = regs->stacks.pcsp_lo;
g_pcsp_hi = regs->stacks.pcsp_hi;
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
k_pcsp_lo = NATIVE_NV_READ_PCSP_LO_REG();
BUG_ON(AS(k_pcsp_hi).ind != pcshtp);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
g_cframe = (e2k_mem_crs_t __user *) (AS(g_pcsp_lo).base +
AS(g_pcsp_hi).ind - pcshtp);
ret = user_hw_stack_frames_copy(g_cframe, k_crs, pcshtp, regs,
k_pcsp_hi.PCSP_hi_ind, true);
if (ret) {
pr_err("%s(): copy to user stack failed\n", __func__);
BUG_ON(true);
}
k_pcsp_hi.PCSP_hi_ind -= pcshtp;
pcshtp = 0;
regs->stacks.pcshtp = pcshtp;
stacks->pcshtp = pcshtp;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
raw_all_irq_restore(flags);
}
#else /* !EMULATE_EMPTY_CHAIN_STACK */
static __always_inline void
pv_vcpu_emulate_empty_chain_staks(struct kvm_vcpu *vcpu, pt_regs_t *regs,
e2k_stacks_t *stacks, bool guest_user)
{
}
#endif /* EMULATE_EMPTY_CHAIN_STACK */
/**
* pv_vcpu_user_hw_stacks_copy - check size of user hardware stacks that have
* been SPILLed to kernel back to guest space
* @regs - saved guest user stack registers
* @cur_window_q - size of current window in procedure stack
*
* All guest user's stacks part were already copied to guest kernel stacks,
* so it need only check that it was full size and nothing to copy here
*/
static __always_inline int
pv_vcpu_user_hw_stacks_copy(pt_regs_t *regs, e2k_stacks_t *stacks,
u64 cur_window_q, bool guest_user)
{
e2k_psp_lo_t g_psp_lo = stacks->psp_lo,
k_psp_lo = current_thread_info()->k_psp_lo;
e2k_psp_hi_t g_psp_hi = stacks->psp_hi;
e2k_pcsp_lo_t g_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi = stacks->pcsp_hi;
s64 g_pshtp_size, g_pcshtp_size, ps_copy_size, pcs_copy_size;
int ret;
DebugUST("guest kernel chain state: base 0x%llx ind 0x%x size 0x%x\n",
g_pcsp_lo.PCSP_lo_base, g_pcsp_hi.PCSP_hi_ind,
g_pcsp_hi.PCSP_hi_size);
DebugUST("guest kernel proc state: base 0x%llx ind 0x%x size 0x%x\n",
g_psp_lo.PSP_lo_base, g_psp_hi.PSP_hi_ind,
g_psp_hi.PSP_hi_size);
g_pshtp_size = GET_PSHTP_MEM_INDEX(stacks->pshtp);
g_pcshtp_size = PCSHTP_SIGN_EXTEND(stacks->pcshtp);
DebugUST("guest kernel chain stack PCSHTP 0x%llx, "
"proc stack PSHTP 0x%llx cur window 0x%llx\n",
g_pcshtp_size, g_pshtp_size, cur_window_q);
/*
* FIXME: the current implementation of the guest user signal handler
* injection uses direct copying to guest hardware stacks.
* It is bad decision, needs to be corrected
KVM_BUG_ON(is_paging(current_thread_info()->vcpu) &&
(g_psp_lo.PSP_lo_base < GUEST_TASK_SIZE ||
g_pcsp_lo.PCSP_lo_base < GUEST_TASK_SIZE));
*/
/*
* Calculate size of user's part to copy from kernel stacks
* into guest kernel stacks
*/
pcs_copy_size = get_pcs_copy_size(g_pcshtp_size);
ps_copy_size = get_ps_copy_size(cur_window_q, g_pshtp_size);
/* Make sure there is enough space in CF for the FILL */
BUG_ON((E2K_MAXCR_q - 4) * 16 < E2K_CF_MAX_FILL);
DebugUST("to copy chain stack 0x%llx, proc stack 0x%llx\n",
pcs_copy_size, ps_copy_size);
if (likely(pcs_copy_size <= 0 && ps_copy_size <= 0))
return 0;
if (unlikely(pcs_copy_size > 0)) {
e2k_pcsp_hi_t k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
void __user *dst;
void *src;
/* Since SPILL'ed guest user data will be copyed to guest */
/* kernel stacks then cannot be any overflow of user's */
/* hardware stack. */
if (unlikely(AS(g_pcsp_hi).ind > AS(g_pcsp_hi).size)) {
pr_err("%s(): guest kernel chain stack overflow "
"(out of memory?): ind 0x%x size 0x%x\n",
__func__, g_pcsp_hi.PCSP_hi_ind,
g_pcsp_hi.PCSP_hi_size);
KVM_BUG_ON(true);
}
dst = (void __user *)(g_pcsp_lo.PCSP_lo_base +
g_pcsp_hi.PCSP_hi_ind);
if (!guest_user) {
/* stack index has been incremented on PCSHTP */
dst -= g_pcshtp_size;
}
src = (void *)k_pcsp_lo.PCSP_lo_base;
if (trace_host_copy_hw_stack_enabled())
trace_host_copy_hw_stack(dst, src, pcs_copy_size, true);
ret = user_hw_stack_frames_copy(dst, src, pcs_copy_size, regs,
k_pcsp_hi.PCSP_hi_ind, true);
if (trace_host_chain_stack_frame_enabled())
trace_chain_stack_frames((e2k_mem_crs_t *)dst,
(e2k_mem_crs_t *)src, pcs_copy_size,
trace_host_chain_stack_frame);
if (ret)
return ret;
if (guest_user) {
g_pcsp_hi.PCSP_hi_ind += pcs_copy_size;
stacks->pcsp_hi = g_pcsp_hi;
DebugGUST("guest user chain stack frames copied from "
"host %px to guest kernel from %px size 0x%llx "
"PCSP.ind 0x%x\n",
src, dst, pcs_copy_size, g_pcsp_hi.PCSP_hi_ind);
}
}
if (unlikely(ps_copy_size > 0)) {
e2k_psp_hi_t k_psp_hi = NATIVE_NV_READ_PSP_HI_REG();
void __user *dst;
void *src;
/* Since SPILL'ed guest user data will be copyed to guest */
/* kernel stacks then cannot be any overflow of user's */
/* hardware stack. */
if (unlikely(AS(g_psp_hi).ind > AS(g_psp_hi).size)) {
pr_err("%s(): guest kernel proc stack overflow "
"(out of memory?): ind 0x%x size 0x%x\n",
__func__, g_psp_hi.PSP_hi_ind,
g_psp_hi.PSP_hi_size);
KVM_BUG_ON(true);
}
dst = (void __user *)(g_psp_lo.PSP_lo_base +
g_psp_hi.PSP_hi_ind);
if (!guest_user) {
/* stack index has been incremented on PSHTP */
dst -= g_pshtp_size;
}
src = (void *)k_psp_lo.PSP_lo_base;
if (trace_host_copy_hw_stack_enabled())
trace_host_copy_hw_stack(dst, src, ps_copy_size, false);
ret = user_hw_stack_frames_copy(dst, src, ps_copy_size, regs,
k_psp_hi.PSP_hi_ind, false);
if (trace_host_proc_stack_frame_enabled())
trace_proc_stack_frames((kernel_mem_ps_t *)dst,
(kernel_mem_ps_t *)src, ps_copy_size,
trace_host_proc_stack_frame);
if (ret)
return ret;
if (guest_user) {
g_psp_hi.PSP_hi_ind += ps_copy_size;
stacks->psp_hi = g_psp_hi;
DebugGUST("guest user proc stack frames copied from "
"host %px to guest kernel from %px size 0x%llx "
"PSP.ind 0x%x\n",
src, dst, ps_copy_size, g_psp_hi.PSP_hi_ind);
}
}
return 0;
}
/**
* pv_vcpu_user_hw_stacks_prepare - prepare guest user hardware stacks
that have been SPILLed to kernel back
to guest user space
* @regs - saved guest user stack registers
* @cur_window_q - size of current window in procedure stack
* @syscall - true if called upon direct system call exit (no signal handlers)
*
* This does two things:
*
* 1) It is possible that upon kernel entry pcshtp == 0 in some cases:
* - user signal handler had pcshtp==0x20 before return to sigreturn()
* - user context had pcshtp==0x20 before return to makecontext_trampoline()
* - chain stack underflow happened
* So it is possible in sigreturn() and traps, but not in system calls.
* If we are using the trick with return to FILL user hardware stacks than
* we must have frame in chain stack to return to. So in this case kernel's
* chain stack is moved up by one frame (0x20 bytes).
* We also fill the new frame with actual user data and update stacks->pcshtp,
* this is needed to keep the coherent state where saved stacks->pcshtp values
* shows how much data from user space has been spilled to kernel space.
*
* 2) It is not possible to always FILL all of user data that have been
* SPILLed to kernel stacks. So we manually copy the leftovers that can
* not be FILLed to user space.
* This copy does not update stacks->pshtp and stacks->pcshtp. Main reason
* is signals: if a signal arrives after copying then it must see a coherent
* state where saved stacks->pshtp and stacks->pcshtp values show how much
* data from user space has been spilled to kernel space.
*/
static __always_inline void
pv_vcpu_user_hw_stacks_prepare(struct kvm_vcpu *vcpu, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
e2k_stacks_t *stacks;
e2k_pcshtp_t pcshtp;
bool guest_user;
bool paging = is_paging(vcpu);
int ret;
if (likely(paging)) {
guest_user = !!(syscall || !pv_vcpu_trap_on_guest_kernel(regs));
} else {
guest_user = false;
}
if (guest_user) {
if (from & FROM_PV_VCPU_MODE) {
/* all preparation has been made */
/* by host & guest handler */
return;
}
/* trap on/syscall from guest user, so regs keeps user */
/* registers state and it need use guest kernel stacks */
/* in empty state to handle this trap/syscall */
if (!regs->g_stacks_valid) {
prepare_pv_vcpu_inject_stacks(vcpu, regs);
}
stacks = &regs->g_stacks;
} else {
/* trap on guest kernel, so regs already points to guest */
/* kernel stacks and trap will be handled by host */
/* same as other user's processes traps */
stacks = &regs->stacks;
}
/* only to debug on simulator : pcshtp == 0 */
pv_vcpu_emulate_empty_chain_staks(vcpu, regs, stacks, guest_user);
pcshtp = stacks->pcshtp;
DebugUST("guest kernel chain stack state: base 0x%llx ind 0x%x "
"size 0x%x\n",
stacks->pcsp_lo.PCSP_lo_base,
stacks->pcsp_hi.PCSP_hi_ind,
stacks->pcsp_hi.PCSP_hi_size);
DebugUST("host kernel chain stack state: base 0x%llx ind 0x%x "
"size 0x%x\n",
NATIVE_NV_READ_PCSP_LO_REG().PCSP_lo_base,
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_ind,
NATIVE_NV_READ_PCSP_HI_REG().PCSP_hi_size);
DebugUST("guest kernel chain stack size to fill PCSHTP 0x%x\n",
pcshtp);
/*
* 1) Make sure there is free space in kernel chain stack to return to
*/
if (!syscall && pcshtp == 0 && !guest_user) {
unsigned long flags;
e2k_pcsp_lo_t g_pcsp_lo = stacks->pcsp_lo,
k_pcsp_lo = current_thread_info()->k_pcsp_lo;
e2k_pcsp_hi_t g_pcsp_hi = stacks->pcsp_hi, k_pcsp_hi;
e2k_mem_crs_t __user *g_cframe;
e2k_mem_crs_t *k_crs;
int ret = -EINVAL;
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
BUG_ON(AS(k_pcsp_hi).ind);
AS(k_pcsp_hi).ind += SZ_OF_CR;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
k_crs = (e2k_mem_crs_t *) AS(k_pcsp_lo).base;
g_cframe = (e2k_mem_crs_t __user *) (AS(g_pcsp_lo).base +
AS(g_pcsp_hi).ind);
if ((u64) g_cframe > (u64) AS(g_pcsp_lo).base) {
ret = __copy_user_to_current_hw_stack(k_crs,
g_cframe - 1, sizeof(*k_crs), regs, true);
}
raw_all_irq_restore(flags);
/* Can happen if application returns until runs out of
* chain stack or there is no free memory for stacks.
* There is no user stack to return to - die. */
if (ret) {
E2K_LMS_HALT_OK;
pr_err("%s(): SIGKILL. %s\n",
__func__,
(ret == -EINVAL) ?
"tried to return to kernel"
:
"ran into Out-of-Memory on user stacks");
force_sig(SIGKILL);
return;
}
DebugUST("copy guest user chain frame from %px to kernel "
"bottom from %px\n",
g_cframe - 1, k_crs);
if (AS(g_pcsp_hi).ind < SZ_OF_CR) {
pr_err("%s(): guest kernel chain stack underflow\n",
__func__);
KVM_BUG_ON(true);
}
pcshtp = SZ_OF_CR;
stacks->pcshtp = pcshtp;
DebugUST("guest kernel chain stack to FILL PCSHTP "
"set to 0x%x\n",
stacks->pcshtp);
} else if (!syscall && pcshtp == 0 && guest_user) {
e2k_pcsp_hi_t k_pcsp_hi;
unsigned long flags;
/* set flag for unconditional injection to do not copy */
/* from guest user space */
regs->need_inject = true;
/* reserve one bottom frames for trampoline */
/* the guest handler replaces guest user trapped frame */
raw_all_irq_save(flags);
NATIVE_FLUSHC;
k_pcsp_hi = NATIVE_NV_READ_PCSP_HI_REG();
BUG_ON(k_pcsp_hi.PCSP_hi_ind);
k_pcsp_hi.PCSP_hi_ind += 1 * SZ_OF_CR;
NATIVE_NV_NOIRQ_WRITE_PCSP_HI_REG(k_pcsp_hi);
raw_all_irq_restore(flags);
}
/*
* 2) Copy user data that cannot be FILLed
*/
ret = pv_vcpu_user_hw_stacks_copy(regs, stacks, cur_window_q,
guest_user);
if (unlikely(ret))
do_exit(SIGKILL);
}
/* Same as for native kernel without virtualization support */
static __always_inline int
user_hw_stacks_copy(struct e2k_stacks *stacks,
pt_regs_t *regs, u64 cur_window_q, bool copy_full)
{
return native_user_hw_stacks_copy(stacks, regs, cur_window_q, copy_full);
}
static __always_inline void
host_user_hw_stacks_prepare(struct e2k_stacks *stacks, pt_regs_t *regs,
u64 cur_window_q, enum restore_caller from, int syscall)
{
struct kvm_vcpu *vcpu;
if (likely(!kvm_test_intc_emul_flag(regs))) {
/* trap on/syscall from host user processes */
return native_user_hw_stacks_prepare(stacks, regs,