lguest: add iomem region, where guest page faults get sent to userspace.
This lets us implement PCI. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
d1c29465b8
commit
7313d5217e
|
@ -97,8 +97,12 @@ struct lguest {
|
||||||
struct lg_cpu cpus[NR_CPUS];
|
struct lg_cpu cpus[NR_CPUS];
|
||||||
unsigned int nr_cpus;
|
unsigned int nr_cpus;
|
||||||
|
|
||||||
|
/* Valid guest memory pages must be < this. */
|
||||||
u32 pfn_limit;
|
u32 pfn_limit;
|
||||||
|
|
||||||
|
/* Device memory is >= pfn_limit and < device_limit. */
|
||||||
|
u32 device_limit;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This provides the offset to the base of guest-physical memory in the
|
* This provides the offset to the base of guest-physical memory in the
|
||||||
* Launcher.
|
* Launcher.
|
||||||
|
@ -200,7 +204,8 @@ void guest_pagetable_flush_user(struct lg_cpu *cpu);
|
||||||
void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
|
void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
|
||||||
unsigned long vaddr, pte_t val);
|
unsigned long vaddr, pte_t val);
|
||||||
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
|
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
|
||||||
bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode);
|
bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode,
|
||||||
|
unsigned long *iomem);
|
||||||
void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
|
void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
|
||||||
bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr);
|
bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr);
|
||||||
unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
|
unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
|
||||||
|
|
|
@ -385,7 +385,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
|
||||||
/* "struct lguest" contains all we (the Host) know about a Guest. */
|
/* "struct lguest" contains all we (the Host) know about a Guest. */
|
||||||
struct lguest *lg;
|
struct lguest *lg;
|
||||||
int err;
|
int err;
|
||||||
unsigned long args[3];
|
unsigned long args[4];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We grab the Big Lguest lock, which protects against multiple
|
* We grab the Big Lguest lock, which protects against multiple
|
||||||
|
@ -419,6 +419,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
|
||||||
/* Populate the easy fields of our "struct lguest" */
|
/* Populate the easy fields of our "struct lguest" */
|
||||||
lg->mem_base = (void __user *)args[0];
|
lg->mem_base = (void __user *)args[0];
|
||||||
lg->pfn_limit = args[1];
|
lg->pfn_limit = args[1];
|
||||||
|
lg->device_limit = args[3];
|
||||||
|
|
||||||
/* This is the first cpu (cpu 0) and it will start booting at args[2] */
|
/* This is the first cpu (cpu 0) and it will start booting at args[2] */
|
||||||
err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
|
err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
|
||||||
|
|
|
@ -250,6 +250,16 @@ static void release_pte(pte_t pte)
|
||||||
}
|
}
|
||||||
/*:*/
|
/*:*/
|
||||||
|
|
||||||
|
/*
 * Does this Guest PTE point into the device (I/O) memory window?
 *
 * Returns true when the PTE's page frame number is >= lg->pfn_limit and
 * < lg->device_limit.  A PTE with _PAGE_PSE set is never reported as I/O
 * memory, because large pages are not handled here.
 */
static bool gpte_in_iomem(struct lg_cpu *cpu, pte_t gpte)
|
||||||
|
{
|
||||||
|
/* We don't handle large pages. */
|
||||||
|
if (pte_flags(gpte) & _PAGE_PSE)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return (pte_pfn(gpte) >= cpu->lg->pfn_limit
|
||||||
|
&& pte_pfn(gpte) < cpu->lg->device_limit);
|
||||||
|
}
|
||||||
|
|
||||||
static bool check_gpte(struct lg_cpu *cpu, pte_t gpte)
|
static bool check_gpte(struct lg_cpu *cpu, pte_t gpte)
|
||||||
{
|
{
|
||||||
if ((pte_flags(gpte) & _PAGE_PSE) ||
|
if ((pte_flags(gpte) & _PAGE_PSE) ||
|
||||||
|
@ -374,8 +384,14 @@ static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate,
|
||||||
*
|
*
|
||||||
* If we fixed up the fault (ie. we mapped the address), this routine returns
|
* If we fixed up the fault (ie. we mapped the address), this routine returns
|
||||||
* true. Otherwise, it was a real fault and we need to tell the Guest.
|
* true. Otherwise, it was a real fault and we need to tell the Guest.
|
||||||
|
*
|
||||||
|
* There's a corner case: they're trying to access memory between
|
||||||
|
* pfn_limit and device_limit, which is I/O memory. In this case, we
|
||||||
|
* return false and set @iomem to the physical address, so the
|
||||||
|
* Launcher can handle the instruction manually.
|
||||||
*/
|
*/
|
||||||
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode,
|
||||||
|
unsigned long *iomem)
|
||||||
{
|
{
|
||||||
unsigned long gpte_ptr;
|
unsigned long gpte_ptr;
|
||||||
pte_t gpte;
|
pte_t gpte;
|
||||||
|
@ -383,6 +399,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
pmd_t gpmd;
|
pmd_t gpmd;
|
||||||
pgd_t gpgd;
|
pgd_t gpgd;
|
||||||
|
|
||||||
|
*iomem = 0;
|
||||||
|
|
||||||
/* We never demand page the Switcher, so trying is a mistake. */
|
/* We never demand page the Switcher, so trying is a mistake. */
|
||||||
if (vaddr >= switcher_addr)
|
if (vaddr >= switcher_addr)
|
||||||
return false;
|
return false;
|
||||||
|
@ -459,6 +477,12 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
|
if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* If they're accessing io memory, we expect a fault. */
|
||||||
|
if (gpte_in_iomem(cpu, gpte)) {
|
||||||
|
*iomem = (pte_pfn(gpte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that the Guest PTE flags are OK, and the page number is below
|
* Check that the Guest PTE flags are OK, and the page number is below
|
||||||
* the pfn_limit (ie. not mapping the Launcher binary).
|
* the pfn_limit (ie. not mapping the Launcher binary).
|
||||||
|
@ -553,7 +577,9 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
|
||||||
*/
|
*/
|
||||||
void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
|
void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
|
||||||
{
|
{
|
||||||
if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
|
unsigned long iomem;
|
||||||
|
|
||||||
|
if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2, &iomem))
|
||||||
kill_guest(cpu, "bad stack page %#lx", vaddr);
|
kill_guest(cpu, "bad stack page %#lx", vaddr);
|
||||||
}
|
}
|
||||||
/*:*/
|
/*:*/
|
||||||
|
@ -928,7 +954,8 @@ static void __guest_set_pte(struct lg_cpu *cpu, int idx,
|
||||||
* now. This shaves 10% off a copy-on-write
|
* now. This shaves 10% off a copy-on-write
|
||||||
* micro-benchmark.
|
* micro-benchmark.
|
||||||
*/
|
*/
|
||||||
if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
|
if ((pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED))
|
||||||
|
&& !gpte_in_iomem(cpu, gpte)) {
|
||||||
if (!check_gpte(cpu, gpte))
|
if (!check_gpte(cpu, gpte))
|
||||||
return;
|
return;
|
||||||
set_pte(spte,
|
set_pte(spte,
|
||||||
|
|
|
@ -362,9 +362,19 @@ static void setup_emulate_insn(struct lg_cpu *cpu)
|
||||||
sizeof(cpu->pending.insn));
|
sizeof(cpu->pending.insn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Queue an I/O memory access for the Launcher to handle.
 *
 * Marks the pending trap as 14, records the I/O memory address the Guest
 * touched in pending.addr, and copies sizeof(pending.insn) bytes from the
 * Guest at its current eip into pending.insn.
 * NOTE(review): 14 is presumably the x86 page fault vector -- confirm.
 */
static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr)
|
||||||
|
{
|
||||||
|
cpu->pending.trap = 14;
|
||||||
|
cpu->pending.addr = iomem_addr;
|
||||||
|
copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip,
|
||||||
|
sizeof(cpu->pending.insn));
|
||||||
|
}
|
||||||
|
|
||||||
/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
|
/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
|
||||||
void lguest_arch_handle_trap(struct lg_cpu *cpu)
|
void lguest_arch_handle_trap(struct lg_cpu *cpu)
|
||||||
{
|
{
|
||||||
|
unsigned long iomem_addr;
|
||||||
|
|
||||||
switch (cpu->regs->trapnum) {
|
switch (cpu->regs->trapnum) {
|
||||||
case 13: /* We've intercepted a General Protection Fault. */
|
case 13: /* We've intercepted a General Protection Fault. */
|
||||||
/* Hand to Launcher to emulate those pesky IN and OUT insns */
|
/* Hand to Launcher to emulate those pesky IN and OUT insns */
|
||||||
|
@ -385,9 +395,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
|
||||||
* whether kernel or userspace code.
|
* whether kernel or userspace code.
|
||||||
*/
|
*/
|
||||||
if (demand_page(cpu, cpu->arch.last_pagefault,
|
if (demand_page(cpu, cpu->arch.last_pagefault,
|
||||||
cpu->regs->errcode))
|
cpu->regs->errcode, &iomem_addr))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/* Was this an access to memory mapped IO? */
|
||||||
|
if (iomem_addr) {
|
||||||
|
/* Tell Launcher, let it handle it. */
|
||||||
|
setup_iomem_insn(cpu, iomem_addr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* OK, it's really not there (or not OK): the Guest needs to
|
* OK, it's really not there (or not OK): the Guest needs to
|
||||||
* know. We write out the cr2 value so it knows where the
|
* know. We write out the cr2 value so it knows where the
|
||||||
|
|
|
@ -548,7 +548,8 @@ static void tell_kernel(unsigned long start)
|
||||||
{
|
{
|
||||||
unsigned long args[] = { LHREQ_INITIALIZE,
|
unsigned long args[] = { LHREQ_INITIALIZE,
|
||||||
(unsigned long)guest_base,
|
(unsigned long)guest_base,
|
||||||
guest_limit / getpagesize(), start };
|
guest_limit / getpagesize(), start,
|
||||||
|
guest_limit / getpagesize() };
|
||||||
verbose("Guest: %p - %p (%#lx)\n",
|
verbose("Guest: %p - %p (%#lx)\n",
|
||||||
guest_base, guest_base + guest_limit, guest_limit);
|
guest_base, guest_base + guest_limit, guest_limit);
|
||||||
lguest_fd = open_or_die("/dev/lguest", O_RDWR);
|
lguest_fd = open_or_die("/dev/lguest", O_RDWR);
|
||||||
|
|
Loading…
Reference in New Issue