xen: features and fixes for 3.16-rc0

- Support foreign mappings in PVH domains (needed when dom0 is PVH)
 
 - Fix mapping high MMIO regions in x86 PV guests (this is also the
   first half of removing the PAGE_IOMAP PTE flag).
 
 - ARM suspend/resume support.
 
 - ARM multicall support.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQEcBAABAgAGBQJTjE5MAAoJEFxbo/MsZsTRtl8H/2lfS9w05e60vRxjolPV0vRc
 5k9DcYFeJ+k2cz/2T3mNlIvKdfBTesSfgVquH+28GhQz+uKFQ1OrJpYNDTougSw5
 Wv0Ae8e+7eLABvJ9XMiZdDsPzsICw2wqWOvqrnQi2qR3SIimBc5tBigR4+Rccv+e
 btuBLlYT4WPQ8qgNyCBPgxzuyxteu5wK/0XryX6NcbrxeEbAzQAeDKkmvCD4fSvx
 KxrwTO3mwV4Lefmf/WS4Z9fDcPujQOUqKEtUWanw/2JalO1BzDPo+1wvYs0LduLC
 QI/YJN4SL3UeGOmbX2tyIaRgMsAcQVVrYkTm1cp8eD7vcRuvXaqy6dxuX05+V4g=
 =cxfG
 -----END PGP SIGNATURE-----

Merge tag 'stable/for-linus-3.16-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into next

Pull Xen updates from David Vrabel:
 "xen: features and fixes for 3.16-rc0
   - support foreign mappings in PVH domains (needed when dom0 is PVH)

   - fix mapping high MMIO regions in x86 PV guests (this is also the
     first half of removing the PAGE_IOMAP PTE flag).

   - ARM suspend/resume support.

   - ARM multicall support"

* tag 'stable/for-linus-3.16-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  x86/xen: map foreign pfns for autotranslated guests
  xen-acpi-processor: Don't display errors when we get -ENOSYS
  xen/pciback: Document the entry points for 'pcistub_put_pci_dev'
  xen/pciback: Document when the 'unbind' and 'bind' functions are called.
  xen-pciback: Document when we FLR an PCI device.
  xen-pciback: First reset, then free.
  xen-pciback: Cleanup up pcistub_put_pci_dev
  x86/xen: do not use _PAGE_IOMAP in xen_remap_domain_mfn_range()
  x86/xen: set regions above the end of RAM as 1:1
  x86/xen: only warn once if bad MFNs are found during setup
  x86/xen: compactly store large identity ranges in the p2m
  x86/xen: fix set_phys_range_identity() if pfn_e > MAX_P2M_PFN
  x86/xen: rename early_p2m_alloc() and early_p2m_alloc_middle()
  xen/x86: set panic notifier priority to minimum
  arm,arm64/xen: introduce HYPERVISOR_suspend()
  xen: refactor suspend pre/post hooks
  arm: xen: export HYPERVISOR_multicall to modules.
  arm64: introduce virt_to_pfn
  arm/xen: Remove definiition of virt_to_pfn in asm/xen/page.h
  arm: xen: implement multicall hypercall support.
This commit is contained in:
Linus Torvalds 2014-06-02 08:24:12 -07:00
commit 9f888b3a10
19 changed files with 334 additions and 128 deletions

View File

@ -34,6 +34,7 @@
#define _ASM_ARM_XEN_HYPERCALL_H
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
long privcmd_call(unsigned call, unsigned long a1,
unsigned long a2, unsigned long a3,
@ -48,6 +49,16 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
int HYPERVISOR_physdev_op(int cmd, void *arg);
int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
int HYPERVISOR_tmem_op(void *arg);
int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr);
static inline int
HYPERVISOR_suspend(unsigned long start_info_mfn)
{
struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
/* start_info_mfn is unused on ARM */
return HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
}
static inline void
MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
@ -63,9 +74,4 @@ MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
BUG();
}
static inline int
HYPERVISOR_multicall(void *call_list, int nr_calls)
{
BUG();
}
#endif /* _ASM_ARM_XEN_HYPERCALL_H */

View File

@ -40,6 +40,8 @@ typedef uint64_t xen_pfn_t;
#define PRI_xen_pfn "llx"
typedef uint64_t xen_ulong_t;
#define PRI_xen_ulong "llx"
typedef int64_t xen_long_t;
#define PRI_xen_long "llx"
/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int);

View File

@ -339,6 +339,14 @@ static int __init xen_pm_init(void)
}
late_initcall(xen_pm_init);
/*
 * Suspend/resume hooks that are intentionally left as no-ops in this
 * file: each one has an empty body.
 */
void xen_arch_pre_suspend(void)
{
}

void xen_arch_post_suspend(int suspend_cancelled)
{
}

void xen_timer_resume(void)
{
}

void xen_arch_resume(void)
{
}
/* In the hypervisor.S file. */
EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
@ -350,4 +358,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(privcmd_call);

View File

@ -89,6 +89,7 @@ HYPERCALL2(memory_op);
HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op);
HYPERCALL2(multicall);
ENTRY(privcmd_call)
stmdb sp!, {r4}

View File

@ -80,6 +80,7 @@ HYPERCALL2(memory_op);
HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op);
HYPERCALL2(multicall);
ENTRY(privcmd_call)
mov x16, x0

View File

@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg)
}
static inline int
HYPERVISOR_multicall(void *call_list, int nr_calls)
HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
{
return _hypercall2(int, multicall, call_list, nr_calls);
}

View File

@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t;
#define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t;
#define PRI_xen_ulong "lx"
typedef long xen_long_t;
#define PRI_xen_long "lx"
/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int);

View File

@ -1339,6 +1339,7 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
/*
 * Notifier block whose callback is xen_panic_event().
 *
 * The priority is INT_MIN, i.e. the smallest value an int can hold.
 * NOTE(review): presumably this makes the callback run after every
 * other notifier on the same chain - confirm against the notifier
 * chain ordering rules.
 */
static struct notifier_block xen_panic_block = {
.notifier_call= xen_panic_event,
.priority = INT_MIN
};
int xen_panic_handler_init(void)

View File

@ -2510,6 +2510,95 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif
#ifdef CONFIG_XEN_PVH
/*
* Map foreign gfn (fgfn), to local pfn (lpfn). This for the user
* space creating new guest on pvh dom0 and needing to map domU pages.
*/
static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn,
unsigned int domid)
{
int rc, err = 0;
xen_pfn_t gpfn = lpfn;
xen_ulong_t idx = fgfn;
struct xen_add_to_physmap_range xatp = {
.domid = DOMID_SELF,
.foreign_domid = domid,
.size = 1,
.space = XENMAPSPACE_gmfn_foreign,
};
set_xen_guest_handle(xatp.idxs, &idx);
set_xen_guest_handle(xatp.gpfns, &gpfn);
set_xen_guest_handle(xatp.errs, &err);
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
if (rc < 0)
return rc;
return err;
}
/*
 * Remove @count consecutive pfns, starting at @spfn, from this
 * domain's physmap, issuing one XENMEM_remove_from_physmap hypercall
 * per pfn.
 *
 * Stops at the first hypercall that returns non-zero and returns that
 * value.  Returns 0 when every removal succeeded, and also when
 * @count is not positive (nothing to do).
 */
static int xlate_remove_from_p2m(unsigned long spfn, int count)
{
	struct xen_remove_from_physmap xrp;
	/*
	 * Must start at 0: the loop body never runs when count <= 0, and
	 * returning an uninitialised value would be undefined behaviour.
	 */
	int rc = 0;
	int i;

	for (i = 0; i < count; i++) {
		xrp.domid = DOMID_SELF;
		xrp.gpfn = spfn + i;
		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
		if (rc)
			break;
	}

	return rc;
}
/*
 * State handed to xlate_map_pte_fn() through its opaque data pointer
 * while xlate_remap_gfn_range() walks a range of PTEs.
 */
struct xlate_remap_data {
unsigned long fgfn; /* foreign domain's gfn */
pgprot_t prot; /* protection bits used when building each PTE */
domid_t domid; /* foreign domain id passed to xlate_add_to_p2m() */
int index; /* next entry of pages[] to consume; post-incremented per PTE */
struct page **pages; /* local pages whose pfns receive the foreign mapping */
};
/*
 * Per-PTE callback used by xlate_remap_gfn_range().
 *
 * Consumes the next page from the remap state, asks the hypervisor to
 * back that page's pfn with the foreign gfn, and on success installs a
 * special PTE for the pfn with the requested protection.
 *
 * @ptep:  PTE slot to fill in.
 * @token: unused.
 * @addr:  unused.
 * @data:  struct xlate_remap_data describing the mapping in progress.
 *
 * Returns 0 on success or the non-zero result of xlate_add_to_p2m(),
 * in which case the PTE is left untouched.
 *
 * NOTE(review): fgfn is never advanced here, so every invocation maps
 * the same foreign gfn - confirm callers only request one page at a
 * time.
 */
static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
			    void *data)
{
	struct xlate_remap_data *ctx = data;
	struct page *local_page = ctx->pages[ctx->index++];
	unsigned long local_pfn = page_to_pfn(local_page);
	pte_t new_pte = pte_mkspecial(pfn_pte(local_pfn, ctx->prot));
	int err;

	err = xlate_add_to_p2m(local_pfn, ctx->fgfn, ctx->domid);
	if (err != 0)
		return err;

	native_set_pte(ptep, new_pte);
	return 0;
}
/*
 * Map a foreign gfn into @nr pages of @vma's address space starting at
 * @addr, backing each PTE with the corresponding entry of @pages.
 *
 * @vma:   VMA whose mm owns the page tables being populated.
 * @addr:  first virtual address to map.
 * @mfn:   foreign gfn handed to the per-PTE callback.
 * @nr:    number of pages to walk.
 * @prot:  protection bits for the new PTEs.
 * @domid: foreign domain that owns the gfn.
 * @pages: local pages to use; must not be NULL (BUG otherwise).
 *
 * All TLBs are flushed whether or not the walk succeeded.  Returns the
 * result of apply_to_page_range().
 */
static int xlate_remap_gfn_range(struct vm_area_struct *vma,
				 unsigned long addr, unsigned long mfn,
				 int nr, pgprot_t prot, unsigned domid,
				 struct page **pages)
{
	struct xlate_remap_data remap;
	int ret;

	BUG_ON(!pages);

	remap.pages = pages;
	remap.index = 0;
	remap.domid = domid;
	remap.prot = prot;
	remap.fgfn = mfn;

	ret = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
				  xlate_map_pte_fn, &remap);
	flush_tlb_all();

	return ret;
}
#endif
#define REMAP_BATCH_SIZE 16
struct remap_data {
@ -2522,7 +2611,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte);
@ -2544,13 +2633,18 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long range;
int err = 0;
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EINVAL;
prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */
return xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
domid, pages);
#else
return -EINVAL;
#endif
}
rmd.mfn = mfn;
rmd.prot = prot;
@ -2588,6 +2682,25 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
return 0;
#ifdef CONFIG_XEN_PVH
while (numpgs--) {
/*
* The mmu has already cleaned up the process mmu
* resources at this point (lookup_address will return
* NULL).
*/
unsigned long pfn = page_to_pfn(pages[numpgs]);
xlate_remove_from_p2m(pfn, 1);
}
/*
* We don't need to flush tlbs because as part of
* xlate_remove_from_p2m, the hypervisor will do tlb flushes
* after removing the p2m entries from the EPT/NPT
*/
return 0;
#else
return -EINVAL;
#endif
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);

View File

@ -36,7 +36,7 @@
* pfn_to_mfn(0xc0000)=0xc0000
*
* The benefit of this is, that we can assume for non-RAM regions (think
* PCI BARs, or ACPI spaces), we can create mappings easily b/c we
* PCI BARs, or ACPI spaces), we can create mappings easily because we
* get the PFN value to match the MFN.
*
* For this to work efficiently we have one new page p2m_identity and
@ -60,7 +60,7 @@
* There is also a diagram of the P2M at the end that can help.
* Imagine your E820 looking as so:
*
* 1GB 2GB
* 1GB 2GB 4GB
* /-------------------+---------\/----\ /----------\ /---+-----\
* | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
* \-------------------+---------/\----/ \----------/ \---+-----/
@ -77,9 +77,8 @@
* of the PFN and the end PFN (263424 and 512256 respectively). The first step
* is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
* covers 512^2 of page estate (1GB) and in case the start or end PFN is not
* aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
* to end pfn. We reserve_brk top leaf pages if they are missing (means they
* point to p2m_mid_missing).
* aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
* required to split any existing p2m_mid_missing middle pages.
*
* With the E820 example above, 263424 is not 1GB aligned so we allocate a
* reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
@ -88,7 +87,7 @@
* Next stage is to determine if we need to do a more granular boundary check
* on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
* We check if the start pfn and end pfn violate that boundary check, and if
* so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
* so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
* granularity of setting which PFNs are missing and which ones are identity.
* In our example 263424 and 512256 both fail the check so we reserve_brk two
* pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
@ -102,9 +101,10 @@
*
* The next step is to walk from the start pfn to the end pfn setting
* the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
* If we find that the middle leaf is pointing to p2m_missing we can swap it
* over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
* point we do not need to worry about boundary aligment (so no need to
* If we find that the middle entry is pointing to p2m_missing we can swap it
* over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
* similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
* At this point we do not need to worry about boundary alignment (so no need to
* reserve_brk a middle page, figure out which PFNs are "missing" and which
* ones are identity), as that has been done earlier. If we find that the
* middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
@ -118,6 +118,9 @@
* considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
* contain the INVALID_P2M_ENTRY value and are considered "missing."
*
* Finally, the region beyond the end of the E820 (4 GB in this example)
* is set to be identity (in case there are MMIO regions placed here).
*
* This is what the p2m ends up looking (for the E820 above) with this
* fabulous drawing:
*
@ -129,21 +132,27 @@
* |-----| \ | [p2m_identity]+\\ | .... |
* | 2 |--\ \-------------------->| ... | \\ \----------------/
* |-----| \ \---------------/ \\
* | 3 |\ \ \\ p2m_identity
* |-----| \ \-------------------->/---------------\ /-----------------\
* | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
* \-----/ / | [p2m_identity]+-->| ..., ~0 |
* / /---------------\ | .... | \-----------------/
* / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
* / | IDENTITY[@256]|<----/ \---------------/
* / | ~0, ~0, .... |
* | \---------------/
* |
* p2m_mid_missing p2m_missing
* /-----------------\ /------------\
* | [p2m_missing] +---->| ~0, ~0, ~0 |
* | [p2m_missing] +---->| ..., ~0 |
* \-----------------/ \------------/
* | 3 |-\ \ \\ p2m_identity [1]
* |-----| \ \-------------------->/---------------\ /-----------------\
* | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
* \-----/ | | | [p2m_identity]+-->| ..., ~0 |
* | | | .... | \-----------------/
* | | +-[x], ~0, ~0.. +\
* | | \---------------/ \
* | | \-> /---------------\
* | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
* | /-----------------\ /------------\ | IDENTITY[@256]|
* | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
* | | [p2m_missing] +---->| ..., ~0 | \---------------/
* | | ... | \------------/
* | \-----------------/
* |
* | p2m_mid_identity
* | /-----------------\
* \-->| [p2m_identity] +---->[1]
* | [p2m_identity] +---->[1]
* | ... |
* \-----------------/
*
* where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/
@ -187,13 +196,15 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
/* We might hit two boundary violations at the start and end, at max each
* boundary violation will require three middle nodes. */
RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
/* When we populate back during bootup, the amount of pages can vary. The
* max we have seen is 395979, but that does not mean it can't be more.
@ -242,20 +253,20 @@ static void p2m_top_mfn_p_init(unsigned long **top)
top[i] = p2m_mid_missing_mfn;
}
static void p2m_mid_init(unsigned long **mid)
static void p2m_mid_init(unsigned long **mid, unsigned long *leaf)
{
unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++)
mid[i] = p2m_missing;
mid[i] = leaf;
}
static void p2m_mid_mfn_init(unsigned long *mid)
static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{
unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++)
mid[i] = virt_to_mfn(p2m_missing);
mid[i] = virt_to_mfn(leaf);
}
static void p2m_init(unsigned long *p2m)
@ -286,7 +297,9 @@ void __ref xen_build_mfn_list_list(void)
/* Pre-initialize p2m_top_mfn to be completely missing */
if (p2m_top_mfn == NULL) {
p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(p2m_mid_missing_mfn);
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_mfn_p_init(p2m_top_mfn_p);
@ -295,7 +308,8 @@ void __ref xen_build_mfn_list_list(void)
p2m_top_mfn_init(p2m_top_mfn);
} else {
/* Reinitialise, mfn's all change after migration */
p2m_mid_mfn_init(p2m_mid_missing_mfn);
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
}
for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@ -327,7 +341,7 @@ void __ref xen_build_mfn_list_list(void)
* it too late.
*/
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p);
p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p;
}
@ -365,16 +379,17 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_missing);
p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_identity);
p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(p2m_mid_missing);
p2m_mid_init(p2m_mid_missing, p2m_missing);
p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(p2m_mid_identity, p2m_identity);
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_identity);
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@ -386,7 +401,7 @@ void __init xen_build_dynamic_phys_to_machine(void)
if (p2m_top[topidx] == p2m_mid_missing) {
unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(mid);
p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid;
}
@ -492,7 +507,7 @@ unsigned long get_phys_to_machine(unsigned long pfn)
unsigned topidx, mididx, idx;
if (unlikely(pfn >= MAX_P2M_PFN))
return INVALID_P2M_ENTRY;
return IDENTITY_FRAME(pfn);
topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn);
@ -545,7 +560,7 @@ static bool alloc_p2m(unsigned long pfn)
if (!mid)
return false;
p2m_mid_init(mid);
p2m_mid_init(mid, p2m_missing);
if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
free_p2m_page(mid);
@ -565,7 +580,7 @@ static bool alloc_p2m(unsigned long pfn)
if (!mid_mfn)
return false;
p2m_mid_mfn_init(mid_mfn);
p2m_mid_mfn_init(mid_mfn, p2m_missing);
missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
mid_mfn_mfn = virt_to_mfn(mid_mfn);
@ -596,7 +611,7 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary)
static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
{
unsigned topidx, mididx, idx;
unsigned long *p2m;
@ -638,7 +653,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary
return true;
}
static bool __init early_alloc_p2m(unsigned long pfn)
static bool __init early_alloc_p2m_middle(unsigned long pfn)
{
unsigned topidx = p2m_top_index(pfn);
unsigned long *mid_mfn_p;
@ -649,7 +664,7 @@ static bool __init early_alloc_p2m(unsigned long pfn)
if (mid == p2m_mid_missing) {
mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(mid);
p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid;
@ -658,12 +673,12 @@ static bool __init early_alloc_p2m(unsigned long pfn)
/* And the save/restore P2M tables.. */
if (mid_mfn_p == p2m_mid_missing_mfn) {
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p);
p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p;
p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
/* Note: we don't set mid_mfn_p[midix] here,
* look in early_alloc_p2m_middle */
* look in early_alloc_p2m() */
}
return true;
}
@ -739,7 +754,7 @@ found:
/* This shouldn't happen */
if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
early_alloc_p2m(set_pfn);
early_alloc_p2m_middle(set_pfn);
if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
return false;
@ -754,13 +769,13 @@ found:
bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!early_alloc_p2m(pfn))
if (!early_alloc_p2m_middle(pfn))
return false;
if (early_can_reuse_p2m_middle(pfn, mfn))
return __set_phys_to_machine(pfn, mfn);
if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
return false;
if (!__set_phys_to_machine(pfn, mfn))
@ -769,12 +784,30 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
return true;
}
/*
 * Make sure @pfn starts a p2m page of its own by allocating whatever
 * middle and leaf pages are needed when it falls part-way through one.
 *
 * A middle page is allocated when @pfn has a non-zero middle or leaf
 * index; a leaf page is additionally allocated when the leaf index is
 * non-zero.  The results of the allocation helpers are not checked.
 */
static void __init early_split_p2m(unsigned long pfn)
{
	const unsigned long mid_off = p2m_mid_index(pfn);
	const unsigned long leaf_off = p2m_index(pfn);

	if (mid_off != 0 || leaf_off != 0)
		early_alloc_p2m_middle(pfn);

	if (leaf_off != 0)
		early_alloc_p2m(pfn, false);
}
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e)
{
unsigned long pfn;
if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
if (unlikely(pfn_s >= MAX_P2M_PFN))
return 0;
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
@ -783,19 +816,30 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
if (pfn_s > pfn_e)
return 0;
for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
{
WARN_ON(!early_alloc_p2m(pfn));
}
if (pfn_e > MAX_P2M_PFN)
pfn_e = MAX_P2M_PFN;
early_alloc_p2m_middle(pfn_s, true);
early_alloc_p2m_middle(pfn_e, true);
early_split_p2m(pfn_s);
early_split_p2m(pfn_e);
for (pfn = pfn_s; pfn < pfn_e;) {
unsigned topidx = p2m_top_index(pfn);
unsigned mididx = p2m_mid_index(pfn);
for (pfn = pfn_s; pfn < pfn_e; pfn++)
if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
break;
pfn++;
/*
* If the PFN was set to a middle or leaf identity
* page the remainder must also be identity, so skip
* ahead to the next middle or leaf entry.
*/
if (p2m_top[topidx] == p2m_mid_identity)
pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
else if (p2m_top[topidx][mididx] == p2m_identity)
pfn = ALIGN(pfn, P2M_PER_PAGE);
}
if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
@ -825,8 +869,22 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
/* For sparse holes where the p2m leaf has real PFN along with
* PCI holes, stick in the PFN as the MFN value.
*
* set_phys_range_identity() will have allocated new middle
* and leaf pages as required so an existing p2m_mid_missing
* or p2m_missing mean that whole range will be identity so
* these can be switched to p2m_mid_identity or p2m_identity.
*/
if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
if (p2m_top[topidx] == p2m_mid_identity)
return true;
if (p2m_top[topidx] == p2m_mid_missing) {
WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
p2m_mid_identity) != p2m_mid_missing);
return true;
}
if (p2m_top[topidx][mididx] == p2m_identity)
return true;

View File

@ -89,10 +89,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
continue;
WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
pfn, mfn);
WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
pfn, mfn);
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
@ -468,6 +468,15 @@ char * __init xen_memory_setup(void)
i++;
}
/*
* Set the rest as identity mapped, in case PCI BARs are
* located here.
*
* PFNs above MAX_P2M_PFN are considered identity mapped as
* well.
*/
set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
/*
* In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke

View File

@ -12,8 +12,10 @@
#include "xen-ops.h"
#include "mmu.h"
void xen_arch_pre_suspend(void)
static void xen_pv_pre_suspend(void)
{
xen_mm_pin_all();
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
mfn_to_pfn(xen_start_info->console.domU.mfn);
@ -26,7 +28,7 @@ void xen_arch_pre_suspend(void)
BUG();
}
void xen_arch_hvm_post_suspend(int suspend_cancelled)
static void xen_hvm_post_suspend(int suspend_cancelled)
{
#ifdef CONFIG_XEN_PVHVM
int cpu;
@ -41,7 +43,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
#endif
}
void xen_arch_post_suspend(int suspend_cancelled)
static void xen_pv_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
@ -60,6 +62,21 @@ void xen_arch_post_suspend(int suspend_cancelled)
xen_vcpu_restore();
}
xen_mm_unpin_all();
}
/*
 * Architecture hook run before the suspend hypercall.  Only PV domains
 * have work to do here; for anything else this is a no-op.
 */
void xen_arch_pre_suspend(void)
{
	if (!xen_pv_domain())
		return;

	xen_pv_pre_suspend();
}
/*
 * Architecture hook run after the suspend hypercall returns.
 *
 * @cancelled: forwarded unchanged to the domain-type specific handler.
 *
 * PV domains are handled by xen_pv_post_suspend(); every other domain
 * type goes through xen_hvm_post_suspend().
 */
void xen_arch_post_suspend(int cancelled)
{
	if (!xen_pv_domain()) {
		xen_hvm_post_suspend(cancelled);
		return;
	}

	xen_pv_post_suspend(cancelled);
}
static void xen_vcpu_notify_restore(void *data)

View File

@ -31,6 +31,8 @@ void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
void xen_set_pat(u64);
char * __init xen_memory_setup(void);

View File

@ -41,9 +41,6 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
struct suspend_info {
int cancelled;
unsigned long arg; /* extra hypercall argument */
void (*pre)(void);
void (*post)(int cancelled);
};
static RAW_NOTIFIER_HEAD(xen_resume_notifier);
@ -61,26 +58,6 @@ void xen_resume_notifier_unregister(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister);
#ifdef CONFIG_HIBERNATE_CALLBACKS
static void xen_hvm_post_suspend(int cancelled)
{
xen_arch_hvm_post_suspend(cancelled);
gnttab_resume();
}
static void xen_pre_suspend(void)
{
xen_mm_pin_all();
gnttab_suspend();
xen_arch_pre_suspend();
}
static void xen_post_suspend(int cancelled)
{
xen_arch_post_suspend(cancelled);
gnttab_resume();
xen_mm_unpin_all();
}
static int xen_suspend(void *data)
{
struct suspend_info *si = data;
@ -94,18 +71,20 @@ static int xen_suspend(void *data)
return err;
}
if (si->pre)
si->pre();
gnttab_suspend();
xen_arch_pre_suspend();
/*
* This hypercall returns 1 if suspend was cancelled
* or the domain was merely checkpointed, and 0 if it
* is resuming in a new domain.
*/
si->cancelled = HYPERVISOR_suspend(si->arg);
si->cancelled = HYPERVISOR_suspend(xen_pv_domain()
? virt_to_mfn(xen_start_info)
: 0);
if (si->post)
si->post(si->cancelled);
xen_arch_post_suspend(si->cancelled);
gnttab_resume();
if (!si->cancelled) {
xen_irq_resume();
@ -154,16 +133,6 @@ static void do_suspend(void)
si.cancelled = 1;
if (xen_hvm_domain()) {
si.arg = 0UL;
si.pre = NULL;
si.post = &xen_hvm_post_suspend;
} else {
si.arg = virt_to_mfn(xen_start_info);
si.pre = &xen_pre_suspend;
si.post = &xen_post_suspend;
}
err = stop_machine(xen_suspend, &si, cpumask_of(0));
raw_notifier_call_chain(&xen_resume_notifier, 0, NULL);

View File

@ -127,7 +127,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
pr_debug(" C%d: %s %d uS\n",
cx->type, cx->desc, (u32)cx->latency);
}
} else if (ret != -EINVAL)
} else if ((ret != -EINVAL) && (ret != -ENOSYS))
/* EINVAL means the ACPI ID is incorrect - meaning the ACPI
* table is referencing a non-existing CPU - which can happen
* with broken ACPI tables. */
@ -259,7 +259,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
(u32) perf->states[i].power,
(u32) perf->states[i].transition_latency);
}
} else if (ret != -EINVAL)
} else if ((ret != -EINVAL) && (ret != -ENOSYS))
/* EINVAL means the ACPI ID is incorrect - meaning the ACPI
* table is referencing a non-existing CPU - which can happen
* with broken ACPI tables. */

View File

@ -242,6 +242,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
return found_dev;
}
/*
* Called when:
* - XenBus state has been reconfigured (pci unplug). See xen_pcibk_remove_device
* - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove
* - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove
* - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
*
* As such we have to be careful.
*/
void pcistub_put_pci_dev(struct pci_dev *dev)
{
struct pcistub_device *psdev, *found_psdev = NULL;
@ -272,16 +281,16 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* and want to inhibit the user from fiddling with 'reset'
*/
pci_reset_function(dev);
pci_restore_state(psdev->dev);
pci_restore_state(dev);
/* This disables the device. */
xen_pcibk_reset_device(found_psdev->dev);
xen_pcibk_reset_device(dev);
/* And cleanup up our emulated fields. */
xen_pcibk_config_free_dyn_fields(found_psdev->dev);
xen_pcibk_config_reset_dev(found_psdev->dev);
xen_pcibk_config_reset_dev(dev);
xen_pcibk_config_free_dyn_fields(dev);
xen_unregister_device_domain_owner(found_psdev->dev);
xen_unregister_device_domain_owner(dev);
spin_lock_irqsave(&found_psdev->lock, flags);
found_psdev->pdev = NULL;
@ -493,6 +502,8 @@ static int pcistub_seize(struct pci_dev *dev)
return err;
}
/* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
* other functions that take the sysfs lock. */
static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
int err = 0;
@ -520,6 +531,8 @@ out:
return err;
}
/* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or
* other functions that take the sysfs lock. */
static void pcistub_remove(struct pci_dev *dev)
{
struct pcistub_device *psdev, *found_psdev = NULL;
@ -551,6 +564,8 @@ static void pcistub_remove(struct pci_dev *dev)
pr_warn("****** shutdown driver domain before binding device\n");
pr_warn("****** to other drivers or domains\n");
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(found_psdev->pdev,
found_psdev->dev);
}

View File

@ -93,6 +93,8 @@ static void free_pdev(struct xen_pcibk_device *pdev)
xen_pcibk_disconnect(pdev);
/* N.B. This calls pcistub_put_pci_dev which does the FLR on all
* of the PCIe devices. */
xen_pcibk_release_devices(pdev);
dev_set_drvdata(&pdev->xdev->dev, NULL);
@ -286,6 +288,8 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
xen_unregister_device_domain_owner(dev);
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(pdev, dev);
out:

View File

@ -275,9 +275,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
* NB. The fields are natural register size for this architecture.
*/
struct multicall_entry {
unsigned long op;
long result;
unsigned long args[6];
xen_ulong_t op;
xen_long_t result;
xen_ulong_t args[6];
};
DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);

View File

@ -9,10 +9,6 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
void xen_arch_pre_suspend(void);
void xen_arch_post_suspend(int suspend_cancelled);
void xen_arch_hvm_post_suspend(int suspend_cancelled);
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
void xen_timer_resume(void);
void xen_arch_resume(void);