diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 9e18a61d3df3..d39a31c3cdf2 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -18,9 +18,9 @@ unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); -unsigned long *page_table_alloc(struct mm_struct *, unsigned long); +unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mmu_gather *, unsigned long *); +void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, bool init_skey); @@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm, /* * page table entry allocation/free routines. */ -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) +#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) +#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d95012f9e77f..9bfdbca14f95 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -789,19 +790,25 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) /** * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table * @table: pointer to the page directory * @asce: address space control element for gmap page table - * @crst_list: list of all crst tables used in the guest address space * @pfault_enabled: defines if pfaults are applicable for the guest */ struct gmap { struct list_head list; + struct list_head crst_list; struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; unsigned long *table; unsigned long asce; void *private; - struct list_head crst_list; bool pfault_enabled; }; @@ -846,8 +853,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); unsigned long gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long __gmap_fault(struct gmap *, unsigned long gaddr); -unsigned long gmap_fault(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index a25f09fbaf36..572c59949004 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - page_table_free_rcu(tlb, (unsigned long *) pte); + page_table_free_rcu(tlb, (unsigned long *) pte, address); } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c877c8e4494..543c24baf1eb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1092,18 +1092,8 @@ retry: */ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) { - struct mm_struct *mm = current->mm; - hva_t hva; - long rc; - - hva = gmap_fault(vcpu->arch.gmap, gpa); - if (IS_ERR_VALUE(hva)) - return (long)hva; - down_read(&mm->mmap_sem); - rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); - up_read(&mm->mmap_sem); - - return rc < 0 ? rc : 0; + return gmap_fault(vcpu->arch.gmap, gpa, + writable ? FAULT_FLAG_WRITE : 0); } static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, @@ -1683,9 +1673,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg); - if (!IS_ERR_VALUE(r)) - r = 0; + r = gmap_fault(vcpu->arch.gmap, arg, 0); break; } case KVM_ENABLE_CAP: diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 4880399d040e..a2b81d6ce8a5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - gmap = (struct gmap *) - ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + gmap = (current->flags & PF_VCPU) ? + (struct gmap *) S390_lowcore.gmap : NULL; if (gmap) { - address = __gmap_fault(gmap, address); + current->thread.gmap_addr = address; + address = __gmap_translate(gmap, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } - if (address == -ENOMEM) { - fault = VM_FAULT_OOM; - goto out_up; - } if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } @@ -530,6 +527,20 @@ retry: goto retry; } } +#ifdef CONFIG_PGSTE + if (gmap) { + address = __gmap_link(gmap, current->thread.gmap_addr, + address); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + } +#endif fault = 0; out_up: up_read(&mm->mmap_sem); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 16ca8617f2e1..74dfd9eaa300 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -158,17 +158,23 @@ struct gmap *gmap_alloc(struct mm_struct *mm) if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); gmap->mm = mm; page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) goto out_free; + page->index = 0; list_add(&page->lru, &gmap->crst_list); table = (unsigned long *) page_to_phys(page); crst_table_init(table, _REGION1_ENTRY_EMPTY); gmap->table = table; gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | __pa(table); + down_write(&mm->mmap_sem); list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); return gmap; out_free: @@ -178,27 +184,6 @@ out: } EXPORT_SYMBOL_GPL(gmap_alloc); -static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) -{ - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct page *page; - - if (*table & _SEGMENT_ENTRY_INVALID) - return 0; - page = pfn_to_page(*table >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry(rmap, &mp->mapper, list) { - if (rmap->entry != table) - continue; - list_del(&rmap->list); - kfree(rmap); - break; - } - *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; - return 1; -} - static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) @@ -208,6 +193,30 @@ static void gmap_flush_tlb(struct gmap *gmap) __tlb_flush_global(); } +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure @@ -215,9 +224,6 @@ static void gmap_flush_tlb(struct gmap *gmap) void gmap_free(struct gmap *gmap) { struct page *page, *next; - unsigned long *table; - int i; - /* Flush tlb. */ if (MACHINE_HAS_IDTE) @@ -227,19 +233,13 @@ void gmap_free(struct gmap *gmap) __tlb_flush_global(); /* Free all segment & region tables. */ - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { - table = (unsigned long *) page_to_phys(page); - if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) - /* Remove gmap rmap structures for segment table. */ - for (i = 0; i < PTRS_PER_PMD; i++, table++) - gmap_unlink_segment(gmap, table); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) __free_pages(page, ALLOC_ORDER); - } - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); kfree(gmap); } EXPORT_SYMBOL_GPL(gmap_free); @@ -267,31 +267,87 @@ EXPORT_SYMBOL_GPL(gmap_disable); /* * gmap_alloc_table is assumed to be called with mmap_sem held */ -static int gmap_alloc_table(struct gmap *gmap, - unsigned long *table, unsigned long init) - __releases(&gmap->mm->page_table_lock) - __acquires(&gmap->mm->page_table_lock) +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) { struct page *page; unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - spin_unlock(&gmap->mm->page_table_lock); page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - spin_lock(&gmap->mm->page_table_lock); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - } else + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) __free_pages(page, ALLOC_ORDER); return 0; } +/** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + page = pmd_to_page((pmd_t *) entry); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + *entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + /** * gmap_unmap_segment - unmap segment from the guest address space * @gmap: pointer to the guest address space structure @@ -302,7 +358,6 @@ static int gmap_alloc_table(struct gmap *gmap, */ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) { - unsigned long *table; unsigned long off; int flush; @@ -312,31 +367,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) return -EINVAL; flush = 0; - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); - for (off = 0; off < len; off += PMD_SIZE) { - /* Walk the guest addr space page table */ - table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 20) & 0x7ff); - - /* Clear segment table entry in guest address space. */ - flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INVALID; - } -out: - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; @@ -355,7 +389,6 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len) { - unsigned long *table; unsigned long off; int flush; @@ -366,66 +399,26 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, return -EINVAL; flush = 0; - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); + down_write(&gmap->mm->mmap_sem); for (off = 0; off < len; off += PMD_SIZE) { - /* Walk the gmap address space page table */ - table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 20) & 0x7ff); - - /* Store 'from' address in an invalid segment table entry. */ - flush |= gmap_unlink_segment(gmap, table); - *table = (from + off) | (_SEGMENT_ENTRY_INVALID | - _SEGMENT_ENTRY_PROTECT); + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; } - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + up_write(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); - return 0; - -out_unmap: - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + if (off >= len) + return 0; gmap_unmap_segment(gmap, to, len); return -ENOMEM; } EXPORT_SYMBOL_GPL(gmap_map_segment); -static unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long *table; - - table = gmap->table + ((gaddr >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 20) & 0x7ff); - return table; -} - /** * __gmap_translate - translate a guest address to a user space address * @gmap: pointer to guest mapping meta data structure @@ -439,25 +432,11 @@ static unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr) */ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) { - unsigned long *segment_ptr, vmaddr, segment; - struct gmap_pgtable *mp; - struct page *page; + unsigned long vmaddr; - current->thread.gmap_addr = gaddr; - segment_ptr = gmap_table_walk(gmap, gaddr); - if (IS_ERR(segment_ptr)) - return PTR_ERR(segment_ptr); - /* Convert the gmap address to an mm address. */ - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INVALID)) { - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (gaddr & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_PROTECT) { - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - return vmaddr | (gaddr & ~PMD_MASK); - } - return -EFAULT; + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; } EXPORT_SYMBOL_GPL(__gmap_translate); @@ -481,125 +460,124 @@ unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(struct gmap *gmap, unsigned long gaddr, - unsigned long segment, - unsigned long *segment_ptr) +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +static void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) { - unsigned long vmaddr; - struct vm_area_struct *vma; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; struct mm_struct *mm; - struct page *page; + unsigned long *table; + spinlock_t *ptl; pgd_t *pgd; pud_t *pud; pmd_t *pmd; + int rc; - mm = gmap->mm; - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) - return -EFAULT; + /* Create higher level tables in the gmap page table */ + table = gmap->table + ((gaddr >> 53) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((gaddr >> 42) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((gaddr >> 31) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000)) + return -ENOMEM; + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); + table = table + ((gaddr >> 20) & 0x7ff); /* Walk the parent mm page table */ + mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) - return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) - return -ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) - return -ENOMEM; + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ if (pmd_large(*pmd)) return -EFAULT; - /* pmd now points to a valid segment table entry. */ - rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); - if (!rmap) - return -ENOMEM; /* Link gmap segment table entry location to page table. */ - page = pmd_page(*pmd); - mp = (struct gmap_pgtable *) page->index; - rmap->gmap = gmap; - rmap->entry = segment_ptr; - rmap->vmaddr = gaddr & PMD_MASK; - spin_lock(&mm->page_table_lock); - if (*segment_ptr == segment) { - list_add(&rmap->list, &mp->mapper); - /* Set gmap segment table entry to page table. */ - *segment_ptr = pmd_val(*pmd) & PAGE_MASK; - rmap = NULL; - } - spin_unlock(&mm->page_table_lock); - kfree(rmap); - return 0; + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; } -static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) -{ - struct gmap_rmap *rmap, *next; - struct gmap_pgtable *mp; - struct page *page; - int flush; - - flush = 0; - spin_lock(&mm->page_table_lock); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | - _SEGMENT_ENTRY_PROTECT); - list_del(&rmap->list); - kfree(rmap); - flush = 1; - } - spin_unlock(&mm->page_table_lock); - if (flush) - __tlb_flush_global(); -} - -/* - * this function is assumed to be called with mmap_sem held +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. */ -unsigned long __gmap_fault(struct gmap *gmap, unsigned long gaddr) +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) { - unsigned long *segment_ptr, segment; - struct gmap_pgtable *mp; - struct page *page; + unsigned long vmaddr; int rc; - current->thread.gmap_addr = gaddr; - segment_ptr = gmap_table_walk(gmap, gaddr); - if (IS_ERR(segment_ptr)) - return -EFAULT; - /* Convert the gmap address to an mm address. */ - while (1) { - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INVALID)) { - /* Page table is present */ - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (gaddr & ~PMD_MASK); - } - if (!(segment & _SEGMENT_ENTRY_PROTECT)) - /* Nothing mapped in the gmap address space. */ - break; - rc = gmap_connect_pgtable(gmap, gaddr, segment, segment_ptr); - if (rc) - return rc; - } - return -EFAULT; -} - -unsigned long gmap_fault(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - down_read(&gmap->mm->mmap_sem); - rc = __gmap_fault(gmap, gaddr); + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) { + rc = -EFAULT; + goto out_up; + } + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: up_read(&gmap->mm->mmap_sem); - return rc; } EXPORT_SYMBOL_GPL(gmap_fault); @@ -619,17 +597,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) free_swap_and_cache(entry); } -/** - * The mm->mmap_sem lock must be held +/* + * this function is assumed to be called with mmap_sem held */ -static void gmap_zap_unused(struct mm_struct *mm, unsigned long vmaddr) +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - unsigned long ptev, pgstev; + unsigned long vmaddr, ptev, pgstev; + pte_t *ptep, pte; spinlock_t *ptl; pgste_t pgste; - pte_t *ptep, pte; - ptep = get_locked_pte(mm, vmaddr, &ptl); + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + return; + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); if (unlikely(!ptep)) return; pte = *ptep; @@ -641,87 +626,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long vmaddr) ptev = pte_val(pte); if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); - pte_clear(mm, vmaddr, ptep); + gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); + pte_clear(gmap->mm, vmaddr, ptep); } pgste_set_unlock(ptep, pgste); out_pte: pte_unmap_unlock(*ptep, ptl); } - -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long *table, *segment_ptr; - unsigned long segment, vmaddr, pgstev, ptev; - struct gmap_pgtable *mp; - struct page *page; - - segment_ptr = gmap_table_walk(gmap, gaddr); - if (IS_ERR(segment_ptr)) - return; - segment = *segment_ptr; - if (segment & _SEGMENT_ENTRY_INVALID) - return; - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - vmaddr = mp->vmaddr | (gaddr & ~PMD_MASK); - /* Page table is present */ - table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); - table = table + ((vmaddr >> 12) & 0xff); - pgstev = table[PTRS_PER_PTE]; - ptev = table[0]; - /* quick check, checked again with locks held */ - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) - gmap_zap_unused(gmap->mm, vmaddr); -} EXPORT_SYMBOL_GPL(__gmap_zap); void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) { - - unsigned long *table, gaddr, size; + unsigned long gaddr, vmaddr, size; struct vm_area_struct *vma; - struct gmap_pgtable *mp; - struct page *page; down_read(&gmap->mm->mmap_sem); - gaddr = from; - while (gaddr < to) { - /* Walk the gmap address space page table */ - table = gmap->table + ((gaddr >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; - continue; - } - page = pfn_to_page(*table >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - vma = find_vma(gmap->mm, mp->vmaddr); + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, mp->vmaddr | (gaddr & ~PMD_MASK), - size, NULL); - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; + zap_page_range(vma, vmaddr, size, NULL); } up_read(&gmap->mm->mmap_sem); } @@ -778,7 +710,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) down_read(&gmap->mm->mmap_sem); while (len) { /* Convert gmap address and connect the page tables */ - addr = __gmap_fault(gmap, gaddr); + addr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(addr)) { rc = addr; break; @@ -788,6 +720,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) rc = -EFAULT; break; } + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; /* Walk the process page table, lock and get pte pointer */ ptep = get_locked_pte(gmap->mm, addr, &ptl); if (unlikely(!ptep)) @@ -817,23 +752,24 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); * This function is assumed to be called with the page table lock held * for the pte to notify. */ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) { - unsigned long segment_offset; + unsigned long offset, gaddr; + unsigned long *table; struct gmap_notifier *nb; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct page *page; + struct gmap *gmap; - segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - segment_offset = segment_offset * (4096 / sizeof(pte_t)); - page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); spin_lock(&gmap_notifier_lock); - list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(rmap->gmap, - rmap->vmaddr + segment_offset); + nb->notifier_call(gmap, gaddr); } spin_unlock(&gmap_notifier_lock); } @@ -844,29 +780,18 @@ static inline int page_table_with_pgste(struct page *page) return atomic_read(&page->_mapcount) == 0; } -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, - unsigned long vmaddr) +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) { struct page *page; unsigned long *table; - struct gmap_pgtable *mp; page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; - mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); - if (!mp) { - __free_page(page); - return NULL; - } if (!pgtable_page_ctor(page)) { - kfree(mp); __free_page(page); return NULL; } - mp->vmaddr = vmaddr & PMD_MASK; - INIT_LIST_HEAD(&mp->mapper); - page->index = (unsigned long) mp; atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); @@ -877,14 +802,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, static inline void page_table_free_pgste(unsigned long *table) { struct page *page; - struct gmap_pgtable *mp; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - BUG_ON(!list_empty(&mp->mapper)); pgtable_page_dtor(page); atomic_set(&page->_mapcount, -1); - kfree(mp); __free_page(page); } @@ -1041,8 +962,7 @@ static inline int page_table_with_pgste(struct page *page) return 0; } -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, - unsigned long vmaddr) +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) { return NULL; } @@ -1056,8 +976,8 @@ static inline void page_table_free_pgste(unsigned long *table) { } -static inline void gmap_disconnect_pgtable(struct mm_struct *mm, - unsigned long *table) +static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) { } @@ -1077,14 +997,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) /* * page table entry allocation/free routines. */ -unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) +unsigned long *page_table_alloc(struct mm_struct *mm) { unsigned long *uninitialized_var(table); struct page *uninitialized_var(page); unsigned int mask, bit; if (mm_has_pgste(mm)) - return page_table_alloc_pgste(mm, vmaddr); + return page_table_alloc_pgste(mm); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); mask = FRAG_MASK; @@ -1126,10 +1046,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) unsigned int bit, mask; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (page_table_with_pgste(page)) { - gmap_disconnect_pgtable(mm, table); + if (page_table_with_pgste(page)) return page_table_free_pgste(table); - } /* Free 1K/2K page table fragment of a 4K page */ bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); @@ -1161,7 +1079,8 @@ static void __page_table_free_rcu(void *table, unsigned bit) } } -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) { struct mm_struct *mm; struct page *page; @@ -1170,7 +1089,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) mm = tlb->mm; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); if (page_table_with_pgste(page)) { - gmap_disconnect_pgtable(mm, table); + gmap_unlink(mm, table, vmaddr); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; @@ -1306,7 +1225,7 @@ again: if (page_table_with_pgste(page)) continue; /* Allocate new page table with pgstes */ - new = page_table_alloc_pgste(mm, addr); + new = page_table_alloc_pgste(mm); if (!new) return -ENOMEM; @@ -1321,7 +1240,7 @@ again: /* Establish new table */ pmd_populate(mm, pmd, (pte_t *) new); /* Free old table with rcu, there might be a walker! */ - page_table_free_rcu(tlb, table); + page_table_free_rcu(tlb, table, addr); new = NULL; } spin_unlock(ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fe9012a49aa5..fdbd7888cb07 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm, address); + pte = (pte_t *) page_table_alloc(&init_mm); else pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), PTRS_PER_PTE * sizeof(pte_t));