summaryrefslogtreecommitdiff
path: root/arch/s390/mm/pgtable.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--arch/s390/mm/pgtable.c713
1 files changed, 337 insertions, 376 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 37b8241ec784..296b61a4af59 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
/**
* gmap_alloc - allocate a guest address space
* @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
*
* Returns a guest address space structure.
*/
-struct gmap *gmap_alloc(struct mm_struct *mm)
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
struct gmap *gmap;
struct page *page;
unsigned long *table;
-
+ unsigned long etype, atype;
+
+ if (limit < (1UL << 31)) {
+ limit = (1UL << 31) - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < (1UL << 42)) {
+ limit = (1UL << 42) - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < (1UL << 53)) {
+ limit = (1UL << 53) - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
if (!gmap)
goto out;
INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
gmap->mm = mm;
page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
if (!page)
goto out_free;
+ page->index = 0;
list_add(&page->lru, &gmap->crst_list);
table = (unsigned long *) page_to_phys(page);
- crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ crst_table_init(table, etype);
gmap->table = table;
- gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | __pa(table);
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ down_write(&mm->mmap_sem);
list_add(&gmap->list, &mm->context.gmap_list);
+ up_write(&mm->mmap_sem);
return gmap;
out_free:
@@ -178,36 +204,38 @@ out:
}
EXPORT_SYMBOL_GPL(gmap_alloc);
-static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
-{
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct page *page;
-
- if (*table & _SEGMENT_ENTRY_INVALID)
- return 0;
- page = pfn_to_page(*table >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry(rmap, &mp->mapper, list) {
- if (rmap->entry != table)
- continue;
- list_del(&rmap->list);
- kfree(rmap);
- break;
- }
- *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
- return 1;
-}
-
static void gmap_flush_tlb(struct gmap *gmap)
{
if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
- _ASCE_TYPE_REGION1);
+ __tlb_flush_asce(gmap->mm, gmap->asce);
else
__tlb_flush_global();
}
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap)
void gmap_free(struct gmap *gmap)
{
struct page *page, *next;
- unsigned long *table;
- int i;
-
/* Flush tlb. */
if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
- _ASCE_TYPE_REGION1);
+ __tlb_flush_asce(gmap->mm, gmap->asce);
else
__tlb_flush_global();
/* Free all segment & region tables. */
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
- table = (unsigned long *) page_to_phys(page);
- if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
- /* Remove gmap rmap structures for segment table. */
- for (i = 0; i < PTRS_PER_PMD; i++, table++)
- gmap_unlink_segment(gmap, table);
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
__free_pages(page, ALLOC_ORDER);
- }
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+ down_write(&gmap->mm->mmap_sem);
list_del(&gmap->list);
+ up_write(&gmap->mm->mmap_sem);
kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable);
/*
* gmap_alloc_table is assumed to be called with mmap_sem held
*/
-static int gmap_alloc_table(struct gmap *gmap,
- unsigned long *table, unsigned long init)
- __releases(&gmap->mm->page_table_lock)
- __acquires(&gmap->mm->page_table_lock)
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
{
struct page *page;
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- spin_unlock(&gmap->mm->page_table_lock);
page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
- spin_lock(&gmap->mm->page_table_lock);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
crst_table_init(new, init);
+ spin_lock(&gmap->mm->page_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- } else
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ if (page)
__free_pages(page, ALLOC_ORDER);
return 0;
}
/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ page = pmd_to_page((pmd_t *) entry);
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_INVALID);
+ *entry = _SEGMENT_ENTRY_INVALID;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
* gmap_unmap_segment - unmap segment from the guest address space
* @gmap: pointer to the guest address space structure
- * @addr: address in the guest address space
+ * @to: address in the guest address space
* @len: length of the memory area to unmap
*
* Returns 0 if the unmap succeeded, -EINVAL if not.
*/
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
- unsigned long *table;
unsigned long off;
int flush;
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
return -EINVAL;
flush = 0;
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
- for (off = 0; off < len; off += PMD_SIZE) {
- /* Walk the guest addr space page table */
- table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 42) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 31) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 20) & 0x7ff);
-
- /* Clear segment table entry in guest address space. */
- flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INVALID;
- }
-out:
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
if (flush)
gmap_flush_tlb(gmap);
return 0;
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
* @gmap: pointer to the guest address space structure
* @from: source address in the parent address space
* @to: target address in the guest address space
+ * @len: length of the memory area to map
*
* Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
*/
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len)
{
- unsigned long *table;
unsigned long off;
int flush;
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > TASK_MAX_SIZE ||
- from + len < from || to + len < to)
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
return -EINVAL;
flush = 0;
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
+ down_write(&gmap->mm->mmap_sem);
for (off = 0; off < len; off += PMD_SIZE) {
- /* Walk the gmap address space page table */
- table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 42) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 31) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 20) & 0x7ff);
-
- /* Store 'from' address in an invalid segment table entry. */
- flush |= gmap_unlink_segment(gmap, table);
- *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
- _SEGMENT_ENTRY_PROTECT);
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
}
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ up_write(&gmap->mm->mmap_sem);
if (flush)
gmap_flush_tlb(gmap);
- return 0;
-
-out_unmap:
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ if (off >= len)
+ return 0;
gmap_unmap_segment(gmap, to, len);
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
-static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
-{
- unsigned long *table;
-
- table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 20) & 0x7ff);
- return table;
-}
-
/**
* __gmap_translate - translate a guest address to a user space address
- * @address: guest address
* @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
*
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
* The mmap_sem of the mm that belongs to the address space must be held
* when this function gets called.
*/
-unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
- unsigned long *segment_ptr, vmaddr, segment;
- struct gmap_pgtable *mp;
- struct page *page;
+ unsigned long vmaddr;
- current->thread.gmap_addr = address;
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return PTR_ERR(segment_ptr);
- /* Convert the gmap address to an mm address. */
- segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INVALID)) {
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- return mp->vmaddr | (address & ~PMD_MASK);
- } else if (segment & _SEGMENT_ENTRY_PROTECT) {
- vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- return vmaddr | (address & ~PMD_MASK);
- }
- return -EFAULT;
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);
/**
* gmap_translate - translate a guest address to a user space address
- * @address: guest address
* @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
*
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
* This function does not establish potentially missing page table entries.
*/
-unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
unsigned long rc;
down_read(&gmap->mm->mmap_sem);
- rc = __gmap_translate(address, gmap);
+ rc = __gmap_translate(gmap, gaddr);
up_read(&gmap->mm->mmap_sem);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
-static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
- unsigned long *segment_ptr, struct gmap *gmap)
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+}
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
- unsigned long vmaddr;
- struct vm_area_struct *vma;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
struct mm_struct *mm;
- struct page *page;
+ unsigned long *table;
+ spinlock_t *ptl;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
+ int rc;
- mm = gmap->mm;
- vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- vma = find_vma(mm, vmaddr);
- if (!vma || vma->vm_start > vmaddr)
- return -EFAULT;
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr >> 53) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & 0xffe0000000000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr >> 42) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & 0xfffffc0000000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr >> 31) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & 0xffffffff80000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr >> 20) & 0x7ff;
/* Walk the parent mm page table */
+ mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
- pud = pud_alloc(mm, pgd, vmaddr);
- if (!pud)
- return -ENOMEM;
- pmd = pmd_alloc(mm, pud, vmaddr);
- if (!pmd)
- return -ENOMEM;
- if (!pmd_present(*pmd) &&
- __pte_alloc(mm, vma, pmd, vmaddr))
- return -ENOMEM;
+ VM_BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
/* large pmds cannot yet be handled */
if (pmd_large(*pmd))
return -EFAULT;
- /* pmd now points to a valid segment table entry. */
- rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
- if (!rmap)
- return -ENOMEM;
/* Link gmap segment table entry location to page table. */
- page = pmd_page(*pmd);
- mp = (struct gmap_pgtable *) page->index;
- rmap->gmap = gmap;
- rmap->entry = segment_ptr;
- rmap->vmaddr = address & PMD_MASK;
- spin_lock(&mm->page_table_lock);
- if (*segment_ptr == segment) {
- list_add(&rmap->list, &mp->mapper);
- /* Set gmap segment table entry to page table. */
- *segment_ptr = pmd_val(*pmd) & PAGE_MASK;
- rmap = NULL;
- }
- spin_unlock(&mm->page_table_lock);
- kfree(rmap);
- return 0;
-}
-
-static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
-{
- struct gmap_rmap *rmap, *next;
- struct gmap_pgtable *mp;
- struct page *page;
- int flush;
-
- flush = 0;
- spin_lock(&mm->page_table_lock);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
- *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
- _SEGMENT_ENTRY_PROTECT);
- list_del(&rmap->list);
- kfree(rmap);
- flush = 1;
- }
- spin_unlock(&mm->page_table_lock);
- if (flush)
- __tlb_flush_global();
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_INVALID) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc)
+ *table = pmd_val(*pmd);
+ } else
+ rc = 0;
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
}
-/*
- * this function is assumed to be called with mmap_sem held
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
*/
-unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
{
- unsigned long *segment_ptr, segment;
- struct gmap_pgtable *mp;
- struct page *page;
+ unsigned long vmaddr;
int rc;
- current->thread.gmap_addr = address;
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return -EFAULT;
- /* Convert the gmap address to an mm address. */
- while (1) {
- segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INVALID)) {
- /* Page table is present */
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- return mp->vmaddr | (address & ~PMD_MASK);
- }
- if (!(segment & _SEGMENT_ENTRY_PROTECT))
- /* Nothing mapped in the gmap address space. */
- break;
- rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
- if (rc)
- return rc;
- }
- return -EFAULT;
-}
-
-unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
-{
- unsigned long rc;
-
down_read(&gmap->mm->mmap_sem);
- rc = __gmap_fault(address, gmap);
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
up_read(&gmap->mm->mmap_sem);
-
return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
free_swap_and_cache(entry);
}
-/**
- * The mm->mmap_sem lock must be held
+/*
+ * this function is assumed to be called with mmap_sem held
*/
-static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- unsigned long ptev, pgstev;
+ unsigned long vmaddr, ptev, pgstev;
+ pte_t *ptep, pte;
spinlock_t *ptl;
pgste_t pgste;
- pte_t *ptep, pte;
- ptep = get_locked_pte(mm, address, &ptl);
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ return;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
if (unlikely(!ptep))
return;
pte = *ptep;
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
ptev = pte_val(pte);
if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
- gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
- pte_clear(mm, address, ptep);
+ gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
+ pte_clear(gmap->mm, vmaddr, ptep);
}
pgste_set_unlock(ptep, pgste);
out_pte:
pte_unmap_unlock(*ptep, ptl);
}
-
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(unsigned long address, struct gmap *gmap)
-{
- unsigned long *table, *segment_ptr;
- unsigned long segment, pgstev, ptev;
- struct gmap_pgtable *mp;
- struct page *page;
-
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return;
- segment = *segment_ptr;
- if (segment & _SEGMENT_ENTRY_INVALID)
- return;
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- address = mp->vmaddr | (address & ~PMD_MASK);
- /* Page table is present */
- table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
- table = table + ((address >> 12) & 0xff);
- pgstev = table[PTRS_PER_PTE];
- ptev = table[0];
- /* quick check, checked again with locks held */
- if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
- ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
- gmap_zap_unused(gmap->mm, address);
-}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
-
- unsigned long *table, address, size;
+ unsigned long gaddr, vmaddr, size;
struct vm_area_struct *vma;
- struct gmap_pgtable *mp;
- struct page *page;
down_read(&gmap->mm->mmap_sem);
- address = from;
- while (address < to) {
- /* Walk the gmap address space page table */
- table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- page = pfn_to_page(*table >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- vma = find_vma(gmap->mm, mp->vmaddr);
- size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
- zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
- size, NULL);
- address = (address + PMD_SIZE) & PMD_MASK;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size, NULL);
}
up_read(&gmap->mm->mmap_sem);
}
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
/**
* gmap_ipte_notify - mark a range of ptes for invalidation notification
* @gmap: pointer to guest mapping meta data structure
- * @start: virtual address in the guest address space
+ * @gaddr: virtual address in the guest address space
* @len: size of area
*
* Returns 0 if for each page in the given range a gmap mapping exists and
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
* for one or more pages -EFAULT is returned. If no memory could be allocated
* -ENOMEM is returned. This function establishes missing page table entries.
*/
-int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
unsigned long addr;
spinlock_t *ptl;
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
pgste_t pgste;
int rc = 0;
- if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
return -EINVAL;
down_read(&gmap->mm->mmap_sem);
while (len) {
/* Convert gmap address and connect the page tables */
- addr = __gmap_fault(start, gmap);
+ addr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(addr)) {
rc = addr;
break;
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
rc = -EFAULT;
break;
}
+ rc = __gmap_link(gmap, gaddr, addr);
+ if (rc)
+ break;
/* Walk the process page table, lock and get pte pointer */
ptep = get_locked_pte(gmap->mm, addr, &ptl);
if (unlikely(!ptep))
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
pgste = pgste_get_lock(ptep);
pgste_val(pgste) |= PGSTE_IN_BIT;
pgste_set_unlock(ptep, pgste);
- start += PAGE_SIZE;
+ gaddr += PAGE_SIZE;
len -= PAGE_SIZE;
}
spin_unlock(ptl);
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
/**
* gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
* @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
* @pte: pointer to the page table entry
*
* This function is assumed to be called with the page table lock held
* for the pte to notify.
*/
-void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
+void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
- unsigned long segment_offset;
+ unsigned long offset, gaddr;
+ unsigned long *table;
struct gmap_notifier *nb;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct page *page;
+ struct gmap *gmap;
- segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- segment_offset = segment_offset * (4096 / sizeof(pte_t));
- page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (4096 / sizeof(pte_t));
spin_lock(&gmap_notifier_lock);
- list_for_each_entry(rmap, &mp->mapper, list) {
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (!table)
+ continue;
+ gaddr = __gmap_segment_gaddr(table) + offset;
list_for_each_entry(nb, &gmap_notifier_list, list)
- nb->notifier_call(rmap->gmap,
- rmap->vmaddr + segment_offset);
+ nb->notifier_call(gmap, gaddr);
}
spin_unlock(&gmap_notifier_lock);
}
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page)
return atomic_read(&page->_mapcount) == 0;
}
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
- unsigned long vmaddr)
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
struct page *page;
unsigned long *table;
- struct gmap_pgtable *mp;
page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
- mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
- if (!mp) {
- __free_page(page);
- return NULL;
- }
if (!pgtable_page_ctor(page)) {
- kfree(mp);
__free_page(page);
return NULL;
}
- mp->vmaddr = vmaddr & PMD_MASK;
- INIT_LIST_HEAD(&mp->mapper);
- page->index = (unsigned long) mp;
atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
static inline void page_table_free_pgste(unsigned long *table)
{
struct page *page;
- struct gmap_pgtable *mp;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- BUG_ON(!list_empty(&mp->mapper));
pgtable_page_dtor(page);
atomic_set(&page->_mapcount, -1);
- kfree(mp);
__free_page(page);
}
@@ -986,11 +935,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pte_t *ptep;
down_read(&mm->mmap_sem);
+retry:
ptep = get_locked_pte(current->mm, addr, &ptl);
if (unlikely(!ptep)) {
up_read(&mm->mmap_sem);
return -EFAULT;
}
+ if (!(pte_val(*ptep) & _PAGE_INVALID) &&
+ (pte_val(*ptep) & _PAGE_PROTECT)) {
+ pte_unmap_unlock(*ptep, ptl);
+ if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
+ up_read(&mm->mmap_sem);
+ return -EFAULT;
+ }
+ goto retry;
+ }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -1028,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page)
return 0;
}
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
- unsigned long vmaddr)
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
return NULL;
}
@@ -1043,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table)
{
}
-static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
- unsigned long *table)
+static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
{
}
@@ -1064,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
/*
* page table entry allocation/free routines.
*/
-unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
+unsigned long *page_table_alloc(struct mm_struct *mm)
{
unsigned long *uninitialized_var(table);
struct page *uninitialized_var(page);
unsigned int mask, bit;
if (mm_has_pgste(mm))
- return page_table_alloc_pgste(mm, vmaddr);
+ return page_table_alloc_pgste(mm);
/* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
mask = FRAG_MASK;
@@ -1113,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
unsigned int bit, mask;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
- gmap_disconnect_pgtable(mm, table);
+ if (page_table_with_pgste(page))
return page_table_free_pgste(table);
- }
/* Free 1K/2K page table fragment of a 4K page */
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
@@ -1148,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
}
}
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
{
struct mm_struct *mm;
struct page *page;
@@ -1157,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
mm = tlb->mm;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
if (page_table_with_pgste(page)) {
- gmap_disconnect_pgtable(mm, table);
+ gmap_unlink(mm, table, vmaddr);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
@@ -1279,6 +1236,7 @@ static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
{
unsigned long next, *table, *new;
struct page *page;
+ spinlock_t *ptl;
pmd_t *pmd;
pmd = pmd_offset(pud, addr);
@@ -1292,11 +1250,11 @@ again:
if (page_table_with_pgste(page))
continue;
/* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm, addr);
+ new = page_table_alloc_pgste(mm);
if (!new)
return -ENOMEM;
- spin_lock(&mm->page_table_lock);
+ ptl = pmd_lock(mm, pmd);
if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
/* Nuke pmd entry pointing to the "short" page table */
pmdp_flush_lazy(mm, addr, pmd);
@@ -1307,10 +1265,10 @@ again:
/* Establish new table */
pmd_populate(mm, pmd, (pte_t *) new);
/* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table);
+ page_table_free_rcu(tlb, table, addr);
new = NULL;
}
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(ptl);
if (new) {
page_table_free_pgste(new);
goto again;
@@ -1432,6 +1390,9 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
if (pmd_same(*pmdp, entry))
return 0;
pmdp_invalidate(vma, address, pmdp);