From 289d1054e972b445fe8f7bbcbebf40b1bec37384 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Thu, 7 Mar 2013 08:53:00 +1100 Subject: lguest: fix paths in comments After commit 07fe997, lguest tool has already moved from Documentation/virtual/lguest/ to tools/lguest/. Signed-off-by: Wanlong Gao Signed-off-by: Rusty Russell --- drivers/lguest/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index 89875ea19ade..ee035ec4526b 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig @@ -5,10 +5,9 @@ config LGUEST ---help--- This is a very simple module which allows you to run multiple instances of the same Linux kernel, using the - "lguest" command found in the Documentation/virtual/lguest - directory. + "lguest" command found in the tools/lguest directory. Note that "lguest" is pronounced to rhyme with "fell quest", - not "rustyvisor". See Documentation/virtual/lguest/lguest.txt. + not "rustyvisor". See tools/lguest/lguest.txt. If unsure, say N. If curious, say M. If masochistic, say Y. -- cgit v1.2.1 From 406a590ba105bfb7b67952f0a5f948e0d374e03e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:37 +0930 Subject: lguest: prepare to make SWITCHER_ADDR a variable. We currently use the whole top PGD entry for the switcher, but that's hitting the fixmap in some configurations (mainly, large NR_CPUS). Introduce a variable, currently set to the constant. Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 18 ++++++++++-------- drivers/lguest/x86/core.c | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index a5ebc0083d87..099252301132 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -20,7 +20,7 @@ #include #include "lg.h" - +unsigned long switcher_addr; static struct vm_struct *switcher_vma; static struct page **switcher_page; @@ -75,25 +75,27 @@ static __init int map_switcher(void) } } + switcher_addr = SWITCHER_ADDR; + /* * First we check that the Switcher won't overlap the fixmap area at * the top of memory. It's currently nowhere near, but it could have * very strange effects if it ever happened. */ - if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ + if (switcher_addr + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ err = -ENOMEM; printk("lguest: mapping switcher would thwack fixmap\n"); goto free_pages; } /* - * Now we reserve the "virtual memory area" we want: 0xFFC00000 - * (SWITCHER_ADDR). We might not get it in theory, but in practice - * it's worked so far. The end address needs +1 because __get_vm_area - * allocates an extra guard page, so we need space for that. + * Now we reserve the "virtual memory area" we want. We might + * not get it in theory, but in practice it's worked so far. + * The end address needs +1 because __get_vm_area allocates an + * extra guard page, so we need space for that. */ switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, - VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + VM_ALLOC, switcher_addr, switcher_addr + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); if (!switcher_vma) { err = -ENOMEM; @@ -103,7 +105,7 @@ static __init int map_switcher(void) /* * This code actually sets up the pages we've allocated to appear at - * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the + * switcher_addr. map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our * array of struct pages. It increments that pointer, but we don't * care. diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 4af12e1844d5..20fae765d600 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -59,14 +59,14 @@ static struct { /* Offset from where switcher.S was compiled to where we've copied it */ static unsigned long switcher_offset(void) { - return SWITCHER_ADDR - (unsigned long)start_switcher_text; + return switcher_addr - (unsigned long)start_switcher_text; } /* This cpu's struct lguest_pages. */ static struct lguest_pages *lguest_pages(unsigned int cpu) { return &(((struct lguest_pages *) - (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); + (switcher_addr + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); } static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu); -- cgit v1.2.1 From 68a644d734e61f38b686cb755bd2a3f43d9372f4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:37 +0930 Subject: lguest: check vaddr not pgd for Switcher protection. We currently assume that the Switcher the top pgd; we want to remove this assumption, so check that vaddr is OK, rather then checking pgd index. Signed-off-by: Rusty Russell --- drivers/lguest/page_tables.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 3b62be160a6e..a2454a24a10c 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -95,13 +95,6 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) { unsigned int index = pgd_index(vaddr); -#ifndef CONFIG_X86_PAE - /* We kill any Guest trying to touch the Switcher addresses. */ - if (index >= SWITCHER_PGD_INDEX) { - kill_guest(cpu, "attempt to access switcher pages"); - index = 0; - } -#endif /* Return a pointer index'th pgd entry for the i'th page table. */ return &cpu->lg->pgdirs[i].pgdir[index]; } @@ -117,13 +110,6 @@ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) unsigned int index = pmd_index(vaddr); pmd_t *page; - /* We kill any Guest trying to touch the Switcher addresses. */ - if (pgd_index(vaddr) == SWITCHER_PGD_INDEX && - index >= SWITCHER_PMD_INDEX) { - kill_guest(cpu, "attempt to access switcher pages"); - index = 0; - } - /* You should never call this if the PGD entry wasn't valid */ BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT)); page = __va(pgd_pfn(spgd) << PAGE_SHIFT); @@ -323,6 +309,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) pmd_t gpmd; #endif + /* We never demand page the Switcher, so trying is a mistake. */ + if (vaddr >= switcher_addr) + return false; + /* First step: get the top-level Guest page table entry. */ if (unlikely(cpu->linear_pages)) { /* Faking up a linear mapping. */ @@ -495,10 +485,14 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) { pgd_t *spgd; unsigned long flags; - #ifdef CONFIG_X86_PAE pmd_t *spmd; #endif + + /* You can't put your stack in the Switcher! */ + if (vaddr >= switcher_addr) + return false; + /* Look at the current top level entry: is it present? */ spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) @@ -897,6 +891,12 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t gpte) { + /* We don't let you remap the Switcher; we need it to get back! */ + if (vaddr >= switcher_addr) { + kill_guest(cpu, "attempt to set pte into Switcher pages"); + return; + } + /* * Kernel mappings must be changed on all top levels. Slow, but doesn't * happen often. @@ -995,12 +995,7 @@ void page_table_guest_data_init(struct lg_cpu *cpu) * "pgd_index(lg->kernel_address)". This assumes it won't hit the * Switcher mappings, so check that now. */ -#ifdef CONFIG_X86_PAE - if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && - pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) -#else - if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX) -#endif + if (cpu->lg->kernel_address >= switcher_addr) kill_guest(cpu, "bad kernel address %#lx", cpu->lg->kernel_address); } -- cgit v1.2.1 From c215a8b9eb17739c01d59faa7db9d1ef162a82a8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:37 +0930 Subject: lguest: remove RESERVE_MEM constant. We can use switcher_addr directly. Signed-off-by: Rusty Russell --- drivers/lguest/page_tables.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a2454a24a10c..27cbb186a911 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -63,10 +63,8 @@ */ #ifdef CONFIG_X86_PAE #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) -#define RESERVE_MEM 2U #define CHECK_GPGD_MASK _PAGE_PRESENT #else -#define RESERVE_MEM 4U #define CHECK_GPGD_MASK _PAGE_TABLE #endif @@ -977,15 +975,21 @@ int init_guest_pagetable(struct lguest *lg) /*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ void page_table_guest_data_init(struct lg_cpu *cpu) { + /* + * We tell the Guest that it can't use the virtual addresses + * used by the Switcher. This trick is equivalent to 4GB - + * switcher_addr. + */ + u32 top = ~switcher_addr + 1; + /* We get the kernel address: above this is all kernel memory. */ if (get_user(cpu->lg->kernel_address, - &cpu->lg->lguest_data->kernel_address) + &cpu->lg->lguest_data->kernel_address) /* - * We tell the Guest that it can't use the top 2 or 4 MB - * of virtual addresses used by the Switcher. + * We tell the Guest that it can't use the top virtual + * addresses (used by the Switcher). */ - || put_user(RESERVE_MEM * 1024 * 1024, - &cpu->lg->lguest_data->reserve_mem)) { + || put_user(top, &cpu->lg->lguest_data->reserve_mem)) { kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); return; } -- cgit v1.2.1 From 856c608827928d29f80605e85fc3f8f0ab3af4fb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:38 +0930 Subject: lguest: rename switcher_page to switcher_pages. There is a single page with the Switcher in it, but it's followed by 2 pages per Host CPU. Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 24 ++++++++++++------------ drivers/lguest/lg.h | 2 +- drivers/lguest/page_tables.c | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 099252301132..211d8267992b 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -22,7 +22,7 @@ unsigned long switcher_addr; static struct vm_struct *switcher_vma; -static struct page **switcher_page; +static struct page **switcher_pages; /* This One Big lock protects all inter-guest data structures. */ DEFINE_MUTEX(lguest_lock); @@ -56,9 +56,9 @@ static __init int map_switcher(void) * We allocate an array of struct page pointers. map_vm_area() wants * this, rather than just an array of pages. */ - switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, - GFP_KERNEL); - if (!switcher_page) { + switcher_pages = kmalloc(sizeof(switcher_pages[0])*TOTAL_SWITCHER_PAGES, + GFP_KERNEL); + if (!switcher_pages) { err = -ENOMEM; goto out; } @@ -68,8 +68,8 @@ static __init int map_switcher(void) * so we make sure they're zeroed. */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { - switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!switcher_page[i]) { + switcher_pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!switcher_pages[i]) { err = -ENOMEM; goto free_some_pages; } @@ -110,7 +110,7 @@ static __init int map_switcher(void) * array of struct pages. It increments that pointer, but we don't * care. */ - pagep = switcher_page; + pagep = switcher_pages; err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); if (err) { printk("lguest: map_vm_area failed: %i\n", err); @@ -135,8 +135,8 @@ free_pages: i = TOTAL_SWITCHER_PAGES; free_some_pages: for (--i; i >= 0; i--) - __free_pages(switcher_page[i], 0); - kfree(switcher_page); + __free_pages(switcher_pages[i], 0); + kfree(switcher_pages); out: return err; } @@ -151,8 +151,8 @@ static void unmap_switcher(void) vunmap(switcher_vma->addr); /* Now we just need to free the pages we copied the switcher into */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) - __free_pages(switcher_page[i], 0); - kfree(switcher_page); + __free_pages(switcher_pages[i], 0); + kfree(switcher_pages); } /*H:032 @@ -326,7 +326,7 @@ static int __init init(void) goto out; /* Now we set up the pagetable implementation for the Guests. */ - err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); + err = init_pagetables(switcher_pages, SHARED_SWITCHER_PAGES); if (err) goto unmap; diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 295df06e6590..8bf68c54ff7f 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -15,7 +15,7 @@ #include void free_pagetables(void); -int init_pagetables(struct page **switcher_page, unsigned int pages); +int init_pagetables(struct page **switcher_pages, unsigned int pages); struct pgdir { unsigned long gpgdir; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 27cbb186a911..21685580eb9f 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -1084,7 +1084,7 @@ static void free_switcher_pte_pages(void) * Currently the Switcher is less than a page long, so "pages" is always 1. */ static __init void populate_switcher_pte_page(unsigned int cpu, - struct page *switcher_page[], + struct page *switcher_pages[], unsigned int pages) { unsigned int i; @@ -1092,7 +1092,7 @@ static __init void populate_switcher_pte_page(unsigned int cpu, /* The first entries are easy: they map the Switcher code. */ for (i = 0; i < pages; i++) { - set_pte(&pte[i], mk_pte(switcher_page[i], + set_pte(&pte[i], mk_pte(switcher_pages[i], __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); } @@ -1100,14 +1100,14 @@ static __init void populate_switcher_pte_page(unsigned int cpu, i = pages + cpu*2; /* First page (Guest registers) is writable from the Guest */ - set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), + set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_pages[i]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); /* * The second page contains the "struct lguest_ro_state", and is * read-only. */ - set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), + set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_pages[i+1]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); } @@ -1128,7 +1128,7 @@ static __init void populate_switcher_pte_page(unsigned int cpu, * At boot or module load time, init_pagetables() allocates and populates * the Switcher PTE page for each CPU. */ -__init int init_pagetables(struct page **switcher_page, unsigned int pages) +__init int init_pagetables(struct page **switcher_pages, unsigned int pages) { unsigned int i; @@ -1138,7 +1138,7 @@ __init int init_pagetables(struct page **switcher_page, unsigned int pages) free_switcher_pte_pages(); return -ENOMEM; } - populate_switcher_pte_page(i, switcher_page, pages); + populate_switcher_pte_page(i, switcher_pages, pages); } return 0; } -- cgit v1.2.1 From 93a2cdff98243df06bafd3c4f3b31b38f0d0fe3e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:38 +0930 Subject: lguest: assume Switcher text is a single page. ie. SHARED_SWITCHER_PAGES == 1. It is well under a page, and it's a minor simplification: it's nice to have *one* simplification in a patch series! Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 9 ++++++++- drivers/lguest/lg.h | 2 +- drivers/lguest/page_tables.c | 21 ++++++++------------- drivers/lguest/x86/core.c | 5 ++--- 4 files changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 211d8267992b..4209065b9b1e 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -52,6 +52,13 @@ static __init int map_switcher(void) * easy. */ + /* We assume Switcher text fits into a single page. */ + if (end_switcher_text - start_switcher_text > PAGE_SIZE) { + printk(KERN_ERR "lguest: switcher text too large (%zu)\n", + end_switcher_text - start_switcher_text); + return -EINVAL; + } + /* * We allocate an array of struct page pointers. map_vm_area() wants * this, rather than just an array of pages. @@ -326,7 +333,7 @@ static int __init init(void) goto out; /* Now we set up the pagetable implementation for the Guests. */ - err = init_pagetables(switcher_pages, SHARED_SWITCHER_PAGES); + err = init_pagetables(switcher_pages); if (err) goto unmap; diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 8bf68c54ff7f..4c3e532d50d6 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -15,7 +15,7 @@ #include void free_pagetables(void); -int init_pagetables(struct page **switcher_pages, unsigned int pages); +int init_pagetables(struct page **switcher_pages); struct pgdir { unsigned long gpgdir; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 21685580eb9f..758466299b0d 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -1079,25 +1079,20 @@ static void free_switcher_pte_pages(void) /*H:520 * Setting up the Switcher PTE page for given CPU is fairly easy, given - * the CPU number and the "struct page"s for the Switcher code itself. - * - * Currently the Switcher is less than a page long, so "pages" is always 1. + * the CPU number and the "struct page"s for the Switcher and per-cpu pages. */ static __init void populate_switcher_pte_page(unsigned int cpu, - struct page *switcher_pages[], - unsigned int pages) + struct page *switcher_pages[]) { - unsigned int i; pte_t *pte = switcher_pte_page(cpu); + int i; - /* The first entries are easy: they map the Switcher code. */ - for (i = 0; i < pages; i++) { - set_pte(&pte[i], mk_pte(switcher_pages[i], + /* The first entries maps the Switcher code. */ + set_pte(&pte[0], mk_pte(switcher_pages[0], __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); - } /* The only other thing we map is this CPU's pair of pages. */ - i = pages + cpu*2; + i = 1 + cpu*2; /* First page (Guest registers) is writable from the Guest */ set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_pages[i]), @@ -1128,7 +1123,7 @@ static __init void populate_switcher_pte_page(unsigned int cpu, * At boot or module load time, init_pagetables() allocates and populates * the Switcher PTE page for each CPU. */ -__init int init_pagetables(struct page **switcher_pages, unsigned int pages) +__init int init_pagetables(struct page **switcher_pages) { unsigned int i; @@ -1138,7 +1133,7 @@ __init int init_pagetables(struct page **switcher_pages, unsigned int pages) free_switcher_pte_pages(); return -ENOMEM; } - populate_switcher_pte_page(i, switcher_pages, pages); + populate_switcher_pte_page(i, switcher_pages); } return 0; } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 20fae765d600..f0a3347b6441 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -62,11 +62,10 @@ static unsigned long switcher_offset(void) return switcher_addr - (unsigned long)start_switcher_text; } -/* This cpu's struct lguest_pages. */ +/* This cpu's struct lguest_pages (after the Switcher text page) */ static struct lguest_pages *lguest_pages(unsigned int cpu) { - return &(((struct lguest_pages *) - (switcher_addr + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); + return &(((struct lguest_pages *)(switcher_addr + PAGE_SIZE))[cpu]); } static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu); -- cgit v1.2.1 From e1d12606f756bdb8328a66a2873dca6c46bcb4e5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:39 +0930 Subject: lguest: make check_gpte et. al return bool. This is a bit neater: we can immediately return if a PTE/PGD/PMD entry is invalid (which also kills the guest). It means we don't risk using invalid entries as we reshuffle the code. Signed-off-by: Rusty Russell --- drivers/lguest/page_tables.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 758466299b0d..f074f34acb86 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -259,26 +259,35 @@ static void release_pte(pte_t pte) } /*:*/ -static void check_gpte(struct lg_cpu *cpu, pte_t gpte) +static bool check_gpte(struct lg_cpu *cpu, pte_t gpte) { if ((pte_flags(gpte) & _PAGE_PSE) || - pte_pfn(gpte) >= cpu->lg->pfn_limit) + pte_pfn(gpte) >= cpu->lg->pfn_limit) { kill_guest(cpu, "bad page table entry"); + return false; + } + return true; } -static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd) +static bool check_gpgd(struct lg_cpu *cpu, pgd_t gpgd) { if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) || - (pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) + (pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) { kill_guest(cpu, "bad page directory entry"); + return false; + } + return true; } #ifdef CONFIG_X86_PAE -static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) +static bool check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) { if ((pmd_flags(gpmd) & ~_PAGE_TABLE) || - (pmd_pfn(gpmd) >= cpu->lg->pfn_limit)) + (pmd_pfn(gpmd) >= cpu->lg->pfn_limit)) { kill_guest(cpu, "bad page middle directory entry"); + return false; + } + return true; } #endif @@ -336,7 +345,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) return false; } /* We check that the Guest pgd is OK. */ - check_gpgd(cpu, gpgd); + if (!check_gpgd(cpu, gpgd)) + return false; /* * And we copy the flags to the shadow PGD entry. The page * number in the shadow PGD is the page we just allocated. @@ -372,7 +382,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) } /* We check that the Guest pmd is OK. */ - check_gpmd(cpu, gpmd); + if (!check_gpmd(cpu, gpmd)) + return false; /* * And we copy the flags to the shadow PMD entry. The page @@ -421,7 +432,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) * Check that the Guest PTE flags are OK, and the page number is below * the pfn_limit (ie. not mapping the Launcher binary). */ - check_gpte(cpu, gpte); + if (!check_gpte(cpu, gpte)) + return false; /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ gpte = pte_mkyoung(gpte); @@ -857,7 +869,8 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, * micro-benchmark. */ if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { - check_gpte(cpu, gpte); + if (!check_gpte(cpu, gpte)) + return; set_pte(spte, gpte_to_spte(cpu, gpte, pte_flags(gpte) & _PAGE_DIRTY)); -- cgit v1.2.1 From 17427e08faae3e63271a9c2d0edb6a22e5fbb54b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:39 +0930 Subject: lguest: extract shadow PTE walking / allocating. We want a separate find_pte() function so we can call it for populating the switcher PTE entries. We can also use it in page_writable(). Signed-off-by: Rusty Russell --- drivers/lguest/page_tables.c | 170 +++++++++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 69 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index f074f34acb86..009c717fda99 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -291,6 +291,88 @@ static bool check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) } #endif +/*H:331 + * This is the core routine to walk the shadow page tables and find the page + * table entry for a specific address. + * + * If allocate is set, then we allocate any missing levels, setting the flags + * on the new page directory and mid-level directories using the arguments + * (which are copied from the Guest's page table entries). + */ +static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate, + int pgd_flags, int pmd_flags) +{ + pgd_t *spgd; + /* Mid level for PAE. */ +#ifdef CONFIG_X86_PAE + pmd_t *spmd; +#endif + + /* Get top level entry. */ + spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); + if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { + /* No shadow entry: allocate a new shadow PTE page. */ + unsigned long ptepage; + + /* If they didn't want us to allocate anything, stop. */ + if (!allocate) + return NULL; + + ptepage = get_zeroed_page(GFP_KERNEL); + /* + * This is not really the Guest's fault, but killing it is + * simple for this corner case. + */ + if (!ptepage) { + kill_guest(cpu, "out of memory allocating pte page"); + return NULL; + } + /* + * And we copy the flags to the shadow PGD entry. The page + * number in the shadow PGD is the page we just allocated. + */ + set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags)); + } + + /* + * Intel's Physical Address Extension actually uses three levels of + * page tables, so we need to look in the mid-level. + */ +#ifdef CONFIG_X86_PAE + /* Now look at the mid-level shadow entry. */ + spmd = spmd_addr(cpu, *spgd, vaddr); + + if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) { + /* No shadow entry: allocate a new shadow PTE page. */ + unsigned long ptepage; + + /* If they didn't want us to allocate anything, stop. */ + if (!allocate) + return NULL; + + ptepage = get_zeroed_page(GFP_KERNEL); + + /* + * This is not really the Guest's fault, but killing it is + * simple for this corner case. + */ + if (!ptepage) { + kill_guest(cpu, "out of memory allocating pmd page"); + return NULL; + } + + /* + * And we copy the flags to the shadow PMD entry. The page + * number in the shadow PMD is the page we just allocated. + */ + set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags)); + } +#endif + + /* Get the pointer to the shadow PTE entry we're going to set. */ + return spte_addr(cpu, *spgd, vaddr); +} + /*H:330 * (i) Looking up a page table entry when the Guest faults. * @@ -304,17 +386,11 @@ static bool check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) */ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) { - pgd_t gpgd; - pgd_t *spgd; unsigned long gpte_ptr; pte_t gpte; pte_t *spte; - - /* Mid level for PAE. */ -#ifdef CONFIG_X86_PAE - pmd_t *spmd; pmd_t gpmd; -#endif + pgd_t gpgd; /* We never demand page the Switcher, so trying is a mistake. */ if (vaddr >= switcher_addr) @@ -329,67 +405,31 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* Toplevel not present? We can't map it in. */ if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) return false; - } - /* Now look at the matching shadow entry. */ - spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); - if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { - /* No shadow entry: allocate a new shadow PTE page. */ - unsigned long ptepage = get_zeroed_page(GFP_KERNEL); - /* - * This is not really the Guest's fault, but killing it is - * simple for this corner case. + /* + * This kills the Guest if it has weird flags or tries to + * refer to a "physical" address outside the bounds. */ - if (!ptepage) { - kill_guest(cpu, "out of memory allocating pte page"); - return false; - } - /* We check that the Guest pgd is OK. */ if (!check_gpgd(cpu, gpgd)) return false; - /* - * And we copy the flags to the shadow PGD entry. The page - * number in the shadow PGD is the page we just allocated. - */ - set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); } + /* This "mid-level" entry is only used for non-linear, PAE mode. */ + gpmd = __pmd(_PAGE_TABLE); + #ifdef CONFIG_X86_PAE - if (unlikely(cpu->linear_pages)) { - /* Faking up a linear mapping. */ - gpmd = __pmd(_PAGE_TABLE); - } else { + if (likely(!cpu->linear_pages)) { gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); /* Middle level not present? We can't map it in. */ if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) return false; - } - - /* Now look at the matching shadow entry. */ - spmd = spmd_addr(cpu, *spgd, vaddr); - - if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) { - /* No shadow entry: allocate a new shadow PTE page. */ - unsigned long ptepage = get_zeroed_page(GFP_KERNEL); - /* - * This is not really the Guest's fault, but killing it is - * simple for this corner case. + /* + * This kills the Guest if it has weird flags or tries to + * refer to a "physical" address outside the bounds. */ - if (!ptepage) { - kill_guest(cpu, "out of memory allocating pte page"); - return false; - } - - /* We check that the Guest pmd is OK. */ if (!check_gpmd(cpu, gpmd)) return false; - - /* - * And we copy the flags to the shadow PMD entry. The page - * number in the shadow PMD is the page we just allocated. - */ - set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); } /* @@ -441,7 +481,9 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) gpte = pte_mkdirty(gpte); /* Get the pointer to the shadow PTE entry we're going to set. */ - spte = spte_addr(cpu, *spgd, vaddr); + spte = find_spte(cpu, vaddr, true, pgd_flags(gpgd), pmd_flags(gpmd)); + if (!spte) + return false; /* * If there was a valid shadow PTE entry here before, we release it. @@ -493,33 +535,23 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) */ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) { - pgd_t *spgd; + pte_t *spte; unsigned long flags; -#ifdef CONFIG_X86_PAE - pmd_t *spmd; -#endif /* You can't put your stack in the Switcher! */ if (vaddr >= switcher_addr) return false; - /* Look at the current top level entry: is it present? */ - spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); - if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) + /* If there's no shadow PTE, it's not writable. */ + spte = find_spte(cpu, vaddr, false, 0, 0); + if (!spte) return false; -#ifdef CONFIG_X86_PAE - spmd = spmd_addr(cpu, *spgd, vaddr); - if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) - return false; -#endif - /* * Check the flags on the pte entry itself: it must be present and * writable. */ - flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); - + flags = pte_flags(*spte); return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); } -- cgit v1.2.1 From f1f394b1c33d93416c90f97e201d4d386c04af55 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:40 +0930 Subject: lguest: expost switcher_pages array (as lg_switcher_pages). We will need this in page_table.c soon. Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 25 +++++++++++++------------ drivers/lguest/lg.h | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 4209065b9b1e..b6c71c32308c 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -21,8 +21,8 @@ #include "lg.h" unsigned long switcher_addr; +struct page **lg_switcher_pages; static struct vm_struct *switcher_vma; -static struct page **switcher_pages; /* This One Big lock protects all inter-guest data structures. */ DEFINE_MUTEX(lguest_lock); @@ -63,9 +63,10 @@ static __init int map_switcher(void) * We allocate an array of struct page pointers. map_vm_area() wants * this, rather than just an array of pages. */ - switcher_pages = kmalloc(sizeof(switcher_pages[0])*TOTAL_SWITCHER_PAGES, - GFP_KERNEL); - if (!switcher_pages) { + lg_switcher_pages = kmalloc(sizeof(lg_switcher_pages[0]) + * TOTAL_SWITCHER_PAGES, + GFP_KERNEL); + if (!lg_switcher_pages) { err = -ENOMEM; goto out; } @@ -75,8 +76,8 @@ static __init int map_switcher(void) * so we make sure they're zeroed. */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { - switcher_pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!switcher_pages[i]) { + lg_switcher_pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!lg_switcher_pages[i]) { err = -ENOMEM; goto free_some_pages; } @@ -117,7 +118,7 @@ static __init int map_switcher(void) * array of struct pages. It increments that pointer, but we don't * care. */ - pagep = switcher_pages; + pagep = lg_switcher_pages; err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); if (err) { printk("lguest: map_vm_area failed: %i\n", err); @@ -142,8 +143,8 @@ free_pages: i = TOTAL_SWITCHER_PAGES; free_some_pages: for (--i; i >= 0; i--) - __free_pages(switcher_pages[i], 0); - kfree(switcher_pages); + __free_pages(lg_switcher_pages[i], 0); + kfree(lg_switcher_pages); out: return err; } @@ -158,8 +159,8 @@ static void unmap_switcher(void) vunmap(switcher_vma->addr); /* Now we just need to free the pages we copied the switcher into */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) - __free_pages(switcher_pages[i], 0); - kfree(switcher_pages); + __free_pages(lg_switcher_pages[i], 0); + kfree(lg_switcher_pages); } /*H:032 @@ -333,7 +334,7 @@ static int __init init(void) goto out; /* Now we set up the pagetable implementation for the Guests. */ - err = init_pagetables(switcher_pages); + err = init_pagetables(lg_switcher_pages); if (err) goto unmap; diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 4c3e532d50d6..9a345efa83e4 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -124,6 +124,7 @@ bool lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len); void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); +extern struct page **lg_switcher_pages; /*H:035 * Using memory-copy operations like that is usually inconvient, so we -- cgit v1.2.1 From 3412b6ae2924e068f9932f841bdea0f2d8424502 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:40 +0930 Subject: lguest: don't share Switcher PTE pages between guests. We currently use the whole top PGD entry for the switcher, so we simply share a fixed page of PTEs between all guests (actually, it's one per Host CPU, to ensure isolation between guests). Changes to a scheme where every guest has its own mappings. Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 10 +- drivers/lguest/lg.h | 3 - drivers/lguest/page_tables.c | 260 ++++++++++++++++++------------------------- 3 files changed, 107 insertions(+), 166 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index b6c71c32308c..7e1d7ee36478 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -333,15 +333,10 @@ static int __init init(void) if (err) goto out; - /* Now we set up the pagetable implementation for the Guests. */ - err = init_pagetables(lg_switcher_pages); - if (err) - goto unmap; - /* We might need to reserve an interrupt vector. */ err = init_interrupts(); if (err) - goto free_pgtables; + goto unmap; /* /dev/lguest needs to be registered. */ err = lguest_device_init(); @@ -356,8 +351,6 @@ static int __init init(void) free_interrupts: free_interrupts(); -free_pgtables: - free_pagetables(); unmap: unmap_switcher(); out: @@ -369,7 +362,6 @@ static void __exit fini(void) { lguest_device_remove(); free_interrupts(); - free_pagetables(); unmap_switcher(); lguest_arch_host_fini(); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 9a345efa83e4..faac9fc6db22 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -14,9 +14,6 @@ #include -void free_pagetables(void); -int init_pagetables(struct page **switcher_pages); - struct pgdir { unsigned long gpgdir; pgd_t *pgdir; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 009c717fda99..1f48f2712f3a 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -62,20 +62,11 @@ * will need the last pmd entry of the last pmd page. */ #ifdef CONFIG_X86_PAE -#define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) #define CHECK_GPGD_MASK _PAGE_PRESENT #else #define CHECK_GPGD_MASK _PAGE_TABLE #endif -/* - * We actually need a separate PTE page for each CPU. Remember that after the - * Switcher code itself comes two pages for each CPU, and we don't want this - * CPU's guest to see the pages of any other CPU. - */ -static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); -#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) - /*H:320 * The page table code is curly enough to need helper functions to keep it * clear and clean. The kernel itself provides many of them; one advantage @@ -714,9 +705,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, int *blank_pgdir) { unsigned int next; -#ifdef CONFIG_X86_PAE - pmd_t *pmd_table; -#endif /* * We pick one entry at random to throw out. Choosing the Least @@ -731,29 +719,11 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, if (!cpu->lg->pgdirs[next].pgdir) next = cpu->cpu_pgd; else { -#ifdef CONFIG_X86_PAE /* - * In PAE mode, allocate a pmd page and populate the - * last pgd entry. + * This is a blank page, so there are no kernel + * mappings: caller must map the stack! */ - pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); - if (!pmd_table) { - free_page((long)cpu->lg->pgdirs[next].pgdir); - set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0)); - next = cpu->cpu_pgd; - } else { - set_pgd(cpu->lg->pgdirs[next].pgdir + - SWITCHER_PGD_INDEX, - __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - /* - * This is a blank page, so there are no kernel - * mappings: caller must map the stack! - */ - *blank_pgdir = 1; - } -#else *blank_pgdir = 1; -#endif } } /* Record which Guest toplevel this shadows. */ @@ -764,6 +734,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, return next; } +/*H:501 + * We do need the Switcher code mapped at all times, so we allocate that + * part of the Guest page table here, and populate it when we're about to run + * the guest. + */ +static bool allocate_switcher_mapping(struct lg_cpu *cpu) +{ + int i; + + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { + if (!find_spte(cpu, switcher_addr + i * PAGE_SIZE, true, + CHECK_GPGD_MASK, _PAGE_TABLE)) + return false; + } + return true; +} + /*H:470 * Finally, a routine which throws away everything: all PGD entries in all * the shadow page tables, including the Guest's kernel mappings. This is used @@ -774,28 +761,14 @@ static void release_all_pagetables(struct lguest *lg) unsigned int i, j; /* Every shadow pagetable this Guest has */ - for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) - if (lg->pgdirs[i].pgdir) { -#ifdef CONFIG_X86_PAE - pgd_t *spgd; - pmd_t *pmdpage; - unsigned int k; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) { + if (!lg->pgdirs[i].pgdir) + continue; - /* Get the last pmd page. */ - spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; - pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); - - /* - * And release the pmd entries of that pmd page, - * except for the switcher pmd. - */ - for (k = 0; k < SWITCHER_PMD_INDEX; k++) - release_pmd(&pmdpage[k]); -#endif - /* Every PGD entry except the Switcher at the top */ - for (j = 0; j < SWITCHER_PGD_INDEX; j++) - release_pgd(lg->pgdirs[i].pgdir + j); - } + /* Every PGD entry. */ + for (j = 0; j < PTRS_PER_PGD; j++) + release_pgd(lg->pgdirs[i].pgdir + j); + } } /* @@ -809,6 +782,9 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) release_all_pagetables(cpu->lg); /* We need the Guest kernel stack mapped again. */ pin_stack_pages(cpu); + /* And we need Switcher allocated. */ + if (!allocate_switcher_mapping(cpu)) + kill_guest(cpu, "Cannot populate switcher mapping"); } /*H:430 @@ -844,9 +820,15 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) newpgdir = new_pgdir(cpu, pgtable, &repin); /* Change the current pgd index to the new one. */ cpu->cpu_pgd = newpgdir; - /* If it was completely blank, we map in the Guest kernel stack */ + /* + * If it was completely blank, we map in the Guest kernel stack and + * the Switcher. + */ if (repin) pin_stack_pages(cpu); + + if (!allocate_switcher_mapping(cpu)) + kill_guest(cpu, "Cannot populate switcher mapping"); } /*:*/ @@ -976,14 +958,23 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx) { int pgdir; - if (idx >= SWITCHER_PGD_INDEX) + if (idx > PTRS_PER_PGD) { + kill_guest(&lg->cpus[0], "Attempt to set pgd %u/%u", + idx, PTRS_PER_PGD); return; + } /* If they're talking about a page table we have a shadow for... */ pgdir = find_pgdir(lg, gpgdir); - if (pgdir < ARRAY_SIZE(lg->pgdirs)) + if (pgdir < ARRAY_SIZE(lg->pgdirs)) { /* ... throw it away. */ release_pgd(lg->pgdirs[pgdir].pgdir + idx); + /* That might have been the Switcher mapping, remap it. */ + if (!allocate_switcher_mapping(&lg->cpus[0])) { + kill_guest(&lg->cpus[0], + "Cannot populate switcher mapping"); + } + } } #ifdef CONFIG_X86_PAE @@ -1001,6 +992,9 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) * we will populate on future faults. The Guest doesn't have any actual * pagetables yet, so we set linear_pages to tell demand_page() to fake it * for the moment. + * + * We do need the Switcher to be mapped at all times, so we allocate that + * part of the Guest page table here. */ int init_guest_pagetable(struct lguest *lg) { @@ -1014,6 +1008,13 @@ int init_guest_pagetable(struct lguest *lg) /* We start with a linear mapping until the initialize. */ cpu->linear_pages = true; + + /* Allocate the page tables for the Switcher. */ + if (!allocate_switcher_mapping(cpu)) { + release_all_pagetables(lg); + return -ENOMEM; + } + return 0; } @@ -1065,91 +1066,68 @@ void free_guest_pagetable(struct lguest *lg) * (vi) Mapping the Switcher when the Guest is about to run. * * The Switcher and the two pages for this CPU need to be visible in the - * Guest (and not the pages for other CPUs). We have the appropriate PTE pages - * for each CPU already set up, we just need to hook them in now we know which - * Guest is about to run on this CPU. + * Guest (and not the pages for other CPUs). + * + * The pages have all been allocate */ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { - pte_t *switcher_pte_page = __this_cpu_read(switcher_pte_pages); - pte_t regs_pte; + unsigned long base, i; + struct page *percpu_switcher_page, *regs_page; + pte_t *pte; -#ifdef CONFIG_X86_PAE - pmd_t switcher_pmd; - pmd_t *pmd_table; - - switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT, - PAGE_KERNEL_EXEC); - - /* Figure out where the pmd page is, by reading the PGD, and converting - * it to a virtual address. */ - pmd_table = __va(pgd_pfn(cpu->lg-> - pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) - << PAGE_SHIFT); - /* Now write it into the shadow page table. */ - set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); -#else - pgd_t switcher_pgd; + /* Code page should always be mapped, and executable. */ + pte = find_spte(cpu, switcher_addr, false, 0, 0); + get_page(lg_switcher_pages[0]); + set_pte(pte, mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX)); - /* - * Make the last PGD entry for this Guest point to the Switcher's PTE - * page for this CPU (with appropriate flags). - */ - switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); + /* Clear all the Switcher mappings for any other CPUs. */ + /* FIXME: This is dumb: update only when Host CPU changes. */ + for_each_possible_cpu(i) { + /* Get location of lguest_pages (indexed by Host CPU) */ + base = switcher_addr + PAGE_SIZE + + i * sizeof(struct lguest_pages); - cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; + /* Get shadow PTE for first page (where we put guest regs). */ + pte = find_spte(cpu, base, false, 0, 0); + set_pte(pte, __pte(0)); + + /* This is where we put R/O state. */ + pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0); + set_pte(pte, __pte(0)); + } -#endif /* - * We also change the Switcher PTE page. When we're running the Guest, - * we want the Guest's "regs" page to appear where the first Switcher - * page for this CPU is. This is an optimization: when the Switcher - * saves the Guest registers, it saves them into the first page of this - * CPU's "struct lguest_pages": if we make sure the Guest's register - * page is already mapped there, we don't have to copy them out - * again. + * When we're running the Guest, we want the Guest's "regs" page to + * appear where the first Switcher page for this CPU is. This is an + * optimization: when the Switcher saves the Guest registers, it saves + * them into the first page of this CPU's "struct lguest_pages": if we + * make sure the Guest's register page is already mapped there, we + * don't have to copy them out again. */ - regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL); - set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte); -} -/*:*/ - -static void free_switcher_pte_pages(void) -{ - unsigned int i; - - for_each_possible_cpu(i) - free_page((long)switcher_pte_page(i)); -} - -/*H:520 - * Setting up the Switcher PTE page for given CPU is fairly easy, given - * the CPU number and the "struct page"s for the Switcher and per-cpu pages. - */ -static __init void populate_switcher_pte_page(unsigned int cpu, - struct page *switcher_pages[]) -{ - pte_t *pte = switcher_pte_page(cpu); - int i; - - /* The first entries maps the Switcher code. */ - set_pte(&pte[0], mk_pte(switcher_pages[0], - __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); - - /* The only other thing we map is this CPU's pair of pages. */ - i = 1 + cpu*2; - - /* First page (Guest registers) is writable from the Guest */ - set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_pages[i]), - __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); + /* Find the shadow PTE for this regs page. */ + base = switcher_addr + PAGE_SIZE + + raw_smp_processor_id() * sizeof(struct lguest_pages); + pte = find_spte(cpu, base, false, 0, 0); + regs_page = pfn_to_page(__pa(cpu->regs_page) >> PAGE_SHIFT); + get_page(regs_page); + set_pte(pte, mk_pte(regs_page, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL))); /* - * The second page contains the "struct lguest_ro_state", and is - * read-only. + * We map the second page of the struct lguest_pages read-only in + * the Guest: the IDT, GDT and other things it's not supposed to + * change. */ - set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_pages[i+1]), - __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); + base += PAGE_SIZE; + pte = find_spte(cpu, base, false, 0, 0); + + percpu_switcher_page + = lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1]; + get_page(percpu_switcher_page); + set_pte(pte, mk_pte(percpu_switcher_page, + __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL))); } +/*:*/ /* * We've made it through the page table code. Perhaps our tired brains are @@ -1163,29 +1141,3 @@ static __init void populate_switcher_pte_page(unsigned int cpu, * * There is just one file remaining in the Host. */ - -/*H:510 - * At boot or module load time, init_pagetables() allocates and populates - * the Switcher PTE page for each CPU. - */ -__init int init_pagetables(struct page **switcher_pages) -{ - unsigned int i; - - for_each_possible_cpu(i) { - switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!switcher_pte_page(i)) { - free_switcher_pte_pages(); - return -ENOMEM; - } - populate_switcher_pte_page(i, switcher_pages); - } - return 0; -} -/*:*/ - -/* Cleaning up simply involves freeing the PTE page for each CPU. */ -void free_pagetables(void) -{ - free_switcher_pte_pages(); -} -- cgit v1.2.1 From 86935fc4ee4d95efe01b6c91cd5143fa4c38c02b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:41 +0930 Subject: lguest: map Switcher text whenever we allocate a new pagetable. It's always to same, so no need to put in the PTE every time we're about to run. Keep a flag to track whether the pagetable has the Switcher entries allocated, and when allocating always initialize the Switcher text PTE. Signed-off-by: Rusty Russell --- drivers/lguest/lg.h | 1 + drivers/lguest/page_tables.c | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 10 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index faac9fc6db22..005929a3fd52 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -16,6 +16,7 @@ struct pgdir { unsigned long gpgdir; + bool switcher_mapped; pgd_t *pgdir; }; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 1f48f2712f3a..d1a5de45be02 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -736,18 +736,39 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, /*H:501 * We do need the Switcher code mapped at all times, so we allocate that - * part of the Guest page table here, and populate it when we're about to run - * the guest. + * part of the Guest page table here. We map the Switcher code immediately, + * but defer mapping of the guest register page and IDT/LDT etc page until + * just before we run the guest in map_switcher_in_guest(). + * + * We *could* do this setup in map_switcher_in_guest(), but at that point + * we've interrupts disabled, and allocating pages like that is fraught: we + * can't sleep if we need to free up some memory. */ static bool allocate_switcher_mapping(struct lg_cpu *cpu) { int i; for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { - if (!find_spte(cpu, switcher_addr + i * PAGE_SIZE, true, - CHECK_GPGD_MASK, _PAGE_TABLE)) + pte_t *pte = find_spte(cpu, switcher_addr + i * PAGE_SIZE, true, + CHECK_GPGD_MASK, _PAGE_TABLE); + if (!pte) return false; + + /* + * Map the switcher page if not already there. It might + * already be there because we call allocate_switcher_mapping() + * in guest_set_pgd() just in case it did discard our Switcher + * mapping, but it probably didn't. + */ + if (i == 0 && !(pte_flags(*pte) & _PAGE_PRESENT)) { + /* Get a reference to the Switcher page. */ + get_page(lg_switcher_pages[0]); + /* Create a read-only, exectuable, kernel-style PTE */ + set_pte(pte, + mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX)); + } } + cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped = true; return true; } @@ -768,6 +789,7 @@ static void release_all_pagetables(struct lguest *lg) /* Every PGD entry. */ for (j = 0; j < PTRS_PER_PGD; j++) release_pgd(lg->pgdirs[i].pgdir + j); + lg->pgdirs[i].switcher_mapped = false; } } @@ -827,8 +849,10 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) if (repin) pin_stack_pages(cpu); - if (!allocate_switcher_mapping(cpu)) - kill_guest(cpu, "Cannot populate switcher mapping"); + if (!cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped) { + if (!allocate_switcher_mapping(cpu)) + kill_guest(cpu, "Cannot populate switcher mapping"); + } } /*:*/ @@ -1076,10 +1100,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) struct page *percpu_switcher_page, *regs_page; pte_t *pte; - /* Code page should always be mapped, and executable. */ - pte = find_spte(cpu, switcher_addr, false, 0, 0); - get_page(lg_switcher_pages[0]); - set_pte(pte, mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX)); + /* Switcher page should always be mapped! */ + BUG_ON(!cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped); /* Clear all the Switcher mappings for any other CPUs. */ /* FIXME: This is dumb: update only when Host CPU changes. */ -- cgit v1.2.1 From 6d0cda93c0d3c8bb0a553047c10f114c88c8af89 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:41 +0930 Subject: lguest: cache last cpu we ran on. This optimizes the frobbing of our Switcher map. Signed-off-by: Rusty Russell --- drivers/lguest/lg.h | 1 + drivers/lguest/page_tables.c | 78 ++++++++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 25 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 005929a3fd52..2eef40be4c04 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -17,6 +17,7 @@ struct pgdir { unsigned long gpgdir; bool switcher_mapped; + int last_host_cpu; pgd_t *pgdir; }; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index d1a5de45be02..19611b0551cd 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -7,7 +7,7 @@ * converted Guest pages when running the Guest. :*/ -/* Copyright (C) Rusty Russell IBM Corporation 2006. +/* Copyright (C) Rusty Russell IBM Corporation 2013. * GPL v2 and any later version */ #include #include @@ -731,6 +731,9 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, /* Release all the non-kernel mappings. */ flush_user_mappings(cpu->lg, next); + /* This hasn't run on any CPU at all. */ + cpu->lg->pgdirs[next].last_host_cpu = -1; + return next; } @@ -790,6 +793,7 @@ static void release_all_pagetables(struct lguest *lg) for (j = 0; j < PTRS_PER_PGD; j++) release_pgd(lg->pgdirs[i].pgdir + j); lg->pgdirs[i].switcher_mapped = false; + lg->pgdirs[i].last_host_cpu = -1; } } @@ -1086,37 +1090,62 @@ void free_guest_pagetable(struct lguest *lg) free_page((long)lg->pgdirs[i].pgdir); } +/*H:481 + * This clears the Switcher mappings for cpu #i. + */ +static void remove_switcher_percpu_map(struct lg_cpu *cpu, unsigned int i) +{ + unsigned long base = switcher_addr + PAGE_SIZE + i * PAGE_SIZE*2; + pte_t *pte; + + /* Clear the mappings for both pages. */ + pte = find_spte(cpu, base, false, 0, 0); + release_pte(*pte); + set_pte(pte, __pte(0)); + + pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0); + release_pte(*pte); + set_pte(pte, __pte(0)); +} + /*H:480 * (vi) Mapping the Switcher when the Guest is about to run. * - * The Switcher and the two pages for this CPU need to be visible in the - * Guest (and not the pages for other CPUs). + * The Switcher and the two pages for this CPU need to be visible in the Guest + * (and not the pages for other CPUs). * - * The pages have all been allocate + * The pages for the pagetables have all been allocated before: we just need + * to make sure the actual PTEs are up-to-date for the CPU we're about to run + * on. */ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { - unsigned long base, i; + unsigned long base; struct page *percpu_switcher_page, *regs_page; pte_t *pte; + struct pgdir *pgdir = &cpu->lg->pgdirs[cpu->cpu_pgd]; - /* Switcher page should always be mapped! */ - BUG_ON(!cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped); - - /* Clear all the Switcher mappings for any other CPUs. */ - /* FIXME: This is dumb: update only when Host CPU changes. */ - for_each_possible_cpu(i) { - /* Get location of lguest_pages (indexed by Host CPU) */ - base = switcher_addr + PAGE_SIZE - + i * sizeof(struct lguest_pages); + /* Switcher page should always be mapped by now! */ + BUG_ON(!pgdir->switcher_mapped); - /* Get shadow PTE for first page (where we put guest regs). */ - pte = find_spte(cpu, base, false, 0, 0); - set_pte(pte, __pte(0)); + /* + * Remember that we have two pages for each Host CPU, so we can run a + * Guest on each CPU without them interfering. We need to make sure + * those pages are mapped correctly in the Guest, but since we usually + * run on the same CPU, we cache that, and only update the mappings + * when we move. + */ + if (pgdir->last_host_cpu == raw_smp_processor_id()) + return; - /* This is where we put R/O state. */ - pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0); - set_pte(pte, __pte(0)); + /* -1 means unknown so we remove everything. */ + if (pgdir->last_host_cpu == -1) { + unsigned int i; + for_each_possible_cpu(i) + remove_switcher_percpu_map(cpu, i); + } else { + /* We know exactly what CPU mapping to remove. */ + remove_switcher_percpu_map(cpu, pgdir->last_host_cpu); } /* @@ -1140,18 +1169,17 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) * the Guest: the IDT, GDT and other things it's not supposed to * change. */ - base += PAGE_SIZE; - pte = find_spte(cpu, base, false, 0, 0); - + pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0); percpu_switcher_page = lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1]; get_page(percpu_switcher_page); set_pte(pte, mk_pte(percpu_switcher_page, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL))); + + pgdir->last_host_cpu = raw_smp_processor_id(); } -/*:*/ -/* +/*H:490 * We've made it through the page table code. Perhaps our tired brains are * still processing the details, or perhaps we're simply glad it's over. * -- cgit v1.2.1 From 6b39271746de131366a14bcf04f5740cdc4abdef Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:42 +0930 Subject: lguest: map Switcher below fixmap. Now we've adjusted all the code, we can simply set switcher_addr to wherever it needs to go below the fixmaps, rather than asserting that it should be so. With large NR_CPUS and PAE, people were hitting the "mapping switcher would thwack fixmap" message. Reported-by: Paul Bolle Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 7e1d7ee36478..0bf1e4edf04d 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -83,18 +83,13 @@ static __init int map_switcher(void) } } - switcher_addr = SWITCHER_ADDR; - /* - * First we check that the Switcher won't overlap the fixmap area at - * the top of memory. It's currently nowhere near, but it could have - * very strange effects if it ever happened. + * We place the Switcher underneath the fixmap area, which is the + * highest virtual address we can get. This is important, since we + * tell the Guest it can't access this memory, so we want its ceiling + * as high as possible. */ - if (switcher_addr + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ - err = -ENOMEM; - printk("lguest: mapping switcher would thwack fixmap\n"); - goto free_pages; - } + switcher_addr = FIXADDR_START - (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE; /* * Now we reserve the "virtual memory area" we want. We might -- cgit v1.2.1 From c2ecd51531c881c8d47d77ea30395f7f03d42da3 Mon Sep 17 00:00:00 2001 From: Cosmin Paraschiv Date: Tue, 30 Apr 2013 09:23:09 +0930 Subject: lguest: improve code readability in lg_cpu_start. Make the container_of call friendlier and fix some comment slip-ups. Signed-off-by: Cosmin Paraschiv Cc: Daniel Baluta Signed-off-by: Rusty Russell --- drivers/lguest/lguest_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index ff4a0bc9904d..4263f4cc8c55 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -250,13 +250,13 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) */ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) { - /* We have a limited number the number of CPUs in the lguest struct. */ + /* We have a limited number of CPUs in the lguest struct. */ if (id >= ARRAY_SIZE(cpu->lg->cpus)) return -EINVAL; /* Set up this CPU's id, and pointer back to the lguest struct. */ cpu->id = id; - cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); + cpu->lg = container_of(cpu, struct lguest, cpus[id]); cpu->lg->nr_cpus++; /* Each CPU has a timer it can set. */ @@ -270,7 +270,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) if (!cpu->regs_page) return -ENOMEM; - /* We actually put the registers at the bottom of the page. */ + /* We actually put the registers at the end of the page. */ cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); /* -- cgit v1.2.1