From 7d0642b93780a7309d2954de6f6126d6ceb526f0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 Jul 2012 15:03:18 -0400 Subject: xen/perf: Define .glob for the different hypercalls. This allows us in perf to have this: 99.67% [kernel] [k] xen_hypercall_sched_op 0.11% [kernel] [k] xen_hypercall_xen_version instead of the borring ever-encompassing: 99.13% [kernel] [k] hypercall_page [v2: Use a macro to define the name and skip] [v3: Use balign per Jan's suggestion] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/xen-head.S | 56 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index aaa7291c9259..7faed5869e5b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,61 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE + .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE +#define NEXT_HYPERCALL(x) \ + ENTRY(xen_hypercall_##x) \ + .skip 32 + +NEXT_HYPERCALL(set_trap_table) +NEXT_HYPERCALL(mmu_update) +NEXT_HYPERCALL(set_gdt) +NEXT_HYPERCALL(stack_switch) +NEXT_HYPERCALL(set_callbacks) +NEXT_HYPERCALL(fpu_taskswitch) +NEXT_HYPERCALL(sched_op_compat) +NEXT_HYPERCALL(platform_op) +NEXT_HYPERCALL(set_debugreg) +NEXT_HYPERCALL(get_debugreg) +NEXT_HYPERCALL(update_descriptor) +NEXT_HYPERCALL(ni) +NEXT_HYPERCALL(memory_op) +NEXT_HYPERCALL(multicall) +NEXT_HYPERCALL(update_va_mapping) +NEXT_HYPERCALL(set_timer_op) +NEXT_HYPERCALL(event_channel_op_compat) +NEXT_HYPERCALL(xen_version) +NEXT_HYPERCALL(console_io) +NEXT_HYPERCALL(physdev_op_compat) +NEXT_HYPERCALL(grant_table_op) +NEXT_HYPERCALL(vm_assist) +NEXT_HYPERCALL(update_va_mapping_otherdomain) +NEXT_HYPERCALL(iret) +NEXT_HYPERCALL(vcpu_op) +NEXT_HYPERCALL(set_segment_base) +NEXT_HYPERCALL(mmuext_op) +NEXT_HYPERCALL(xsm_op) +NEXT_HYPERCALL(nmi_op) +NEXT_HYPERCALL(sched_op) +NEXT_HYPERCALL(callback_op) +NEXT_HYPERCALL(xenoprof_op) +NEXT_HYPERCALL(event_channel_op) +NEXT_HYPERCALL(physdev_op) +NEXT_HYPERCALL(hvm_op) +NEXT_HYPERCALL(sysctl) +NEXT_HYPERCALL(domctl) +NEXT_HYPERCALL(kexec_op) +NEXT_HYPERCALL(tmem_op) /* 38 */ +ENTRY(xen_hypercall_rsvr) + .skip 320 +NEXT_HYPERCALL(mca) /* 48 */ +NEXT_HYPERCALL(arch_1) +NEXT_HYPERCALL(arch_2) +NEXT_HYPERCALL(arch_3) +NEXT_HYPERCALL(arch_4) +NEXT_HYPERCALL(arch_5) +NEXT_HYPERCALL(arch_6) + .balign PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") -- cgit v1.2.1 From a3118beb6a8cbe77ae3342125d920205871b0717 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 28 Jun 2012 22:12:36 -0400 Subject: xen/p2m: Fix the comment describing the P2M tree. It mixed up the p2m_mid_missing with p2m_missing. Also remove some extra spaces. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 64effdc6da94..e4adbfbdfada 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -22,7 +22,7 @@ * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. + * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * @@ -139,11 +139,11 @@ * / | ~0, ~0, .... 
| * | \---------------/ * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ + * p2m_mid_missing p2m_missing + * /-----------------\ /------------\ + * | [p2m_missing] +---->| ~0, ~0, ~0 | + * | [p2m_missing] +---->| ..., ~0 | + * \-----------------/ \------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -423,7 +423,7 @@ static void free_p2m_page(void *p) free_page((unsigned long)p); } -/* +/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so -- cgit v1.2.1 From 59b294403e9814e7c1154043567f0d71bac7a511 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 19 Jul 2012 10:23:47 -0400 Subject: xen/x86: Use memblock_reserve for sensitive areas. instead of a big memblock_reserve. This way we can be more selective in freeing regions (and it also makes it easier to understand where is what). [v1: Move the auto_translate_physmap to proper line] [v2: Per Stefano suggestion add more comments] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/p2m.c | 5 +++++ arch/x86/xen/setup.c | 9 --------- 3 files changed, 53 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ff962d4b821e..e532eb50e8d7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -998,7 +998,54 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } +/* + * If the MFN is not in the m2p (provided to us by the hypervisor) this + * function won't do anything. In practice this means that the XenBus + * MFN won't be available for the initial domain. */ +static void __init xen_reserve_mfn(unsigned long mfn) +{ + unsigned long pfn; + + if (!mfn) + return; + pfn = mfn_to_pfn(mfn); + if (phys_to_machine_mapping_valid(pfn)) + memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE); +} +static void __init xen_reserve_internals(void) +{ + unsigned long size; + + if (!xen_pv_domain()) + return; + + /* xen_start_info does not exist in the M2P, hence can't use + * xen_reserve_mfn. */ + memblock_reserve(__pa(xen_start_info), PAGE_SIZE); + + xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)); + xen_reserve_mfn(xen_start_info->store_mfn); + if (!xen_initial_domain()) + xen_reserve_mfn(xen_start_info->console.domU.mfn); + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + + /* + * ALIGN up to compensate for the p2m_page pointing to an array that + * can partially filled (look in xen_build_dynamic_phys_to_machine). + */ + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + /* We could use xen_reserve_mfn here, but would end up looping quite + * a lot (and call memblock_reserve for each PAGE), so lets just use + * the easy way and reserve it wholesale. 
*/ + memblock_reserve(__pa(xen_start_info->mfn_list), size); + + /* The pagetables are reserved in mmu.c */ +} void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1362,6 +1409,7 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_reserve_internals(); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index e4adbfbdfada..6a2bfa43c8a1 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -388,6 +388,11 @@ void __init xen_build_dynamic_phys_to_machine(void) } m2p_override_init(); + + /* NOTE: We cannot call memblock_reserve here for the mfn_list as there + * isn't enough pieces to make it work (for one - we are still using the + * Xen provided pagetable). Do it later in xen_reserve_internals. + */ } unsigned long get_phys_to_machine(unsigned long pfn) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a4790bf22c59..9efca750405d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -424,15 +424,6 @@ char * __init xen_memory_setup(void) e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); - /* - * Reserve Xen bits: - * - mfn_list - * - xen_start_info - * See comment above "struct start_info" in - */ - memblock_reserve(__pa(xen_start_info->mfn_list), - xen_start_info->pt_base - xen_start_info->mfn_list); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); return "Xen"; -- cgit v1.2.1 From 806c312e50f122c47913145cf884f53dd09d9199 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 21 Aug 2012 14:31:24 -0400 Subject: xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain. If a 64-bit hypervisor is booted with a 32-bit initial domain, the hypervisor deals with the initial domain as "compat" and does some extra adjustments (like pagetables are 4 bytes instead of 8). It also adjusts the xen_start_info->pt_base incorrectly. When booted with a 32-bit hypervisor (32-bit initial domain): .. (XEN) Start info: cf831000->cf83147c (XEN) Page tables: cf832000->cf8b5000 .. [ 0.000000] PT: cf832000 (f832000) [ 0.000000] Reserving PT: f832000->f8b5000 And with a 64-bit hypervisor: (XEN) Start info: 00000000cf831000->00000000cf8314b4 (XEN) Page tables: 00000000cf832000->00000000cf8b6000 [ 0.000000] PT: cf834000 (f834000) [ 0.000000] Reserving PT: f834000->f8b8000 To deal with this, we keep keep track of the highest physical address we have reserved via memblock_reserve. If that address does not overlap with pt_base, we have a gap which we reserve. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e532eb50e8d7..511f92d79e4a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1002,19 +1002,24 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) * If the MFN is not in the m2p (provided to us by the hypervisor) this * function won't do anything. In practice this means that the XenBus * MFN won't be available for the initial domain. 
*/ -static void __init xen_reserve_mfn(unsigned long mfn) +static unsigned long __init xen_reserve_mfn(unsigned long mfn) { - unsigned long pfn; + unsigned long pfn, end_pfn = 0; if (!mfn) - return; + return end_pfn; + pfn = mfn_to_pfn(mfn); - if (phys_to_machine_mapping_valid(pfn)) - memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE); + if (phys_to_machine_mapping_valid(pfn)) { + end_pfn = PFN_PHYS(pfn) + PAGE_SIZE; + memblock_reserve(PFN_PHYS(pfn), end_pfn); + } + return end_pfn; } static void __init xen_reserve_internals(void) { unsigned long size; + unsigned long last_phys = 0; if (!xen_pv_domain()) return; @@ -1022,12 +1027,13 @@ static void __init xen_reserve_internals(void) /* xen_start_info does not exist in the M2P, hence can't use * xen_reserve_mfn. */ memblock_reserve(__pa(xen_start_info), PAGE_SIZE); + last_phys = __pa(xen_start_info) + PAGE_SIZE; - xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)); - xen_reserve_mfn(xen_start_info->store_mfn); + last_phys = max(xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)), last_phys); + last_phys = max(xen_reserve_mfn(xen_start_info->store_mfn), last_phys); if (!xen_initial_domain()) - xen_reserve_mfn(xen_start_info->console.domU.mfn); + last_phys = max(xen_reserve_mfn(xen_start_info->console.domU.mfn), last_phys); if (xen_feature(XENFEAT_auto_translated_physmap)) return; @@ -1043,8 +1049,14 @@ static void __init xen_reserve_internals(void) * a lot (and call memblock_reserve for each PAGE), so lets just use * the easy way and reserve it wholesale. */ memblock_reserve(__pa(xen_start_info->mfn_list), size); - + last_phys = max(__pa(xen_start_info->mfn_list) + size, last_phys); /* The pagetables are reserved in mmu.c */ + + /* Under 64-bit hypervisor with a 32-bit domain, the hypervisor + * offsets the pt_base by two pages. Hence the reservation that is done + * in mmu.c misses two pages. We correct it here if we detect this. */ + if (last_phys < __pa(xen_start_info->pt_base)) + memblock_reserve(last_phys, __pa(xen_start_info->pt_base) - last_phys); } void xen_setup_shared_info(void) { -- cgit v1.2.1 From 988f0e24bbcbbf550dff016faf8273a94f4eb1af Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 27 Jul 2012 20:10:58 -0400 Subject: xen/swiotlb: Simplify the logic. Its pretty easy: 1). We only check to see if we need Xen SWIOTLB for PV guests. 2). If swiotlb=force or iommu=soft is set, then Xen SWIOTLB will be enabled. 3). If it is an initial domain, then Xen SWIOTLB will be enabled. 4). Native SWIOTLB must be disabled for PV guests. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/pci-swiotlb-xen.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 967633ad98c4..b6a534002ab2 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -34,19 +34,20 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { int __init pci_xen_swiotlb_detect(void) { + if (!xen_pv_domain()) + return 0; + /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force) && - (xen_pv_domain())) + if ((xen_initial_domain() || swiotlb || swiotlb_force)) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. 
* Don't worry about swiotlb_force flag activating the native, as * the 'swiotlb' flag is the only one turning it on. */ - if (xen_pv_domain()) - swiotlb = 0; + swiotlb = 0; return xen_swiotlb; } -- cgit v1.2.1 From fc2341df9e31be8a3940f4e302372d7ef46bab8c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 27 Jul 2012 20:16:00 -0400 Subject: xen/swiotlb: With more than 4GB on 64-bit, disable the native SWIOTLB. If a PV guest is booted the native SWIOTLB should not be turned on. It does not help us (we don't have any PCI devices) and it eats 64MB of good memory. In the case of PV guests with PCI devices we need the Xen-SWIOTLB one. [v1: Rewrite comment per Stefano's suggestion] Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/pci-swiotlb-xen.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index b6a534002ab2..1c1722761eec 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -8,6 +8,11 @@ #include #include +#ifdef CONFIG_X86_64 +#include +#include +#endif + int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { @@ -49,6 +54,15 @@ int __init pci_xen_swiotlb_detect(void) * the 'swiotlb' flag is the only one turning it on. */ swiotlb = 0; +#ifdef CONFIG_X86_64 + /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 + * (so no iommu=X command line over-writes). + * Considering that PV guests do not want the *native SWIOTLB* but + * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. + */ + if (max_pfn > MAX_DMA32_PFN) + no_iommu = 1; +#endif return xen_swiotlb; } -- cgit v1.2.1 From 4d9310e39728a87c86eb48492da7546f61189633 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 6 Aug 2012 15:27:09 +0100 Subject: xen: missing includes Changes in v2: - remove pvclock hack; - remove include linux/types.h from xen/interface/xen.h. v3: - Compile under IA64 Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/ia64/include/asm/xen/interface.h | 2 ++ arch/x86/include/asm/xen/interface.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index 09d5f7fd9db1..ee9cad6e749b 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -265,6 +265,8 @@ typedef struct xen_callback xen_callback_t; #endif /* !__ASSEMBLY__ */ +#include + /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index cbf0c9d50b92..a93db16e9582 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -121,6 +121,8 @@ struct arch_shared_info { #include "interface_64.h" #endif +#include + #ifndef __ASSEMBLY__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled -- cgit v1.2.1 From b8b0f559c7b1dcf5503817e518c81c9a18ee45e0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 21 Aug 2012 14:49:34 -0400 Subject: xen/apic/xenbus/swiotlb/pcifront/grant/tmem: Make functions or variables static. There is no need for those functions/variables to be visible. Make them static and also fix the compile warnings of this sort: drivers/xen/.c: warning: symbol '' was not declared. Should it be static? 
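As a self-contained illustration of that warning class and the linkage fix (a userspace sketch; the file and symbol names here are hypothetical, not taken from this series):

/* demo.c -- compile with: gcc -Wmissing-prototypes -c demo.c
 * This definition draws the warning: the symbol has external
 * linkage but no prototype for it is in scope, so sparse asks
 * "symbol 'demo_read' was not declared. Should it be static?" */
unsigned int demo_read(unsigned int reg)
{
	return reg + 1;
}

/* The fix when nothing outside this file uses the symbol: give it
 * internal linkage, as the apic.c hunk below does. */
static unsigned int demo_read_fixed(unsigned int reg)
{
	return reg + 1;
}

int main(void)
{
	return (int)(demo_read(1) + demo_read_fixed(1));
}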
Some of them just require including the header file that declares the functions. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/apic.c | 3 ++- arch/x86/xen/enlighten.c | 2 ++ arch/x86/xen/pci-swiotlb-xen.c | 1 + arch/x86/xen/platform-pci-unplug.c | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index ec57bd3818a4..7005ced5d1ad 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -6,8 +6,9 @@ #include #include +#include "xen-ops.h" -unsigned int xen_io_apic_read(unsigned apic, unsigned reg) +static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { struct physdev_apic apic_op; int ret; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ff962d4b821e..cb1b1914dbd3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -79,6 +79,8 @@ #include "smp.h" #include "multicalls.h" +#include + EXPORT_SYMBOL_GPL(hypercall_page); DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 967633ad98c4..2d58b3ff4fae 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -8,6 +8,7 @@ #include #include +#include int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index ffcf2615640b..0a7852483ffe 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -24,6 +24,7 @@ #include #include +#include "xen-ops.h" #define XEN_PLATFORM_ERR_MAGIC -1 #define XEN_PLATFORM_ERR_PROTOCOL -2 -- cgit v1.2.1 From bd3f79b71de0410352ab506496a467fcb0620912 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 22 Aug 2012 17:20:14 +0100 Subject: xen: Introduce xen_pfn_t for pfn and mfn types All the original Xen headers have xen_pfn_t as mfn and pfn type, however when they have been imported in Linux, xen_pfn_t has been replaced with unsigned long. That might work for x86 and ia64 but it does not for arm. Bring back xen_pfn_t and let each architecture define xen_pfn_t as they see fit. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- arch/ia64/include/asm/xen/interface.h | 5 ++++- arch/x86/include/asm/xen/interface.h | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index ee9cad6e749b..3d52a5bbd857 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -67,6 +67,10 @@ #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that we could have one ABI that works for 32 and 64 bit + * guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. 
*/ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -79,7 +83,6 @@ DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); -typedef unsigned long xen_pfn_t; DEFINE_GUEST_HANDLE(xen_pfn_t); #define PRI_xen_pfn "lx" #endif diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index a93db16e9582..555f94d3637b 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -47,6 +47,10 @@ #endif #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that on ARM we can have one ABI that works for 32 and 64 + * bit guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); #endif #ifndef HYPERVISOR_VIRT_START -- cgit v1.2.1 From 1a1d43318aeb74d679372c0b65029957be274529 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 22 Aug 2012 17:20:16 +0100 Subject: xen: allow privcmd for HVM guests This patch removes the "return -ENOSYS" for auto_translated_physmap guests from privcmd_mmap, thus it allows ARM guests to issue privcmd mmap calls. However privcmd mmap calls are still going to fail for HVM and hybrid guests on x86 because the xen_remap_domain_mfn_range implementation is currently PV only. Changes in v2: - better commit message; - return -EINVAL from xen_remap_domain_mfn_range if auto_translated_physmap. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3a73785631ce..885a22354a96 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2310,6 +2310,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EINVAL; + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == -- cgit v1.2.1 From 6d7083eee3bc088d1fc30eefabd6263bca40c95a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 13 Aug 2012 11:00:08 -0400 Subject: xen/swiotlb: Fix compile warnings when using plain integer instead of NULL pointer. arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/pci-swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 2d58b3ff4fae..1ab45941502d 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -63,6 +63,6 @@ void __init pci_xen_swiotlb_init(void) } } IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - 0, + NULL, pci_xen_swiotlb_init, - 0); + NULL); -- cgit v1.2.1 From 51faaf2b0d5c7f44d82964f0c70b1c4e44d4e633 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 22 Aug 2012 13:00:10 -0400 Subject: Revert "xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain." and "xen/x86: Use memblock_reserve for sensitive areas." 
This reverts commit 806c312e50f122c47913145cf884f53dd09d9199 and commit 59b294403e9814e7c1154043567f0d71bac7a511. And also documents setup.c and why we want to do it that way, which is that we tried to make the the memblock_reserve more selective so that it would be clear what region is reserved. Sadly we ran in the problem wherein on a 64-bit hypervisor with a 32-bit initial domain, the pt_base has the cr3 value which is not neccessarily where the pagetable starts! As Jan put it: " Actually, the adjustment turns out to be correct: The page tables for a 32-on-64 dom0 get allocated in the order "first L1", "first L2", "first L3", so the offset to the page table base is indeed 2. When reading xen/include/public/xen.h's comment very strictly, this is not a violation (since there nothing is said that the first thing in the page table space is pointed to by pt_base; I admit that this seems to be implied though, namely do I think that it is implied that the page table space is the range [pt_base, pt_base + nt_pt_frames), whereas that range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), which - without a priori knowledge - the kernel would have difficulty to figure out)." - so lets just fall back to the easy way and reserve the whole region. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 60 ------------------------------------------------ arch/x86/xen/p2m.c | 5 ---- arch/x86/xen/setup.c | 27 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 65 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 511f92d79e4a..ff962d4b821e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -998,66 +998,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } -/* - * If the MFN is not in the m2p (provided to us by the hypervisor) this - * function won't do anything. In practice this means that the XenBus - * MFN won't be available for the initial domain. */ -static unsigned long __init xen_reserve_mfn(unsigned long mfn) -{ - unsigned long pfn, end_pfn = 0; - - if (!mfn) - return end_pfn; - - pfn = mfn_to_pfn(mfn); - if (phys_to_machine_mapping_valid(pfn)) { - end_pfn = PFN_PHYS(pfn) + PAGE_SIZE; - memblock_reserve(PFN_PHYS(pfn), end_pfn); - } - return end_pfn; -} -static void __init xen_reserve_internals(void) -{ - unsigned long size; - unsigned long last_phys = 0; - - if (!xen_pv_domain()) - return; - - /* xen_start_info does not exist in the M2P, hence can't use - * xen_reserve_mfn. */ - memblock_reserve(__pa(xen_start_info), PAGE_SIZE); - last_phys = __pa(xen_start_info) + PAGE_SIZE; - - last_phys = max(xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)), last_phys); - last_phys = max(xen_reserve_mfn(xen_start_info->store_mfn), last_phys); - if (!xen_initial_domain()) - last_phys = max(xen_reserve_mfn(xen_start_info->console.domU.mfn), last_phys); - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - - /* - * ALIGN up to compensate for the p2m_page pointing to an array that - * can partially filled (look in xen_build_dynamic_phys_to_machine). - */ - - size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - - /* We could use xen_reserve_mfn here, but would end up looping quite - * a lot (and call memblock_reserve for each PAGE), so lets just use - * the easy way and reserve it wholesale. 
*/ - memblock_reserve(__pa(xen_start_info->mfn_list), size); - last_phys = max(__pa(xen_start_info->mfn_list) + size, last_phys); - /* The pagetables are reserved in mmu.c */ - - /* Under 64-bit hypervisor with a 32-bit domain, the hypervisor - * offsets the pt_base by two pages. Hence the reservation that is done - * in mmu.c misses two pages. We correct it here if we detect this. */ - if (last_phys < __pa(xen_start_info->pt_base)) - memblock_reserve(last_phys, __pa(xen_start_info->pt_base) - last_phys); -} void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1421,7 +1362,6 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); - xen_reserve_internals(); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6a2bfa43c8a1..e4adbfbdfada 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -388,11 +388,6 @@ void __init xen_build_dynamic_phys_to_machine(void) } m2p_override_init(); - - /* NOTE: We cannot call memblock_reserve here for the mfn_list as there - * isn't enough pieces to make it work (for one - we are still using the - * Xen provided pagetable). Do it later in xen_reserve_internals. - */ } unsigned long get_phys_to_machine(unsigned long pfn) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 9efca750405d..740517be4da5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -424,6 +424,33 @@ char * __init xen_memory_setup(void) e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); + /* + * Reserve Xen bits: + * - mfn_list + * - xen_start_info + * See comment above "struct start_info" in + * We tried to make the the memblock_reserve more selective so + * that it would be clear what region is reserved. Sadly we ran + * in the problem wherein on a 64-bit hypervisor with a 32-bit + * initial domain, the pt_base has the cr3 value which is not + * neccessarily where the pagetable starts! As Jan put it: " + * Actually, the adjustment turns out to be correct: The page + * tables for a 32-on-64 dom0 get allocated in the order "first L1", + * "first L2", "first L3", so the offset to the page table base is + * indeed 2. When reading xen/include/public/xen.h's comment + * very strictly, this is not a violation (since there nothing is said + * that the first thing in the page table space is pointed to by + * pt_base; I admit that this seems to be implied though, namely + * do I think that it is implied that the page table space is the + * range [pt_base, pt_base + nt_pt_frames), whereas that + * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), + * which - without a priori knowledge - the kernel would have + * difficulty to figure out)." - so lets just fall back to the + * easy way and reserve the whole region. + */ + memblock_reserve(__pa(xen_start_info->mfn_list), + xen_start_info->pt_base - xen_start_info->mfn_list); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); return "Xen"; -- cgit v1.2.1 From 3699aad047e16a5775b1d051425f422a9384270d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 28 Jun 2012 22:47:35 -0400 Subject: xen/mmu: The xen_setup_kernel_pagetable doesn't need to return anything. We don't need to return the new PGD - as we do not use it. 
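Condensed, the interface change is just this pair of prototypes (a sketch with a stub typedef so it stands alone; the _old/_new suffixes are illustrative, not names from the patch):

typedef struct pgd pgd_t;	/* stub type for the sketch */

/* Before: callers received a pgd_t * that nothing ever read. */
pgd_t *xen_setup_kernel_pagetable_old(pgd_t *pgd, unsigned long max_pfn);

/* After: same side effects, no dead return value; the caller now
 * passes xen_start_info->pt_base straight in instead of keeping a
 * local 'pgd' variable around. */
void xen_setup_kernel_pagetable_new(pgd_t *pgd, unsigned long max_pfn);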
Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 5 +---- arch/x86/xen/mmu.c | 10 ++-------- arch/x86/xen/xen-ops.h | 2 +- 3 files changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ff962d4b821e..d87a038a0484 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1258,7 +1258,6 @@ asmlinkage void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; - pgd_t *pgd; if (!xen_start_info) return; @@ -1350,8 +1349,6 @@ asmlinkage void __init xen_start_kernel(void) acpi_numa = -1; #endif - pgd = (pgd_t *)xen_start_info->pt_base; - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1360,7 +1357,7 @@ asmlinkage void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3a73785631ce..4ac21a4c6da4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1719,8 +1719,7 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; @@ -1781,8 +1780,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return pgd; } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); @@ -1825,8 +1822,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1858,8 +1854,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..2230f57a6ebe 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; -- cgit v1.2.1 From 4fac153a7a260e40e10008a0d7a272719684e4cd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 Jul 2012 13:55:25 -0400 Subject: xen/mmu: Provide comments describing the _ka and _va aliasing issue Which is that the level2_kernel_pgt (__ka virtual addresses) and level2_ident_pgt (__va virtual address) contain the same PMD entries. So if you modify a PTE in __ka, it will be reflected in __va (and vice-versa). 
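The aliasing boils down to two slots holding pointers to one and the same table; a self-contained userspace analogy (illustrative only, not the kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long pmd[512] = { 0 };	/* one shared "PMD" page    */
	unsigned long *va_view = pmd;	/* reached via L4[272][0]   */
	unsigned long *ka_view = pmd;	/* reached via L4[511][511] */

	va_view[1] = 0xdeadbeef;	/* "modify a PTE in __va"   */
	printf("%#lx\n", ka_view[1]);	/* ...and __ka sees it too  */
	return 0;
}

Repointing one of the views at a different table (or at nothing) is what breaks the alias, which is why modifying the PMD entries themselves, as cleanup_highmap does, is safe.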
Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4ac21a4c6da4..6ba610098dd9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1734,19 +1734,36 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) init_level4_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt + * L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_level4_pgt); + + /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt + * L3_i[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: + * Both L4[272][0] and L4[511][511] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __ka space! (But if you just + * modify the PMD table to point to other PTE's or none, then you + * are OK - which is what cleanup_highmap does) */ memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Graft it onto L4[511][511] */ memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Note that we don't do anything with level1_fixmap_pgt which + * we don't need. */ /* Set up identity map */ xen_map_identity_early(level2_ident_pgt, max_pfn); -- cgit v1.2.1 From ae895ed7839f918bbc8d5425b8973b25a534f4eb Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Jul 2012 11:57:04 -0400 Subject: xen/mmu: use copy_page instead of memcpy. After all, this is what it is there for. Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6ba610098dd9..7247e5a62f27 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1754,14 +1754,14 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you * are OK - which is what cleanup_highmap does) */ - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(level2_ident_pgt, l2); /* Graft it onto L4[511][511] */ - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(level2_kernel_pgt, l2); /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(level2_fixmap_pgt, l2); /* Note that we don't do anything with level1_fixmap_pgt which * we don't need. 
*/ @@ -1821,8 +1821,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) */ swapper_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - memcpy(swapper_kernel_pmd, initial_kernel_pmd, - sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(swapper_kernel_pmd, initial_kernel_pmd); swapper_pg_dir[KERNEL_PGD_BOUNDARY] = __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); @@ -1851,11 +1850,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(initial_kernel_pmd, kernel_pmd); xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + copy_page(initial_page_table, pgd); initial_page_table[KERNEL_PGD_BOUNDARY] = __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); -- cgit v1.2.1 From caaf9ecf16feffa4f1a5a0d617bc78906a114514 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 Jul 2012 13:59:36 -0400 Subject: xen/mmu: For 64-bit do not call xen_map_identity_early B/c we do not need it. During the startup the Xen provides us with all the initial memory mapped that we need to function. The initial memory mapped is up to the bootstack, which means we can reference using __ka up to 4.f): (from xen/interface/xen.h): 4. This the order of bootstrap elements in the initial virtual region: a. relocated kernel image b. initial ram disk [mod_start, mod_len] c. list of allocated page frames [mfn_list, nr_pages] d. start_info_t structure [register ESI (x86)] e. bootstrap page tables [pt_base, CR3 (x86)] f. bootstrap stack [register ESP (x86)] (initial ram disk may be ommitted). [v1: More comments in git commit] Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7247e5a62f27..a59070b09055 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -84,6 +84,7 @@ */ DEFINE_SPINLOCK(xen_reservation_lock); +#ifdef CONFIG_X86_32 /* * Identity map, in addition to plain kernel map. This needs to be * large enough to allocate page table pages to allocate the rest. @@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); */ #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - +#endif #ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; @@ -1628,7 +1629,7 @@ static void set_page_prot(void *addr, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) BUG(); } - +#ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; @@ -1679,7 +1680,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } - +#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1765,14 +1766,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Note that we don't do anything with level1_fixmap_pgt which * we don't need. 
*/ - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); - /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); -- cgit v1.2.1 From 488f046df922af992c1a718eff276529c0510885 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Jul 2012 12:00:56 -0400 Subject: xen/mmu: Recycle the Xen provided L4, L3, and L2 pages As we are not using them. We end up only using the L1 pagetables and grafting those to our page-tables. [v1: Per Stefano's suggestion squashed two commits] [v2: Per Stefano's suggestion simplified loop] [v3: Fix smatch warnings] [v4: Add more comments] Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a59070b09055..b44e6a88ea74 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1708,7 +1708,20 @@ static void convert_pfn_mfn(void *v) for (i = 0; i < PTRS_PER_PTE; i++) pte[i] = xen_make_pte(pte[i].pte); } - +static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, + unsigned long addr) +{ + if (*pt_base == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_base)++; + } + if (*pt_end == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_end)--; + } +} /* * Set up the initial kernel pagetable. * @@ -1724,6 +1737,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + unsigned long addr[3]; + unsigned long pt_base, pt_end; + unsigned i; /* max_pfn_mapped is the last pfn mapped in the initial memory * mappings. Considering that on Xen after the kernel mappings we @@ -1731,6 +1747,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) * set max_pfn_mapped to the last real pfn mapped. */ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); + pt_end = pt_base + xen_start_info->nr_pt_frames; + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1749,6 +1768,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + addr[0] = (unsigned long)pgd; + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: * Both L4[272][0] and L4[511][511] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space @@ -1782,20 +1804,26 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* Switch over */ - pgd = init_level4_pgt; - /* * At this stage there can be no user pgd, and no page * structure to attach it to, so make sure we just set kernel * pgd. 
*/ xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); + __xen_write_cr3(true, __pa(init_level4_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); + /* We can't that easily rip out L3 and L2, as the Xen pagetables are + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for + * the initial domain. For guests using the toolstack, they are in: + * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only + * rip out the [L4] (pgd), but for guests we shave off three pages. + */ + for (i = 0; i < ARRAY_SIZE(addr); i++) + check_pt_base(&pt_base, &pt_end, addr[i]); + + /* Our (by three pages) smaller Xen pagetable that we are using */ + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); -- cgit v1.2.1 From 357a3cfb147ee8e97c6f9cdc51e9a33aa56f7d99 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 19 Jul 2012 13:52:29 -0400 Subject: xen/p2m: Add logic to revector a P2M tree to use __va leafs. During bootup Xen supplies us with a P2M array. It sticks it right after the ramdisk, as can be seen with a 128GB PV guest: (certain parts removed for clarity): xc_dom_build_image: called xc_dom_alloc_segment: kernel : 0xffffffff81000000 -> 0xffffffff81e43000 (pfn 0x1000 + 0xe43 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x1000+0xe43 at 0x7f097d8bf000 xc_dom_alloc_segment: ramdisk : 0xffffffff81e43000 -> 0xffffffff925c7000 (pfn 0x1e43 + 0x10784 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x1e43+0x10784 at 0x7f0952dd2000 xc_dom_alloc_segment: phys2mach : 0xffffffff925c7000 -> 0xffffffffa25c7000 (pfn 0x125c7 + 0x10000 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x125c7+0x10000 at 0x7f0942dd2000 xc_dom_alloc_page : start info : 0xffffffffa25c7000 (pfn 0x225c7) xc_dom_alloc_page : xenstore : 0xffffffffa25c8000 (pfn 0x225c8) xc_dom_alloc_page : console : 0xffffffffa25c9000 (pfn 0x225c9) nr_page_tables: 0x0000ffffffffffff/48: 0xffff000000000000 -> 0xffffffffffffffff, 1 table(s) nr_page_tables: 0x0000007fffffffff/39: 0xffffff8000000000 -> 0xffffffffffffffff, 1 table(s) nr_page_tables: 0x000000003fffffff/30: 0xffffffff80000000 -> 0xffffffffbfffffff, 1 table(s) nr_page_tables: 0x00000000001fffff/21: 0xffffffff80000000 -> 0xffffffffa27fffff, 276 table(s) xc_dom_alloc_segment: page tables : 0xffffffffa25ca000 -> 0xffffffffa26e1000 (pfn 0x225ca + 0x117 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x225ca+0x117 at 0x7f097d7a8000 xc_dom_alloc_page : boot stack : 0xffffffffa26e1000 (pfn 0x226e1) xc_dom_build_image : virt_alloc_end : 0xffffffffa26e2000 xc_dom_build_image : virt_pgtab_end : 0xffffffffa2800000 So the physical memory and virtual (using __START_KERNEL_map addresses) layout looks as so: phys __ka /------------\ /-------------------\ | 0 | empty | 0xffffffff80000000| | .. | | .. | | 16MB | <= kernel starts | 0xffffffff81000000| | .. | | | | 30MB | <= kernel ends => | 0xffffffff81e43000| | .. | & ramdisk starts | .. | | 293MB | <= ramdisk ends=> | 0xffffffff925c7000| | .. | & P2M starts | .. | | .. | | .. | | 549MB | <= P2M ends => | 0xffffffffa25c7000| | .. | start_info | 0xffffffffa25c7000| | .. | xenstore | 0xffffffffa25c8000| | .. | cosole | 0xffffffffa25c9000| | 549MB | <= page tables => | 0xffffffffa25ca000| | .. | | | | 550MB | <= PGT end => | 0xffffffffa26e1000| | .. 
| boot stack | | \------------/ \-------------------/ As can be seen, the ramdisk, P2M and pagetables are taking a bit of __ka addresses space. Which is a problem since the MODULES_VADDR starts at 0xffffffffa0000000 - and P2M sits right in there! This results during bootup with the inability to load modules, with this error: ------------[ cut here ]------------ WARNING: at /home/konrad/ssd/linux/mm/vmalloc.c:106 vmap_page_range_noflush+0x2d9/0x370() Call Trace: [] warn_slowpath_common+0x7a/0xb0 [] ? __raw_callee_save_xen_pmd_val+0x11/0x1e [] warn_slowpath_null+0x15/0x20 [] vmap_page_range_noflush+0x2d9/0x370 [] map_vm_area+0x2d/0x50 [] __vmalloc_node_range+0x160/0x250 [] ? module_alloc_update_bounds+0x19/0x80 [] ? load_module+0x66/0x19c0 [] module_alloc+0x5c/0x60 [] ? module_alloc_update_bounds+0x19/0x80 [] module_alloc_update_bounds+0x19/0x80 [] load_module+0xfa3/0x19c0 [] ? security_file_permission+0x86/0x90 [] sys_init_module+0x5a/0x220 [] system_call_fastpath+0x16/0x1b ---[ end trace fd8f7704fdea0291 ]--- vmalloc: allocation failure, allocated 16384 of 20480 bytes modprobe: page allocation failure: order:0, mode:0xd2 Since the __va and __ka are 1:1 up to MODULES_VADDR and cleanup_highmap rids __ka of the ramdisk mapping, what we want to do is similar - get rid of the P2M in the __ka address space. There are two ways of fixing this: 1) All P2M lookups instead of using the __ka address would use the __va address. This means we can safely erase from __ka space the PMD pointers that point to the PFNs for P2M array and be OK. 2). Allocate a new array, copy the existing P2M into it, revector the P2M tree to use that, and return the old P2M to the memory allocate. This has the advantage that it sets the stage for using XEN_ELF_NOTE_INIT_P2M feature. That feature allows us to set the exact virtual address space we want for the P2M - and allows us to boot as initial domain on large machines. So we pick option 2). This patch only lays the groundwork in the P2M code. The patch that modifies the MMU is called "xen/mmu: Copy and revector the P2M tree." Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-ops.h | 1 + 2 files changed, 71 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index e4adbfbdfada..996ee2bf7bdb 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -389,7 +389,77 @@ void __init xen_build_dynamic_phys_to_machine(void) m2p_override_init(); } +#ifdef CONFIG_X86_64 +#include +unsigned long __init xen_revector_p2m_tree(void) +{ + unsigned long va_start; + unsigned long va_end; + unsigned long pfn; + unsigned long *mfn_list = NULL; + unsigned long size; + + va_start = xen_start_info->mfn_list; + /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), + * so make sure it is rounded up to that */ + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + va_end = va_start + size; + + /* If we were revectored already, don't do it again. 
*/ + if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) + return 0; + + mfn_list = alloc_bootmem_align(size, PAGE_SIZE); + if (!mfn_list) { + pr_warn("Could not allocate space for a new P2M tree!\n"); + return xen_start_info->mfn_list; + } + /* Fill it out with INVALID_P2M_ENTRY value */ + memset(mfn_list, 0xFF, size); + + for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx; + unsigned long *mid_p; + + if (!p2m_top[topidx]) + continue; + + if (p2m_top[topidx] == p2m_mid_missing) + continue; + + mididx = p2m_mid_index(pfn); + mid_p = p2m_top[topidx][mididx]; + if (!mid_p) + continue; + if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) + continue; + + if ((unsigned long)mid_p == INVALID_P2M_ENTRY) + continue; + + /* The old va. Rebase it on mfn_list */ + if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { + unsigned long *new; + + new = &mfn_list[pfn]; + + copy_page(new, mid_p); + p2m_top[topidx][mididx] = &mfn_list[pfn]; + p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn]); + } + /* This should be the leafs allocated for identity from _brk. */ + } + return (unsigned long)mfn_list; + +} +#else +unsigned long __init xen_revector_p2m_tree(void) +{ + return 0; +} +#endif unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, mididx, idx; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2230f57a6ebe..bb5a8105ea86 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); +unsigned long __init xen_revector_p2m_tree(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); -- cgit v1.2.1 From 7f9140626c757b773624b97865cb53c2a8348a69 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Jul 2012 12:47:40 -0400 Subject: xen/mmu: Copy and revector the P2M tree. Please first read the description in "xen/p2m: Add logic to revector a P2M tree to use __va leafs" patch. The 'xen_revector_p2m_tree()' function allocates a new P2M tree copies the contents of the old one in it, and returns the new one. At this stage, the __ka address space (which is what the old P2M tree was using) is partially disassembled. The cleanup_highmap has removed the PMD entries from 0-16MB and anything past _brk_end up to the max_pfn_mapped (which is the end of the ramdisk). We have revectored the P2M tree (and the one for save/restore as well) to use new shiny __va address to new MFNs. The xen_start_info has been taken care of already in 'xen_setup_kernel_pagetable()' and xen_start_info->shared_info in 'xen_setup_shared_info()', so we are free to roam and delete PMD entries - which is exactly what we are going to do. We rip out the __ka for the old P2M array. 
[v1: Fix smatch warnings] [v2: memset was doing 0 instead of 0xff] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b44e6a88ea74..a640949f78d4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1183,9 +1183,64 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) static void xen_post_allocator_init(void); +#ifdef CONFIG_X86_64 +static void __init xen_cleanhighmap(unsigned long vaddr, + unsigned long vaddr_end) +{ + unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; + pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); + + /* NOTE: The loop is more greedy than the cleanup_highmap variant. + * We include the PMD passed in on _both_ boundaries. */ + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + pmd++, vaddr += PMD_SIZE) { + if (pmd_none(*pmd)) + continue; + if (vaddr < (unsigned long) _text || vaddr > kernel_end) + set_pmd(pmd, __pmd(0)); + } + /* In case we did something silly, we should crash in this function + * instead of somewhere later and be confusing. */ + xen_mc_flush(); +} +#endif static void __init xen_pagetable_setup_done(pgd_t *base) { +#ifdef CONFIG_X86_64 + unsigned long size; + unsigned long addr; +#endif + xen_setup_shared_info(); +#ifdef CONFIG_X86_64 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long new_mfn_list; + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + /* On 32-bit, we get zero so this never gets executed. */ + new_mfn_list = xen_revector_p2m_tree(); + if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { + /* using __ka address and sticking INVALID_P2M_ENTRY! */ + memset((void *)xen_start_info->mfn_list, 0xff, size); + + /* We should be in __ka space. */ + BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); + addr = xen_start_info->mfn_list; + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + /* We roundup to the PMD, which means that if anybody at this stage is + * using the __ka address of xen_start_info or xen_start_info->shared_info + * they are in going to crash. Fortunatly we have already revectored + * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ + size = roundup(size, PMD_SIZE); + xen_cleanhighmap(addr, addr + size); + + memblock_free(__pa(xen_start_info->mfn_list), size); + /* And revector! Bye bye old array */ + xen_start_info->mfn_list = new_mfn_list; + } + } +#endif xen_post_allocator_init(); } @@ -1824,6 +1879,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Our (by three pages) smaller Xen pagetable that we are using */ memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); + /* Revector the xen_start_info */ + xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); -- cgit v1.2.1 From 3aca7fbc8ede0dd194317b2e3144815128ffb232 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 14 Aug 2012 14:34:00 -0400 Subject: xen/mmu: Remove from __ka space PMD entries for pagetables. Please first read the description in "xen/mmu: Copy and revector the P2M tree." At this stage, the __ka address space (which is what the old P2M tree was using) is partially disassembled. 
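At its core the revector step described above is "copy the leafs out of the doomed region and repoint the tree at the copies"; a toy userspace rendering (two levels instead of three, hypothetical names, and none of the identity/missing special cases):

#include <stdio.h>
#include <string.h>

#define PER_PAGE 4	/* entries per leaf; P2M_PER_PAGE is 512 */

static void revector(unsigned long **top, size_t slots,
		     unsigned long *old, size_t n_old,
		     unsigned long *fresh)
{
	size_t i, used = 0;

	for (i = 0; i < slots; i++) {
		unsigned long *leaf = top[i];

		if (!leaf || leaf < old || leaf >= old + n_old)
			continue;		/* leaf not in old array */
		memcpy(&fresh[used], leaf, PER_PAGE * sizeof(*leaf));
		top[i] = &fresh[used];		/* repoint at the copy   */
		used += PER_PAGE;
	}
}

int main(void)
{
	unsigned long old[2 * PER_PAGE] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	unsigned long fresh[2 * PER_PAGE] = { 0 };
	unsigned long *top[2] = { &old[0], &old[PER_PAGE] };

	revector(top, 2, old, 2 * PER_PAGE, fresh);
	printf("%lu %lu\n", top[0][0], top[1][0]);	/* prints: 1 5 */
	return 0;
}

In the kernel the copies come from alloc_bootmem_align() and the doomed array is later wiped and handed back with memblock_free(), as the hunks above show.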
The cleanup_highmap has removed the PMD entries from 0-16MB and anything past _brk_end up to the max_pfn_mapped (which is the end of the ramdisk). The xen_remove_p2m_tree and code around has ripped out the __ka for the old P2M array. Here we continue on doing it to where the Xen page-tables were. It is safe to do it, as the page-tables are addressed using __va. For good measure we delete anything that is within MODULES_VADDR and up to the end of the PMD. At this point the __ka only contains PMD entries for the start of the kernel up to __brk. [v1: Per Stefano's suggestion wrapped the MODULES_VADDR in debug] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a640949f78d4..3f8e963b76c0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1240,6 +1240,25 @@ static void __init xen_pagetable_setup_done(pgd_t *base) xen_start_info->mfn_list = new_mfn_list; } } + /* At this stage, cleanup_highmap has already cleaned __ka space + * from _brk_limit way up to the max_pfn_mapped (which is the end of + * the ramdisk). We continue on, erasing PMD entries that point to page + * tables - do note that they are accessible at this stage via __va. + * For good measure we also round up to the PMD - which means that if + * anybody is using __ka address to the initial boot-stack - and try + * to use it - they are going to crash. The xen_start_info has been + * taken care of already in xen_setup_kernel_pagetable. */ + addr = xen_start_info->pt_base; + size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); + + xen_cleanhighmap(addr, addr + size); + xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); +#ifdef DEBUG + /* This is superflous and is not neccessary, but you know what + * lets do it. The MODULES_VADDR -> MODULES_END should be clear of + * anything at this stage. */ + xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); +#endif #endif xen_post_allocator_init(); } -- cgit v1.2.1 From 785f62314984ea3af9dd830b020289ba2509ae69 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 14 Aug 2012 16:37:31 -0400 Subject: xen/mmu: Release just the MFN list, not MFN list and part of pagetables. We call memblock_reserve for [start of mfn list] -> [PMD aligned end of mfn list] instead of -> --- arch/x86/xen/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3f8e963b76c0..5b2cb54425ce 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1227,7 +1227,6 @@ static void __init xen_pagetable_setup_done(pgd_t *base) /* We should be in __ka space. */ BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); addr = xen_start_info->mfn_list; - size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); /* We roundup to the PMD, which means that if anybody at this stage is * using the __ka address of xen_start_info or xen_start_info->shared_info * they are in going to crash. Fortunatly we have already revectored @@ -1235,6 +1234,7 @@ static void __init xen_pagetable_setup_done(pgd_t *base) size = roundup(size, PMD_SIZE); xen_cleanhighmap(addr, addr + size); + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); memblock_free(__pa(xen_start_info->mfn_list), size); /* And revector! 
From 3fc509fc0c590900568ef516a37101d88f3476f5 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Thu, 16 Aug 2012 16:38:55 -0400
Subject: xen/p2m: When revectoring deal with holes in the P2M array.

When we free the PFNs and then subsequently populate them back during
bootup:

Freeing 20000-20200 pfn range: 512 pages freed
1-1 mapping on 20000->20200
Freeing 40000-40200 pfn range: 512 pages freed
1-1 mapping on 40000->40200
Freeing bad80-badf4 pfn range: 116 pages freed
1-1 mapping on bad80->badf4
Freeing badf6-bae7f pfn range: 137 pages freed
1-1 mapping on badf6->bae7f
Freeing bb000-100000 pfn range: 282624 pages freed
1-1 mapping on bb000->100000
Released 283999 pages of unused memory
Set 283999 page(s) to 1-1 mapping
Populating 1acb8a-1f20e9 pfn range: 283999 pages added

we end up having the P2M array (that is the one that was grafted on the
P2M tree) filled with IDENTITY_FRAME or INVALID_P2M_ENTRY entries.

The patch titled "xen/p2m: Reuse existing P2M leafs if they are filled
with 1:1 PFNs or INVALID." recycles said slots: it replaces the P2M
tree leafs that pointed at &mfn_list[xx] with p2m_identity or
p2m_missing, and re-uses those P2M array sections for other P2M tree
leafs.

For the above mentioned bootup excerpt, the PFNs at 0x20000->0x20200
are going to be IDENTITY based:

P2M[0][256][0] -> P2M[0][257][0] get turned into IDENTITY_FRAME.

We can re-use that and replace P2M[0][256] to point to p2m_identity.
The "old" page (the grafted P2M array provided by Xen) that was at
P2M[0][256] gets put somewhere else. Specifically at P2M[6][358],
because when we populate back:

Populating 1acb8a-1f20e9 pfn range: 283999 pages added

we fill P2M[6][358][0] (and P2M[6][358], P2M[6][359], ...) with the new
MFNs.

That is all OK, except when we revector we assume that the PFN count
would be the same in the grafted P2M array and in the newly allocated
one. That is no longer the case: there are holes in the P2M that point
to p2m_missing or p2m_identity, and we have to take that into account.

[v2: Check for overflow]
[v3: Move within the __va check]
[v4: Fix the computation]
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/p2m.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 996ee2bf7bdb..c3e92912c3fb 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -396,6 +396,7 @@ unsigned long __init xen_revector_p2m_tree(void)
 	unsigned long va_start;
 	unsigned long va_end;
 	unsigned long pfn;
+	unsigned long pfn_free = 0;
 	unsigned long *mfn_list = NULL;
 	unsigned long size;

@@ -442,11 +443,18 @@ unsigned long __init xen_revector_p2m_tree(void)
 		if (mid_p >= (unsigned long *)va_start &&
 		    mid_p <= (unsigned long *)va_end) {
 			unsigned long *new;

-			new = &mfn_list[pfn];
+			if (pfn_free > (size / sizeof(unsigned long))) {
+				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
+				     size / sizeof(unsigned long), pfn_free);
+				return 0;
+			}
+			new = &mfn_list[pfn_free];
 			copy_page(new, mid_p);
-			p2m_top[topidx][mididx] = &mfn_list[pfn];
-			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn]);
+			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
+
+			pfn_free += P2M_PER_PAGE;
 		}
 		/* This should be the leafs allocated for identity from _brk. */
-- cgit v1.2.1
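The cursor-based compaction this hunk introduces can be illustrated with a
standalone sketch. The leaf size, hole layout and array sizes below are toy
values; only the pfn_free-cursor-plus-overflow-check shape matches the
patch:

#include <stdio.h>
#include <string.h>

#define P2M_PER_PAGE 4		/* toy leaf size; really 512 on x86-64 */
#define NLEAFS       6

int main(void)
{
	/* 1 = leaf lives in the grafted array, 0 = hole (p2m_missing etc.) */
	int backed[NLEAFS] = { 1, 0, 1, 1, 0, 1 };
	unsigned long src[NLEAFS * P2M_PER_PAGE];
	unsigned long dst[4 * P2M_PER_PAGE];	/* room for the 4 backed leafs only */
	unsigned long pfn_free = 0;		/* destination cursor */
	int leaf;

	memset(src, 0xff, sizeof(src));		/* INVALID_P2M_ENTRY-style fill */

	for (leaf = 0; leaf < NLEAFS; leaf++) {
		if (!backed[leaf])
			continue;	/* hole: nothing to copy, cursor stays put */
		if (pfn_free + P2M_PER_PAGE > sizeof(dst) / sizeof(dst[0])) {
			fprintf(stderr, "allocated too little!\n");	/* the WARN case */
			return 1;
		}
		memcpy(&dst[pfn_free], &src[leaf * P2M_PER_PAGE],
		       P2M_PER_PAGE * sizeof(unsigned long));
		printf("leaf %d -> dst[%lu]\n", leaf, pfn_free);
		pfn_free += P2M_PER_PAGE;
	}
	return 0;
}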
From 328731876451a837f56e66ffa11de053ed5daf73 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Fri, 17 Aug 2012 09:35:31 -0400
Subject: xen/mmu: If the revector fails, don't attempt to revector anything else.

If the P2M revectoring fails, we would try to continue on by cleaning
the PMD for the L1 (PTE) page-tables. The xen_cleanhighmap is greedy
and erases the PMD on both boundaries. Since the P2M array can share
the PMD, we would wipe out part of the __ka that is still used in the
P2M tree to point to P2M leafs.

This fixes it by skipping the rest of the __ka cleanup and continuing
on. If the revector fails, a nice WARN is printed so we can still
troubleshoot this.

Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/mmu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5b2cb54425ce..cb9db72b33f8 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1238,7 +1238,8 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 			memblock_free(__pa(xen_start_info->mfn_list), size);
 			/* And revector! Bye bye old array */
 			xen_start_info->mfn_list = new_mfn_list;
-		}
+		} else
+			goto skip;
 	}
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1259,6 +1260,7 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 	 * anything at this stage. */
 	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
 #endif
+skip:
 #endif
 	xen_post_allocator_init();
 }
-- cgit v1.2.1

From 69870a847856a1ba81f655a8633fce5f5b614730 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Thu, 30 Aug 2012 13:58:11 +0100
Subject: xen/mm: return more precise error from xen_remap_domain_range()

Callers of xen_remap_domain_range() need to know if the remap failed
because the frame is currently paged out, so they can retry the remap
later on. Return -ENOENT in this case.

This assumes that the error codes returned by Xen are a subset of those
used by the kernel. It is unclear if this is defined as part of the
hypercall ABI.

Acked-by: Andres Lagar-Cavilla
Signed-off-by: David Vrabel
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 885a22354a96..2d9e7c9c0e7b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2331,8 +2331,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		if (err)
 			goto out;

-		err = -EFAULT;
-		if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
+		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
+		if (err < 0)
 			goto out;

 		nr -= batch;
-- cgit v1.2.1

From b82776005369899c1c7ca2e4b2414bb64b538d2c Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Thu, 23 Aug 2012 14:36:15 -0400
Subject: xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB late when PV PCI is used.

With this patch we provide the functionality to initialize the
Xen-SWIOTLB late in the bootup cycle - specifically for Xen
PCI-frontend. We will still work if the user had supplied 'iommu=soft'
on the Linux command line.

Note: We cannot depend on after_bootmem to automatically determine
whether this is early or not. This is because when PCI IOMMUs are
initialized it is after after_bootmem but before a lot of "other"
subsystems are initialized.

CC: FUJITA Tomonori
[v1: Fix smatch warnings]
[v2: Added check for xen_swiotlb]
[v3: Rebased with new xen-swiotlb changes]
[v4: squashed xen/swiotlb: Depending on after_bootmem is not correct in]
Reviewed-by: Stefano Stabellini
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/include/asm/xen/swiotlb-xen.h |  2 ++
 arch/x86/xen/pci-swiotlb-xen.c         | 24 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 1be1ab7d6a41..ee52fcac6f72 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -5,10 +5,12 @@
 extern int xen_swiotlb;
 extern int __init pci_xen_swiotlb_detect(void);
 extern void __init pci_xen_swiotlb_init(void);
+extern int pci_xen_swiotlb_init_late(void);
 #else
 #define xen_swiotlb (0)
 static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
 static inline void __init pci_xen_swiotlb_init(void) { }
+static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
 #endif

 #endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 1c1722761eec..b152640d8388 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -12,7 +12,7 @@
 #include <asm/iommu.h>
 #include <asm/dma.h>
 #endif
-
+#include <linux/export.h>
 int xen_swiotlb __read_mostly;

 static struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -69,13 +69,33 @@ int __init pci_xen_swiotlb_detect(void)
 void __init pci_xen_swiotlb_init(void)
 {
 	if (xen_swiotlb) {
-		xen_swiotlb_init(1);
+		xen_swiotlb_init(1, true /* early */);
 		dma_ops = &xen_swiotlb_dma_ops;

 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
 }
+
+int pci_xen_swiotlb_init_late(void)
+{
+	int rc;
+
+	if (xen_swiotlb)
+		return 0;
+
+	rc = xen_swiotlb_init(1, false /* late */);
+	if (rc)
+		return rc;
+
+	dma_ops = &xen_swiotlb_dma_ops;
+	/* Make sure ACS will be enabled */
+	pci_request_acs();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
+
 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
 		  0,
 		  pci_xen_swiotlb_init,
-- cgit v1.2.1
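The early-versus-late split above follows a common pattern: the early path
can rely on boot-time allocation that is not expected to fail, while the
late path must report errors to its caller. A standalone userspace sketch
of that shape - the names echo the patch but everything here is a stand-in:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int tbl_ready;

/* Early boot: memory is carved out up front and failure is fatal, so the
 * init returns nothing - like pci_xen_swiotlb_init(). */
static void init_early(void)
{
	tbl_ready = 1;
	printf("early init: table carved out of boot memory\n");
}

/* Late (e.g. when a PCI frontend shows up): allocation can fail, so the
 * error is propagated - like pci_xen_swiotlb_init_late(). */
static int init_late(void)
{
	void *tbl = malloc(64 << 20);	/* stand-in for the bounce buffer */

	if (!tbl)
		return -ENOMEM;
	tbl_ready = 1;
	printf("late init: table allocated at %p\n", tbl);
	return 0;
}

int main(void)
{
	init_early();		/* what boot-time detection would do */
	tbl_ready = 0;		/* pretend boot-time init was skipped */
	if (!tbl_ready) {
		int rc = init_late();	/* what the late path does instead */
		if (rc)
			fprintf(stderr, "late init failed: %d\n", rc);
	}
	return 0;
}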
From 2a3bce8f6afb9118a7ac3c360a5baf7cdaec87bc Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Mon, 13 Aug 2012 11:00:08 -0400
Subject: xen/swiotlb: Fix compile warnings when using plain integer instead of NULL pointer.

arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer
arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer

Acked-by: Stefano Stabellini
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/pci-swiotlb-xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b152640d8388..1608244756de 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -97,6 +97,6 @@ int pci_xen_swiotlb_init_late(void)
 EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);

 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
-		  0,
+		  NULL,
 		  pci_xen_swiotlb_init,
-		  0);
+		  NULL);
-- cgit v1.2.1

From ffb8b233c2261b7978dc3bd759aaa19bd1a7fadf Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Fri, 21 Sep 2012 12:30:35 -0400
Subject: xen/x86: retrieve keyboard shift status flags from hypervisor.

The xen c/s 25873 allows the hypervisor to retrieve the NUMLOCK flag.
With this patch, the Linux kernel can get the state according to the
data in the BIOS.

Acked-by: Jan Beulich
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/enlighten.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 47b3acdc2ac5..67897152237c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1440,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void)
 		const struct dom0_vga_console_info *info =
 			(void *)((char *)xen_start_info +
 				 xen_start_info->console.dom0.info_off);
+		struct xen_platform_op op = {
+			.cmd = XENPF_firmware_info,
+			.interface_version = XENPF_INTERFACE_VERSION,
+			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
+		};

 		xen_init_vga(info, xen_start_info->console.dom0.info_size);
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;

+		if (HYPERVISOR_dom0_op(&op) == 0)
+			boot_params.kbd_status =
+				op.u.firmware_info.u.kbd_shift_flags;
+
 		xen_init_apic();

 		/* Make sure ACS will be enabled */
-- cgit v1.2.1
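boot_params.kbd_status ends up holding a BIOS-style shift-flags byte. Below
is a standalone sketch decoding it, assuming the conventional BIOS data
area (0x40:0x17) bit layout - the bit positions are recalled from that
convention, not taken from the patch:

#include <stdio.h>

#define KBD_SCROLLLOCK 0x10	/* bit 4: Scroll Lock active */
#define KBD_NUMLOCK    0x20	/* bit 5: Num Lock active */
#define KBD_CAPSLOCK   0x40	/* bit 6: Caps Lock active */

static void print_kbd_status(unsigned char flags)
{
	printf("scroll lock: %s\n", (flags & KBD_SCROLLLOCK) ? "on" : "off");
	printf("num lock:    %s\n", (flags & KBD_NUMLOCK) ? "on" : "off");
	printf("caps lock:   %s\n", (flags & KBD_CAPSLOCK) ? "on" : "off");
}

int main(void)
{
	print_kbd_status(0x20);	/* example: only Num Lock set */
	return 0;
}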
From aa387d630cfed1a694a9c8c61fba3877ba8d4f07 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Thu, 9 Feb 2012 11:33:51 +0800
Subject: xen/vga: add the xen EFI video mode support

In order to add Xen EFI framebuffer video support, it is required to
handle the new Xen video type (XEN_VGATYPE_EFI_LFB) in xen_init_vga and
to set the video type to VIDEO_TYPE_EFI to enable the EFI video mode.

The original patch from which this was broken out:
http://marc.info/?i=4E099AA6020000780004A4C6@nat28.tlf.novell.com

Signed-off-by: Jan Beulich
Signed-off-by: Tang Liang
[v2: The original author is Jan Beulich and Liang Tang ported it to upstream]
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/vga.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 1cd7f4d11e29..6722e3733f02 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
 			info->u.text_mode_3.font_height;
 		break;

+	case XEN_VGATYPE_EFI_LFB:
 	case XEN_VGATYPE_VESA_LFB:
 		if (size < offsetof(struct dom0_vga_console_info,
 				    u.vesa_lfb.gbl_caps))
@@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
 		screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
 		screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
 		screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+
+		if (info->video_type == XEN_VGATYPE_EFI_LFB) {
+			screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
+			break;
+		}
+
 		if (size >= offsetof(struct dom0_vga_console_info,
 				     u.vesa_lfb.gbl_caps)
 		    + sizeof(info->u.vesa_lfb.gbl_caps))
-- cgit v1.2.1
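The hunk relies on a share-then-specialize switch: the EFI case falls
through into the common linear-framebuffer setup and only overrides the
reported video type at the end. A standalone sketch of that control flow
(all types and values are made up):

#include <stdio.h>

enum vga_type { TYPE_TEXT, TYPE_VESA_LFB, TYPE_EFI_LFB };

static void init_video(enum vga_type type)
{
	const char *mode = "unknown";

	switch (type) {
	case TYPE_EFI_LFB:		/* deliberate fall through */
	case TYPE_VESA_LFB:
		mode = "vesa-lfb";	/* common LFB field setup happens here */
		if (type == TYPE_EFI_LFB) {
			mode = "efi-lfb";	/* EFI only overrides the type */
			break;
		}
		/* VESA-only capability probing would continue here. */
		break;
	case TYPE_TEXT:
		mode = "text";
		break;
	}
	printf("video mode: %s\n", mode);
}

int main(void)
{
	init_video(TYPE_EFI_LFB);
	init_video(TYPE_VESA_LFB);
	return 0;
}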