diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-16 14:01:03 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-16 14:01:03 -0500 |
commit | 497a5df7bf6ffd136ae21c49d1a01292930d7ca2 (patch) | |
tree | 774215d43cdc9fbd8660086d1801400163dcbd52 | |
parent | 714d8e7e27197dd39b2550e762a6a6fcf397a471 (diff) | |
parent | 0b97b03d88b40bfbd7ff0e069186a137d9786d43 (diff) | |
download | linux-next-497a5df7bf6ffd136ae21c49d1a01292930d7ca2.tar.gz |
Merge tag 'stable/for-linus-4.1-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen features and fixes from David Vrabel:
- use a single source list of hypercalls, generating other tables etc.
at build time.
- add a "Xen PV" APIC driver to support >255 VCPUs in PV guests.
- significant performance improve to guest save/restore/migration.
- scsiback/front save/restore support.
- infrastructure for multi-page xenbus rings.
- misc fixes.
* tag 'stable/for-linus-4.1-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen/pci: Try harder to get PXM information for Xen
xenbus_client: Extend interface to support multi-page ring
xen-pciback: also support disabling of bus-mastering and memory-write-invalidate
xen: support suspend/resume in pvscsi frontend
xen: scsiback: add LUN of restored domain
xen-scsiback: define a pr_fmt macro with xen-pvscsi
xen/mce: fix up xen_late_init_mcelog() error handling
xen/privcmd: improve performance of MMAPBATCH_V2
xen: unify foreign GFN map/unmap for auto-xlated physmap guests
x86/xen/apic: WARN with details.
x86/xen: Provide a "Xen PV" APIC driver to support >255 VCPUs
xen/pciback: Don't print scary messages when unsupported by hypervisor.
xen: use generated hypercall symbols in arch/x86/xen/xen-head.S
xen: use generated hypervisor symbols in arch/x86/xen/trace.c
xen: synchronize include/xen/interface/xen.h with xen
xen: build infrastructure for generating hypercall depending symbols
xen: balloon: Use static attribute groups for sysfs entries
xen: pcpu: Use static attribute groups for sysfs entry
31 files changed, 1246 insertions, 694 deletions
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 263a2044c65b..224081ccc92f 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -53,105 +53,33 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); static __read_mostly int xen_events_irq = -1; -/* map fgmfn of domid to lpfn in the current domain */ -static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn, - unsigned int domid) +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) { - int rc; - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - xen_ulong_t idx = fgmfn; - xen_pfn_t gpfn = lpfn; - int err = 0; - - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); - - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - if (rc || err) { - pr_warn("Failed to map pfn to mfn rc:%d:%d pfn:%lx mfn:%lx\n", - rc, err, lpfn, fgmfn); - return 1; - } - return 0; -} - -struct remap_data { - xen_pfn_t fgmfn; /* foreign domain's gmfn */ - pgprot_t prot; - domid_t domid; - struct vm_area_struct *vma; - int index; - struct page **pages; - struct xen_remap_mfn_info *info; -}; - -static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) -{ - struct remap_data *info = data; - struct page *page = info->pages[info->index++]; - unsigned long pfn = page_to_pfn(page); - pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); - - if (map_foreign_page(pfn, info->fgmfn, info->domid)) - return -EFAULT; - set_pte_at(info->vma->vm_mm, addr, ptep, pte); - - return 0; + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr, + prot, domid, pages); } +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); +/* Not used by XENFEAT_auto_translated guests. */ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t mfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) + unsigned long addr, + xen_pfn_t mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) { - int err; - struct remap_data data; - - /* TBD: Batching, current sole caller only does page at a time */ - if (nr > 1) - return -EINVAL; - - data.fgmfn = mfn; - data.prot = prot; - data.domid = domid; - data.vma = vma; - data.index = 0; - data.pages = pages; - err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, - remap_pte_fn, &data); - return err; + return -ENOSYS; } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int nr, struct page **pages) { - int i; - - for (i = 0; i < nr; i++) { - struct xen_remove_from_physmap xrp; - unsigned long rc, pfn; - - pfn = page_to_pfn(pages[i]); - - xrp.domid = DOMID_SELF; - xrp.gpfn = pfn; - rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - if (rc) { - pr_warn("Failed to unmap pfn:%lx rc:%ld\n", - pfn, rc); - return rc; - } - } - return 0; + return xen_xlate_unmap_gfn_range(vma, nr, pages); } EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile index 3323c2745248..a55abb9f6c5e 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/syscalls/Makefile @@ -19,6 +19,9 @@ quiet_cmd_syshdr = SYSHDR $@ quiet_cmd_systbl = SYSTBL $@ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ +quiet_cmd_hypercalls = HYPERCALLS $@ + cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<,$^) + syshdr_abi_unistd_32 := i386 $(uapi)/unistd_32.h: $(syscall32) $(syshdr) $(call if_changed,syshdr) @@ -47,10 +50,16 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl) $(out)/syscalls_64.h: $(syscall64) $(systbl) $(call if_changed,systbl) +$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh + $(call if_changed,hypercalls) + +$(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h + uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h +syshdr-$(CONFIG_XEN) += xen-hypercalls.h targets += $(uapisyshdr-y) $(syshdr-y) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 7005ced5d1ad..70e060ad879a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -7,6 +7,7 @@ #include <xen/xen.h> #include <xen/interface/physdev.h> #include "xen-ops.h" +#include "smp.h" static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { @@ -28,7 +29,186 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } +static unsigned long xen_set_apic_id(unsigned int x) +{ + WARN_ON(1); + return x; +} + +static unsigned int xen_get_apic_id(unsigned long x) +{ + return ((x)>>24) & 0xFFu; +} + +static u32 xen_apic_read(u32 reg) +{ + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.pcpu_info.xen_cpuid = 0, + }; + int ret = 0; + + /* Shouldn't need this as APIC is turned off for PV, and we only + * get called on the bootup processor. But just in case. */ + if (!xen_initial_domain() || smp_processor_id()) + return 0; + + if (reg == APIC_LVR) + return 0x10; +#ifdef CONFIG_X86_32 + if (reg == APIC_LDR) + return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); +#endif + if (reg != APIC_ID) + return 0; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return 0; + + return op.u.pcpu_info.apic_id << 24; +} + +static void xen_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN(1,"register: %x, value: %x\n", reg, val); +} + +static u64 xen_apic_icr_read(void) +{ + return 0; +} + +static void xen_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u32 xen_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static int xen_apic_probe_pv(void) +{ + if (xen_pv_domain()) + return 1; + + return 0; +} + +static int xen_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return xen_pv_domain(); +} + +static int xen_id_always_valid(int apicid) +{ + return 1; +} + +static int xen_id_always_registered(void) +{ + return 1; +} + +static int xen_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return initial_apic_id >> index_msb; +} + +#ifdef CONFIG_X86_32 +static int xen_x86_32_early_logical_apicid(int cpu) +{ + /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ + return 1 << cpu; +} +#endif + +static void xen_noop(void) +{ +} + +static void xen_silent_inquire(int apicid) +{ +} + +static struct apic xen_pv_apic = { + .name = "Xen PV", + .probe = xen_apic_probe_pv, + .acpi_madt_oem_check = xen_madt_oem_check, + .apic_id_valid = xen_id_always_valid, + .apic_id_registered = xen_id_always_registered, + + /* .irq_delivery_mode - used in native_compose_msi_msg only */ + /* .irq_dest_mode - used in native_compose_msi_msg only */ + + .target_cpus = default_target_cpus, + .disable_esr = 0, + /* .dest_logical - default_send_IPI_ use it but we use our own. */ + .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ + + .vector_allocation_domain = flat_vector_allocation_domain, + .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ + + .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ + .setup_apic_routing = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = physid_set_mask_of_physid, /* Used on 32-bit */ + .check_phys_apicid_present = default_check_phys_apicid_present, /* smp_sanity_check needs it */ + .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */ + + .get_apic_id = xen_get_apic_id, + .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ + .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */ + + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + +#ifdef CONFIG_SMP + .send_IPI_mask = xen_send_IPI_mask, + .send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself, + .send_IPI_allbutself = xen_send_IPI_allbutself, + .send_IPI_all = xen_send_IPI_all, + .send_IPI_self = xen_send_IPI_self, +#endif + /* .wait_for_init_deassert- used by AP bootup - smp_callin which we don't use */ + .inquire_remote_apic = xen_silent_inquire, + + .read = xen_apic_read, + .write = xen_apic_write, + .eoi_write = xen_apic_write, + + .icr_read = xen_apic_icr_read, + .icr_write = xen_apic_icr_write, + .wait_icr_idle = xen_noop, + .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, + +#ifdef CONFIG_X86_32 + /* generic_processor_info and setup_local_APIC. */ + .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, +#endif +}; + +static void __init xen_apic_check(void) +{ + if (apic == &xen_pv_apic) + return; + + pr_info("Switched APIC routing from %s to %s.\n", apic->name, + xen_pv_apic.name); + apic = &xen_pv_apic; +} void __init xen_init_apic(void) { x86_io_apic_ops.read = xen_io_apic_read; + /* On PV guests the APIC CPUID bit is disabled so none of the + * routines end up executing. */ + if (!xen_initial_domain()) + apic = &xen_pv_apic; + + x86_platform.apic_post_init = xen_apic_check; } +apic_driver(xen_pv_apic); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 81665c9f2132..94578efd3067 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -928,92 +928,6 @@ static void xen_io_delay(void) { } -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_set_apic_id(unsigned int x) -{ - WARN_ON(1); - return x; -} -static unsigned int xen_get_apic_id(unsigned long x) -{ - return ((x)>>24) & 0xFFu; -} -static u32 xen_apic_read(u32 reg) -{ - struct xen_platform_op op = { - .cmd = XENPF_get_cpuinfo, - .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, - }; - int ret = 0; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. */ - if (!xen_initial_domain() || smp_processor_id()) - return 0; - - if (reg == APIC_LVR) - return 0x10; - - if (reg != APIC_ID) - return 0; - - ret = HYPERVISOR_dom0_op(&op); - if (ret) - return 0; - - return op.u.pcpu_info.apic_id << 24; -} - -static void xen_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 xen_apic_icr_read(void) -{ - return 0; -} - -static void xen_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void xen_apic_wait_icr_idle(void) -{ - return; -} - -static u32 xen_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static void set_xen_basic_apic_ops(void) -{ - apic->read = xen_apic_read; - apic->write = xen_apic_write; - apic->icr_read = xen_apic_icr_read; - apic->icr_write = xen_apic_icr_write; - apic->wait_icr_idle = xen_apic_wait_icr_idle; - apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; - apic->set_apic_id = xen_set_apic_id; - apic->get_apic_id = xen_get_apic_id; - -#ifdef CONFIG_SMP - apic->send_IPI_allbutself = xen_send_IPI_allbutself; - apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; - apic->send_IPI_mask = xen_send_IPI_mask; - apic->send_IPI_all = xen_send_IPI_all; - apic->send_IPI_self = xen_send_IPI_self; -#endif -} - -#endif - static void xen_clts(void) { struct multicall_space mcs; @@ -1619,7 +1533,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* * set up the basic apic ops. */ - set_xen_basic_apic_ops(); + xen_init_apic(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { @@ -1732,8 +1646,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (HYPERVISOR_dom0_op(&op) == 0) boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; - xen_init_apic(); - /* Make sure ACS will be enabled */ pci_request_acs(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 65083ad63b6f..dd151b2045b0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2436,99 +2436,11 @@ void __init xen_hvm_init_mmu_ops(void) } #endif -#ifdef CONFIG_XEN_PVH -/* - * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user - * space creating new guest on pvh dom0 and needing to map domU pages. - */ -static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, - unsigned int domid) -{ - int rc, err = 0; - xen_pfn_t gpfn = lpfn; - xen_ulong_t idx = fgfn; - - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); - - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - if (rc < 0) - return rc; - return err; -} - -static int xlate_remove_from_p2m(unsigned long spfn, int count) -{ - struct xen_remove_from_physmap xrp; - int i, rc; - - for (i = 0; i < count; i++) { - xrp.domid = DOMID_SELF; - xrp.gpfn = spfn+i; - rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - if (rc) - break; - } - return rc; -} - -struct xlate_remap_data { - unsigned long fgfn; /* foreign domain's gfn */ - pgprot_t prot; - domid_t domid; - int index; - struct page **pages; -}; - -static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) -{ - int rc; - struct xlate_remap_data *remap = data; - unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); - pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); - - rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); - if (rc) - return rc; - native_set_pte(ptep, pteval); - - return 0; -} - -static int xlate_remap_gfn_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long mfn, - int nr, pgprot_t prot, unsigned domid, - struct page **pages) -{ - int err; - struct xlate_remap_data pvhdata; - - BUG_ON(!pages); - - pvhdata.fgfn = mfn; - pvhdata.prot = prot; - pvhdata.domid = domid; - pvhdata.index = 0; - pvhdata.pages = pages; - err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, - xlate_map_pte_fn, &pvhdata); - flush_tlb_all(); - return err; -} -#endif - #define REMAP_BATCH_SIZE 16 struct remap_data { - unsigned long mfn; + xen_pfn_t *mfn; + bool contiguous; pgprot_t prot; struct mmu_update *mmu_update; }; @@ -2537,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); + + /* If we have a contigious range, just update the mfn itself, + else update pointer to be "next mfn". */ + if (rmd->contiguous) + (*rmd->mfn)++; + else + rmd->mfn++; rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; rmd->mmu_update->val = pte_val_ma(pte); @@ -2546,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, return 0; } -int xen_remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t mfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) - +static int do_remap_mfn(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) { + int err = 0; struct remap_data rmd; struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; unsigned long range; - int err = 0; + int mapped = 0; BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); if (xen_feature(XENFEAT_auto_translated_physmap)) { #ifdef CONFIG_XEN_PVH /* We need to update the local page tables and the xen HAP */ - return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, - domid, pages); + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr, + prot, domid, pages); #else return -EINVAL; #endif @@ -2573,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, rmd.mfn = mfn; rmd.prot = prot; + /* We use the err_ptr to indicate if there we are doing a contigious + * mapping or a discontigious mapping. */ + rmd.contiguous = !err_ptr; while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); + int index = 0; + int done = 0; + int batch = min(REMAP_BATCH_SIZE, nr); + int batch_left = batch; range = (unsigned long)batch << PAGE_SHIFT; rmd.mmu_update = mmu_update; @@ -2584,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); - if (err < 0) - goto out; + /* We record the error for each page that gives an error, but + * continue mapping until the whole set is done */ + do { + int i; + + err = HYPERVISOR_mmu_update(&mmu_update[index], + batch_left, &done, domid); + + /* + * @err_ptr may be the same buffer as @mfn, so + * only clear it after each chunk of @mfn is + * used. + */ + if (err_ptr) { + for (i = index; i < index + done; i++) + err_ptr[i] = 0; + } + if (err < 0) { + if (!err_ptr) + goto out; + err_ptr[i] = err; + done++; /* Skip failed frame. */ + } else + mapped += done; + batch_left -= done; + index += done; + } while (batch_left); nr -= batch; addr += range; + if (err_ptr) + err_ptr += batch; } - - err = 0; out: xen_flush_tlb_all(); - return err; + return err < 0 ? err : mapped; +} + +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) +{ + return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, struct page **pages) +{ + /* We BUG_ON because it's a programmer error to pass a NULL err_ptr, + * and the consequences later is quite hard to detect what the actual + * cause of "wrong memory was mapped in". + */ + BUG_ON(err_ptr == NULL); + return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages); +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + + /* Returns: 0 success */ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int numpgs, struct page **pages) @@ -2609,22 +2583,7 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, return 0; #ifdef CONFIG_XEN_PVH - while (numpgs--) { - /* - * The mmu has already cleaned up the process mmu - * resources at this point (lookup_address will return - * NULL). - */ - unsigned long pfn = page_to_pfn(pages[numpgs]); - - xlate_remove_from_p2m(pfn, 1); - } - /* - * We don't need to flush tlbs because as part of - * xlate_remove_from_p2m, the hypervisor will do tlb flushes - * after removing the p2m entries from the EPT/NPT - */ - return 0; + return xen_xlate_unmap_gfn_range(vma, numpgs, pages); #else return -EINVAL; #endif diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c index 520022d1a181..a702ec2f5931 100644 --- a/arch/x86/xen/trace.c +++ b/arch/x86/xen/trace.c @@ -1,54 +1,12 @@ #include <linux/ftrace.h> #include <xen/interface/xen.h> +#include <xen/interface/xen-mca.h> -#define N(x) [__HYPERVISOR_##x] = "("#x")" +#define HYPERCALL(x) [__HYPERVISOR_##x] = "("#x")", static const char *xen_hypercall_names[] = { - N(set_trap_table), - N(mmu_update), - N(set_gdt), - N(stack_switch), - N(set_callbacks), - N(fpu_taskswitch), - N(sched_op_compat), - N(dom0_op), - N(set_debugreg), - N(get_debugreg), - N(update_descriptor), - N(memory_op), - N(multicall), - N(update_va_mapping), - N(set_timer_op), - N(event_channel_op_compat), - N(xen_version), - N(console_io), - N(physdev_op_compat), - N(grant_table_op), - N(vm_assist), - N(update_va_mapping_otherdomain), - N(iret), - N(vcpu_op), - N(set_segment_base), - N(mmuext_op), - N(acm_op), - N(nmi_op), - N(sched_op), - N(callback_op), - N(xenoprof_op), - N(event_channel_op), - N(physdev_op), - N(hvm_op), - -/* Architecture-specific hypercall definitions. */ - N(arch_0), - N(arch_1), - N(arch_2), - N(arch_3), - N(arch_4), - N(arch_5), - N(arch_6), - N(arch_7), +#include <asm/xen-hypercalls.h> }; -#undef N +#undef HYPERCALL static const char *xen_hypercall_name(unsigned op) { diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 674b222544b7..8afdfccf6086 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -12,6 +12,8 @@ #include <xen/interface/elfnote.h> #include <xen/interface/features.h> +#include <xen/interface/xen.h> +#include <xen/interface/xen-mca.h> #include <asm/xen/interface.h> #ifdef CONFIG_XEN_PVH @@ -85,59 +87,14 @@ ENTRY(xen_pvh_early_cpu_init) .pushsection .text .balign PAGE_SIZE ENTRY(hypercall_page) -#define NEXT_HYPERCALL(x) \ - ENTRY(xen_hypercall_##x) \ - .skip 32 - -NEXT_HYPERCALL(set_trap_table) -NEXT_HYPERCALL(mmu_update) -NEXT_HYPERCALL(set_gdt) -NEXT_HYPERCALL(stack_switch) -NEXT_HYPERCALL(set_callbacks) -NEXT_HYPERCALL(fpu_taskswitch) -NEXT_HYPERCALL(sched_op_compat) -NEXT_HYPERCALL(platform_op) -NEXT_HYPERCALL(set_debugreg) -NEXT_HYPERCALL(get_debugreg) -NEXT_HYPERCALL(update_descriptor) -NEXT_HYPERCALL(ni) -NEXT_HYPERCALL(memory_op) -NEXT_HYPERCALL(multicall) -NEXT_HYPERCALL(update_va_mapping) -NEXT_HYPERCALL(set_timer_op) -NEXT_HYPERCALL(event_channel_op_compat) -NEXT_HYPERCALL(xen_version) -NEXT_HYPERCALL(console_io) -NEXT_HYPERCALL(physdev_op_compat) -NEXT_HYPERCALL(grant_table_op) -NEXT_HYPERCALL(vm_assist) -NEXT_HYPERCALL(update_va_mapping_otherdomain) -NEXT_HYPERCALL(iret) -NEXT_HYPERCALL(vcpu_op) -NEXT_HYPERCALL(set_segment_base) -NEXT_HYPERCALL(mmuext_op) -NEXT_HYPERCALL(xsm_op) -NEXT_HYPERCALL(nmi_op) -NEXT_HYPERCALL(sched_op) -NEXT_HYPERCALL(callback_op) -NEXT_HYPERCALL(xenoprof_op) -NEXT_HYPERCALL(event_channel_op) -NEXT_HYPERCALL(physdev_op) -NEXT_HYPERCALL(hvm_op) -NEXT_HYPERCALL(sysctl) -NEXT_HYPERCALL(domctl) -NEXT_HYPERCALL(kexec_op) -NEXT_HYPERCALL(tmem_op) /* 38 */ -ENTRY(xen_hypercall_rsvr) - .skip 320 -NEXT_HYPERCALL(mca) /* 48 */ -NEXT_HYPERCALL(arch_1) -NEXT_HYPERCALL(arch_2) -NEXT_HYPERCALL(arch_3) -NEXT_HYPERCALL(arch_4) -NEXT_HYPERCALL(arch_5) -NEXT_HYPERCALL(arch_6) - .balign PAGE_SIZE + .skip PAGE_SIZE + +#define HYPERCALL(n) \ + .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ + .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 +#include <asm/xen-hypercalls.h> +#undef HYPERCALL + .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e3afe97280b1..ff3025922c14 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -193,7 +193,7 @@ fail: return ERR_PTR(-ENOMEM); } -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, unsigned int evtchn) { int err; @@ -202,7 +202,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); + err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, + &blkif->blk_ring); if (err < 0) return err; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 37779e4c4585..2c61cf8c6f61 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1245,6 +1245,7 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; + grant_ref_t gref; int err; info->ring_ref = GRANT_INVALID_REF; @@ -1257,13 +1258,13 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref); if (err < 0) { free_page((unsigned long)sring); info->ring.sring = NULL; goto fail; } - info->ring_ref = err; + info->ring_ref = gref; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index c3b4f5a5ac10..3111f2778079 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -193,6 +193,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) struct xenbus_transaction xbt; const char *message = NULL; int rv; + grant_ref_t gref; priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (!priv->shr) { @@ -200,11 +201,11 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) return -ENOMEM; } - rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr)); + rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref); if (rv < 0) return rv; - priv->ring_ref = rv; + priv->ring_ref = gref; rv = xenbus_alloc_evtchn(dev, &priv->evtchn); if (rv) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index b8c471813f4c..4de46aa61d95 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1780,7 +1780,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue, int err = -ENOMEM; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), - tx_ring_ref, &addr); + &tx_ring_ref, 1, &addr); if (err) goto err; @@ -1788,7 +1788,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue, BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE); err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), - rx_ring_ref, &addr); + &rx_ring_ref, 1, &addr); if (err) goto err; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 720aaf6313d2..4c08f98f4484 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1483,6 +1483,7 @@ static int setup_netfront(struct xenbus_device *dev, { struct xen_netif_tx_sring *txs; struct xen_netif_rx_sring *rxs; + grant_ref_t gref; int err; queue->tx_ring_ref = GRANT_INVALID_REF; @@ -1499,10 +1500,10 @@ static int setup_netfront(struct xenbus_device *dev, SHARED_RING_INIT(txs); FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) goto grant_tx_ring_fail; - queue->tx_ring_ref = err; + queue->tx_ring_ref = gref; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { @@ -1513,10 +1514,10 @@ static int setup_netfront(struct xenbus_device *dev, SHARED_RING_INIT(rxs); FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + err = xenbus_grant_ring(dev, rxs, 1, &gref); if (err < 0) goto grant_rx_ring_fail; - queue->rx_ring_ref = err; + queue->rx_ring_ref = gref; if (feature_split_evtchn) err = setup_netfront_split(queue); diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index b1ffebec9b9e..7cfd2db02deb 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -777,12 +777,13 @@ static int pcifront_publish_info(struct pcifront_device *pdev) { int err = 0; struct xenbus_transaction trans; + grant_ref_t gref; - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &gref); if (err < 0) goto out; - pdev->gnt_ref = err; + pdev->gnt_ref = gref; err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); if (err) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 34199d206ba6..fad22caf0eff 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -63,6 +63,7 @@ #define VSCSIFRONT_OP_ADD_LUN 1 #define VSCSIFRONT_OP_DEL_LUN 2 +#define VSCSIFRONT_OP_READD_LUN 3 /* Tuning point. */ #define VSCSIIF_DEFAULT_CMD_PER_LUN 10 @@ -113,8 +114,13 @@ struct vscsifrnt_info { DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS); struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS]; + /* Following items are protected by the host lock. */ wait_queue_head_t wq_sync; + wait_queue_head_t wq_pause; unsigned int wait_ring_available:1; + unsigned int waiting_pause:1; + unsigned int pause:1; + unsigned callers; char dev_state_path[64]; struct task_struct *curr; @@ -274,31 +280,31 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info, wake_up(&shadow->wq_reset); } -static int scsifront_cmd_done(struct vscsifrnt_info *info) +static void scsifront_do_response(struct vscsifrnt_info *info, + struct vscsiif_response *ring_rsp) +{ + if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS || + test_bit(ring_rsp->rqid, info->shadow_free_bitmap), + "illegal rqid %u returned by backend!\n", ring_rsp->rqid)) + return; + + if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB) + scsifront_cdb_cmd_done(info, ring_rsp); + else + scsifront_sync_cmd_done(info, ring_rsp); +} + +static int scsifront_ring_drain(struct vscsifrnt_info *info) { struct vscsiif_response *ring_rsp; RING_IDX i, rp; int more_to_do = 0; - unsigned long flags; - - spin_lock_irqsave(info->host->host_lock, flags); rp = info->ring.sring->rsp_prod; rmb(); /* ordering required respective to dom0 */ for (i = info->ring.rsp_cons; i != rp; i++) { - ring_rsp = RING_GET_RESPONSE(&info->ring, i); - - if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS || - test_bit(ring_rsp->rqid, info->shadow_free_bitmap), - "illegal rqid %u returned by backend!\n", - ring_rsp->rqid)) - continue; - - if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB) - scsifront_cdb_cmd_done(info, ring_rsp); - else - scsifront_sync_cmd_done(info, ring_rsp); + scsifront_do_response(info, ring_rsp); } info->ring.rsp_cons = i; @@ -308,6 +314,18 @@ static int scsifront_cmd_done(struct vscsifrnt_info *info) else info->ring.sring->rsp_event = i + 1; + return more_to_do; +} + +static int scsifront_cmd_done(struct vscsifrnt_info *info) +{ + int more_to_do; + unsigned long flags; + + spin_lock_irqsave(info->host->host_lock, flags); + + more_to_do = scsifront_ring_drain(info); + info->wait_ring_available = 0; spin_unlock_irqrestore(info->host->host_lock, flags); @@ -328,6 +346,24 @@ static irqreturn_t scsifront_irq_fn(int irq, void *dev_id) return IRQ_HANDLED; } +static void scsifront_finish_all(struct vscsifrnt_info *info) +{ + unsigned i; + struct vscsiif_response resp; + + scsifront_ring_drain(info); + + for (i = 0; i < VSCSIIF_MAX_REQS; i++) { + if (test_bit(i, info->shadow_free_bitmap)) + continue; + resp.rqid = i; + resp.sense_len = 0; + resp.rslt = DID_RESET << 16; + resp.residual_len = 0; + scsifront_do_response(info, &resp); + } +} + static int map_data_for_request(struct vscsifrnt_info *info, struct scsi_cmnd *sc, struct vscsiif_request *ring_req, @@ -475,6 +511,27 @@ static struct vscsiif_request *scsifront_command2ring( return ring_req; } +static int scsifront_enter(struct vscsifrnt_info *info) +{ + if (info->pause) + return 1; + info->callers++; + return 0; +} + +static void scsifront_return(struct vscsifrnt_info *info) +{ + info->callers--; + if (info->callers) + return; + + if (!info->waiting_pause) + return; + + info->waiting_pause = 0; + wake_up(&info->wq_pause); +} + static int scsifront_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc) { @@ -486,6 +543,10 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, uint16_t rqid; spin_lock_irqsave(shost->host_lock, flags); + if (scsifront_enter(info)) { + spin_unlock_irqrestore(shost->host_lock, flags); + return SCSI_MLQUEUE_HOST_BUSY; + } if (RING_FULL(&info->ring)) goto busy; @@ -505,6 +566,7 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, if (err < 0) { pr_debug("%s: err %d\n", __func__, err); scsifront_put_rqid(info, rqid); + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); if (err == -ENOMEM) return SCSI_MLQUEUE_HOST_BUSY; @@ -514,11 +576,13 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, } scsifront_do_request(info); + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); return 0; busy: + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); pr_debug("%s: busy\n", __func__); return SCSI_MLQUEUE_HOST_BUSY; @@ -549,7 +613,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) if (ring_req) break; } - if (err) { + if (err || info->pause) { spin_unlock_irq(host->host_lock); kfree(shadow); return FAILED; @@ -561,6 +625,11 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) spin_lock_irq(host->host_lock); } + if (scsifront_enter(info)) { + spin_unlock_irq(host->host_lock); + return FAILED; + } + ring_req->act = act; ring_req->ref_rqid = s->rqid; @@ -587,6 +656,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) err = FAILED; } + scsifront_return(info); spin_unlock_irq(host->host_lock); return err; } @@ -644,6 +714,7 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info) { struct xenbus_device *dev = info->dev; struct vscsiif_sring *sring; + grant_ref_t gref; int err = -ENOMEM; /***** Frontend to Backend ring start *****/ @@ -656,14 +727,14 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info) SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(sring)); + err = xenbus_grant_ring(dev, sring, 1, &gref); if (err < 0) { free_page((unsigned long)sring); xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)"); return err; } - info->ring_ref = err; + info->ring_ref = gref; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) { @@ -698,6 +769,13 @@ free_gnttab: return err; } +static void scsifront_free_ring(struct vscsifrnt_info *info) +{ + unbind_from_irqhandler(info->irq, info); + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); +} + static int scsifront_init_ring(struct vscsifrnt_info *info) { struct xenbus_device *dev = info->dev; @@ -744,9 +822,7 @@ again: fail: xenbus_transaction_end(xbt, 1); free_sring: - unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); + scsifront_free_ring(info); return err; } @@ -779,6 +855,7 @@ static int scsifront_probe(struct xenbus_device *dev, } init_waitqueue_head(&info->wq_sync); + init_waitqueue_head(&info->wq_pause); spin_lock_init(&info->shadow_lock); snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no); @@ -802,13 +879,60 @@ static int scsifront_probe(struct xenbus_device *dev, return 0; free_sring: - unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); + scsifront_free_ring(info); scsi_host_put(host); return err; } +static int scsifront_resume(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + struct Scsi_Host *host = info->host; + int err; + + spin_lock_irq(host->host_lock); + + /* Finish all still pending commands. */ + scsifront_finish_all(info); + + spin_unlock_irq(host->host_lock); + + /* Reconnect to dom0. */ + scsifront_free_ring(info); + err = scsifront_init_ring(info); + if (err) { + dev_err(&dev->dev, "fail to resume %d\n", err); + scsi_host_put(host); + return err; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; +} + +static int scsifront_suspend(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + struct Scsi_Host *host = info->host; + int err = 0; + + /* No new commands for the backend. */ + spin_lock_irq(host->host_lock); + info->pause = 1; + while (info->callers && !err) { + info->waiting_pause = 1; + info->wait_ring_available = 0; + spin_unlock_irq(host->host_lock); + wake_up(&info->wq_sync); + err = wait_event_interruptible(info->wq_pause, + !info->waiting_pause); + spin_lock_irq(host->host_lock); + } + spin_unlock_irq(host->host_lock); + return err; +} + static int scsifront_remove(struct xenbus_device *dev) { struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); @@ -823,10 +947,7 @@ static int scsifront_remove(struct xenbus_device *dev) } mutex_unlock(&scsifront_mutex); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - unbind_from_irqhandler(info->irq, info); - + scsifront_free_ring(info); scsi_host_put(info->host); return 0; @@ -919,6 +1040,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) scsi_device_put(sdev); } break; + case VSCSIFRONT_OP_READD_LUN: + if (device_state == XenbusStateConnected) + xenbus_printf(XBT_NIL, dev->nodename, + info->dev_state_path, + "%d", XenbusStateConnected); + break; default: break; } @@ -932,21 +1059,29 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) static void scsifront_read_backend_params(struct xenbus_device *dev, struct vscsifrnt_info *info) { - unsigned int sg_grant; + unsigned int sg_grant, nr_segs; int ret; struct Scsi_Host *host = info->host; ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u", &sg_grant); - if (ret == 1 && sg_grant) { - sg_grant = min_t(unsigned int, sg_grant, SG_ALL); - sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE); - host->sg_tablesize = min_t(unsigned int, sg_grant, + if (ret != 1) + sg_grant = 0; + nr_segs = min_t(unsigned int, sg_grant, SG_ALL); + nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE); + nr_segs = min_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE * PAGE_SIZE / sizeof(struct scsiif_request_segment)); - host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512; - } - dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize); + + if (!info->pause && sg_grant) + dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs); + else if (info->pause && nr_segs < host->sg_tablesize) + dev_warn(&dev->dev, + "SG entries decreased from %d to %u - device may not work properly anymore\n", + host->sg_tablesize, nr_segs); + + host->sg_tablesize = nr_segs; + host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512; } static void scsifront_backend_changed(struct xenbus_device *dev, @@ -965,6 +1100,14 @@ static void scsifront_backend_changed(struct xenbus_device *dev, case XenbusStateConnected: scsifront_read_backend_params(dev, info); + + if (info->pause) { + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN); + xenbus_switch_state(dev, XenbusStateConnected); + info->pause = 0; + return; + } + if (xenbus_read_driver_state(dev->nodename) == XenbusStateInitialised) scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); @@ -1002,6 +1145,8 @@ static struct xenbus_driver scsifront_driver = { .ids = scsifront_ids, .probe = scsifront_probe, .remove = scsifront_remove, + .resume = scsifront_resume, + .suspend = scsifront_suspend, .otherend_changed = scsifront_backend_changed, }; diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 94d96809e686..a270004c9605 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -270,4 +270,10 @@ config XEN_EFI def_bool y depends on X86_64 && EFI +config XEN_AUTO_XLATE + def_bool y + depends on ARM || ARM64 || XEN_PVHVM + help + Support for auto-translated physmap guests. + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 2ccd3592d41f..40edd1cbb60d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o obj-$(CONFIG_XEN_EFI) += efi.o obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o +obj-$(CONFIG_XEN_AUTO_XLATE) += xlate_mmu.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 6ab6a79c38a5..a493c7315e94 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -393,14 +393,25 @@ static int bind_virq_for_mce(void) static int __init xen_late_init_mcelog(void) { + int ret; + /* Only DOM0 is responsible for MCE logging */ - if (xen_initial_domain()) { - /* register character device /dev/mcelog for xen mcelog */ - if (misc_register(&xen_mce_chrdev_device)) - return -ENODEV; - return bind_virq_for_mce(); - } + if (!xen_initial_domain()) + return -ENODEV; + + /* register character device /dev/mcelog for xen mcelog */ + ret = misc_register(&xen_mce_chrdev_device); + if (ret) + return ret; + + ret = bind_virq_for_mce(); + if (ret) + goto deregister; - return -ENODEV; + return 0; + +deregister: + misc_deregister(&xen_mce_chrdev_device); + return ret; } device_initcall(xen_late_init_mcelog); diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index 95ee4302ffb8..7494dbeb4409 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -19,6 +19,7 @@ #include <linux/pci.h> #include <linux/acpi.h> +#include <linux/pci-acpi.h> #include <xen/xen.h> #include <xen/interface/physdev.h> #include <xen/interface/xen.h> @@ -67,12 +68,22 @@ static int xen_add_device(struct device *dev) #ifdef CONFIG_ACPI handle = ACPI_HANDLE(&pci_dev->dev); - if (!handle && pci_dev->bus->bridge) - handle = ACPI_HANDLE(pci_dev->bus->bridge); #ifdef CONFIG_PCI_IOV if (!handle && pci_dev->is_virtfn) handle = ACPI_HANDLE(physfn->bus->bridge); #endif + if (!handle) { + /* + * This device was not listed in the ACPI name space at + * all. Try to get acpi handle of parent pci bus. + */ + struct pci_bus *pbus; + for (pbus = pci_dev->bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) + break; + } + } if (handle) { acpi_status status; diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 0aac403d53fd..49e88f2ce7a1 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -132,6 +132,33 @@ static ssize_t __ref store_online(struct device *dev, } static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); +static struct attribute *pcpu_dev_attrs[] = { + &dev_attr_online.attr, + NULL +}; + +static umode_t pcpu_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + /* + * Xen never offline cpu0 due to several restrictions + * and assumptions. This basically doesn't add a sys control + * to user, one cannot attempt to offline BSP. + */ + return dev->id ? attr->mode : 0; +} + +static const struct attribute_group pcpu_dev_group = { + .attrs = pcpu_dev_attrs, + .is_visible = pcpu_dev_is_visible, +}; + +static const struct attribute_group *pcpu_dev_groups[] = { + &pcpu_dev_group, + NULL +}; + static bool xen_pcpu_online(uint32_t flags) { return !!(flags & XEN_PCPU_FLAGS_ONLINE); @@ -181,9 +208,6 @@ static void unregister_and_remove_pcpu(struct pcpu *pcpu) return; dev = &pcpu->dev; - if (dev->id) - device_remove_file(dev, &dev_attr_online); - /* pcpu remove would be implicitly done */ device_unregister(dev); } @@ -200,6 +224,7 @@ static int register_pcpu(struct pcpu *pcpu) dev->bus = &xen_pcpu_subsys; dev->id = pcpu->cpu_id; dev->release = pcpu_release; + dev->groups = pcpu_dev_groups; err = device_register(dev); if (err) { @@ -207,19 +232,6 @@ static int register_pcpu(struct pcpu *pcpu) return err; } - /* - * Xen never offline cpu0 due to several restrictions - * and assumptions. This basically doesn't add a sys control - * to user, one cannot attempt to offline BSP. - */ - if (dev->id) { - err = device_create_file(dev, &dev_attr_online); - if (err) { - device_unregister(dev); - return err; - } - } - return 0; } diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 59ac71c4a043..5a296161d843 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size, return ret; } +/* + * Similar to traverse_pages, but use each page as a "block" of + * data to be processed as one unit. + */ +static int traverse_pages_block(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, int nr, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret = 0; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + + while (nelem) { + int nr = (PAGE_SIZE/size); + struct page *page; + if (nr > nelem) + nr = nelem; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + ret = (*fn)(pagedata, nr, state); + if (ret) + break; + nelem -= nr; + } + + return ret; +} + struct mmap_mfn_state { unsigned long va; struct vm_area_struct *vma; @@ -274,39 +308,25 @@ struct mmap_batch_state { /* auto translated dom0 note: if domU being created is PV, then mfn is * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). */ -static int mmap_batch_fn(void *data, void *state) +static int mmap_batch_fn(void *data, int nr, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; struct vm_area_struct *vma = st->vma; struct page **pages = vma->vm_private_data; - struct page *cur_page = NULL; + struct page **cur_pages = NULL; int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) - cur_page = pages[st->index++]; + cur_pages = &pages[st->index]; - ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain, - &cur_page); + BUG_ON(nr < 0); + ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr, + (int *)mfnp, st->vma->vm_page_prot, + st->domain, cur_pages); - /* Store error code for second pass. */ - if (st->version == 1) { - if (ret < 0) { - /* - * V1 encodes the error codes in the 32bit top nibble of the - * mfn (with its known limitations vis-a-vis 64 bit callers). - */ - *mfnp |= (ret == -ENOENT) ? - PRIVCMD_MMAPBATCH_PAGED_ERROR : - PRIVCMD_MMAPBATCH_MFN_ERROR; - } - } else { /* st->version == 2 */ - *((int *) mfnp) = ret; - } - - /* And see if it affects the global_error. */ - if (ret < 0) { + /* Adjust the global_error? */ + if (ret != nr) { if (ret == -ENOENT) st->global_error = -ENOENT; else { @@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state) st->global_error = 1; } } - st->va += PAGE_SIZE; + st->va += PAGE_SIZE * nr; + st->index += nr; return 0; } -static int mmap_return_errors(void *data, void *state) +static int mmap_return_error(int err, struct mmap_batch_state *st) { - struct mmap_batch_state *st = state; + int ret; if (st->version == 1) { - xen_pfn_t mfnp = *((xen_pfn_t *) data); - if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR) - return __put_user(mfnp, st->user_mfn++); - else + if (err) { + xen_pfn_t mfn; + + ret = get_user(mfn, st->user_mfn); + if (ret < 0) + return ret; + /* + * V1 encodes the error codes in the 32bit top + * nibble of the mfn (with its known + * limitations vis-a-vis 64 bit callers). + */ + mfn |= (err == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(mfn, st->user_mfn++); + } else st->user_mfn++; } else { /* st->version == 2 */ - int err = *((int *) data); if (err) return __put_user(err, st->user_err++); else @@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state) return 0; } +static int mmap_return_errors(void *data, int nr, void *state) +{ + struct mmap_batch_state *st = state; + int *errs = data; + int i; + int ret; + + for (i = 0; i < nr; i++) { + ret = mmap_return_error(errs[i], st); + if (ret < 0) + return ret; + } + return 0; +} + /* Allocate pfns that are then mapped with gmfns from foreign domid. Update * the vma with the page info to use later. * Returns: 0 if success, otherwise -errno @@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) state.version = version; /* mmap_batch_fn guarantees ret == 0 */ - BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state)); + BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state)); up_write(&mm->mmap_sem); @@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) /* Write back errors in second pass. */ state.user_mfn = (xen_pfn_t *)m.arr; state.user_err = m.err; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_return_errors, &state); + ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_return_errors, &state); } else ret = 0; diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index e555845d61fa..39e7ef8d3957 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -193,13 +193,18 @@ static DEVICE_ATTR(target, S_IRUGO | S_IWUSR, show_target, store_target); -static struct device_attribute *balloon_attrs[] = { - &dev_attr_target_kb, - &dev_attr_target, - &dev_attr_schedule_delay.attr, - &dev_attr_max_schedule_delay.attr, - &dev_attr_retry_count.attr, - &dev_attr_max_retry_count.attr +static struct attribute *balloon_attrs[] = { + &dev_attr_target_kb.attr, + &dev_attr_target.attr, + &dev_attr_schedule_delay.attr.attr, + &dev_attr_max_schedule_delay.attr.attr, + &dev_attr_retry_count.attr.attr, + &dev_attr_max_retry_count.attr.attr, + NULL +}; + +static const struct attribute_group balloon_group = { + .attrs = balloon_attrs }; static struct attribute *balloon_info_attrs[] = { @@ -214,6 +219,12 @@ static const struct attribute_group balloon_info_group = { .attrs = balloon_info_attrs }; +static const struct attribute_group *balloon_groups[] = { + &balloon_group, + &balloon_info_group, + NULL +}; + static struct bus_type balloon_subsys = { .name = BALLOON_CLASS_NAME, .dev_name = BALLOON_CLASS_NAME, @@ -221,7 +232,7 @@ static struct bus_type balloon_subsys = { static int register_balloon(struct device *dev) { - int i, error; + int error; error = subsys_system_register(&balloon_subsys, NULL); if (error) @@ -229,6 +240,7 @@ static int register_balloon(struct device *dev) dev->id = 0; dev->bus = &balloon_subsys; + dev->groups = balloon_groups; error = device_register(dev); if (error) { @@ -236,24 +248,7 @@ static int register_balloon(struct device *dev) return error; } - for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { - error = device_create_file(dev, balloon_attrs[i]); - if (error) - goto fail; - } - - error = sysfs_create_group(&dev->kobj, &balloon_info_group); - if (error) - goto fail; - return 0; - - fail: - while (--i >= 0) - device_remove_file(dev, balloon_attrs[i]); - device_unregister(dev); - bus_unregister(&balloon_subsys); - return error; } MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 2d7369391472..c2260a0456c9 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -88,9 +88,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n", pci_name(dev)); pci_set_master(dev); + } else if (dev->is_busmaster && !is_master_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: clear bus master\n", + pci_name(dev)); + pci_clear_master(dev); } - if (value & PCI_COMMAND_INVALIDATE) { + if (!(cmd->val & PCI_COMMAND_INVALIDATE) && + (value & PCI_COMMAND_INVALIDATE)) { if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable memory-write-invalidate\n", @@ -101,6 +107,13 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) pci_name(dev), err); value &= ~PCI_COMMAND_INVALIDATE; } + } else if ((cmd->val & PCI_COMMAND_INVALIDATE) && + !(value & PCI_COMMAND_INVALIDATE)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG + DRV_NAME ": %s: disable memory-write-invalidate\n", + pci_name(dev)); + pci_clear_mwi(dev); } cmd->val = value; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index cc3cbb4435f8..258b7c325649 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -118,7 +118,7 @@ static void pcistub_device_release(struct kref *kref) int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix, &ppdev); - if (err) + if (err && err != -ENOSYS) dev_warn(&dev->dev, "MSI-X release failed (%d)\n", err); } @@ -402,7 +402,7 @@ static int pcistub_init_device(struct pci_dev *dev) }; err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev); - if (err) + if (err && err != -ENOSYS) dev_err(&dev->dev, "MSI-X preparation failed (%d)\n", err); } diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index fe17c80ff4b7..98bc345f296e 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -113,7 +113,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", gnt_ref, remote_evtchn); - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error mapping other domain page in ours."); diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 42bd55a6c237..07ef38325223 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -31,6 +31,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) "xen-pvscsi: " fmt + #include <stdarg.h> #include <linux/module.h> @@ -69,9 +71,6 @@ #include <xen/interface/grant_table.h> #include <xen/interface/io/vscsiif.h> -#define DPRINTK(_f, _a...) \ - pr_debug("(file=%s, line=%d) " _f, __FILE__ , __LINE__ , ## _a) - #define VSCSI_VERSION "v0.1" #define VSCSI_NAMELEN 32 @@ -271,7 +270,7 @@ static void scsiback_print_status(char *sense_buffer, int errors, { struct scsiback_tpg *tpg = pending_req->v2p->tpg; - pr_err("xen-pvscsi[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n", + pr_err("[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n", tpg->tport->tport_name, pending_req->v2p->lun, pending_req->cmnd[0], status_byte(errors), msg_byte(errors), host_byte(errors), driver_byte(errors)); @@ -427,7 +426,7 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map, BUG_ON(err); for (i = 0; i < cnt; i++) { if (unlikely(map[i].status != GNTST_okay)) { - pr_err("xen-pvscsi: invalid buffer -- could not remap it\n"); + pr_err("invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; err = -ENOMEM; } else { @@ -449,7 +448,7 @@ static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req, for (i = 0; i < cnt; i++) { if (get_free_page(pg + mapcount)) { put_free_pages(pg, mapcount); - pr_err("xen-pvscsi: no grant page\n"); + pr_err("no grant page\n"); return -ENOMEM; } gnttab_set_map_op(&map[mapcount], vaddr_page(pg[mapcount]), @@ -492,7 +491,7 @@ static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req, return 0; if (nr_segments > VSCSIIF_SG_TABLESIZE) { - DPRINTK("xen-pvscsi: invalid parameter nr_seg = %d\n", + pr_debug("invalid parameter nr_seg = %d\n", ring_req->nr_segments); return -EINVAL; } @@ -516,13 +515,12 @@ static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req, nr_segments += n_segs; } if (nr_segments > SG_ALL) { - DPRINTK("xen-pvscsi: invalid nr_seg = %d\n", - nr_segments); + pr_debug("invalid nr_seg = %d\n", nr_segments); return -EINVAL; } } - /* free of (sgl) in fast_flush_area()*/ + /* free of (sgl) in fast_flush_area() */ pending_req->sgl = kmalloc_array(nr_segments, sizeof(struct scatterlist), GFP_KERNEL); if (!pending_req->sgl) @@ -679,7 +677,8 @@ static int prepare_pending_reqs(struct vscsibk_info *info, v2p = scsiback_do_translation(info, &vir); if (!v2p) { pending_req->v2p = NULL; - DPRINTK("xen-pvscsi: doesn't exist.\n"); + pr_debug("the v2p of (chn:%d, tgt:%d, lun:%d) doesn't exist.\n", + vir.chn, vir.tgt, vir.lun); return -ENODEV; } pending_req->v2p = v2p; @@ -690,14 +689,14 @@ static int prepare_pending_reqs(struct vscsibk_info *info, (pending_req->sc_data_direction != DMA_TO_DEVICE) && (pending_req->sc_data_direction != DMA_FROM_DEVICE) && (pending_req->sc_data_direction != DMA_NONE)) { - DPRINTK("xen-pvscsi: invalid parameter data_dir = %d\n", + pr_debug("invalid parameter data_dir = %d\n", pending_req->sc_data_direction); return -EINVAL; } pending_req->cmd_len = ring_req->cmd_len; if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) { - DPRINTK("xen-pvscsi: invalid parameter cmd_len = %d\n", + pr_debug("invalid parameter cmd_len = %d\n", pending_req->cmd_len); return -EINVAL; } @@ -721,7 +720,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) if (RING_REQUEST_PROD_OVERFLOW(ring, rp)) { rc = ring->rsp_prod_pvt; - pr_warn("xen-pvscsi: Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", + pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", info->domid, rp, rc, rp - rc); info->ring_error = 1; return 0; @@ -772,7 +771,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) scsiback_device_action(pending_req, TMR_LUN_RESET, 0); break; default: - pr_err_ratelimited("xen-pvscsi: invalid request\n"); + pr_err_ratelimited("invalid request\n"); scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24, 0, pending_req); kmem_cache_free(scsiback_cachep, pending_req); @@ -810,7 +809,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, if (info->irq) return -1; - err = xenbus_map_ring_valloc(info->dev, ring_ref, &area); + err = xenbus_map_ring_valloc(info->dev, &ring_ref, 1, &area); if (err) return err; @@ -874,14 +873,13 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, lunp = strrchr(phy, ':'); if (!lunp) { - pr_err("xen-pvscsi: illegal format of physical device %s\n", - phy); + pr_err("illegal format of physical device %s\n", phy); return -EINVAL; } *lunp = 0; lunp++; if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) { - pr_err("xen-pvscsi: lun number not valid: %s\n", lunp); + pr_err("lun number not valid: %s\n", lunp); return -EINVAL; } @@ -909,7 +907,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, mutex_unlock(&scsiback_mutex); if (!tpg) { - pr_err("xen-pvscsi: %s:%d %s\n", phy, lun, error); + pr_err("%s:%d %s\n", phy, lun, error); return -ENODEV; } @@ -926,7 +924,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, if ((entry->v.chn == v->chn) && (entry->v.tgt == v->tgt) && (entry->v.lun == v->lun)) { - pr_warn("xen-pvscsi: Virtual ID is already used. Assignment was not performed.\n"); + pr_warn("Virtual ID is already used. Assignment was not performed.\n"); err = -EEXIST; goto out; } @@ -992,15 +990,15 @@ found: } static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state, - char *phy, struct ids_tuple *vir) + char *phy, struct ids_tuple *vir, int try) { if (!scsiback_add_translation_entry(info, phy, vir)) { if (xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateInitialised)) { - pr_err("xen-pvscsi: xenbus_printf error %s\n", state); + pr_err("xenbus_printf error %s\n", state); scsiback_del_translation_entry(info, vir); } - } else { + } else if (!try) { xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateClosed); } @@ -1012,7 +1010,7 @@ static void scsiback_do_del_lun(struct vscsibk_info *info, const char *state, if (!scsiback_del_translation_entry(info, vir)) { if (xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateClosed)) - pr_err("xen-pvscsi: xenbus_printf error %s\n", state); + pr_err("xenbus_printf error %s\n", state); } } @@ -1060,10 +1058,19 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, switch (op) { case VSCSIBACK_OP_ADD_OR_DEL_LUN: - if (device_state == XenbusStateInitialising) - scsiback_do_add_lun(info, state, phy, &vir); - if (device_state == XenbusStateClosing) + switch (device_state) { + case XenbusStateInitialising: + scsiback_do_add_lun(info, state, phy, &vir, 0); + break; + case XenbusStateConnected: + scsiback_do_add_lun(info, state, phy, &vir, 1); + break; + case XenbusStateClosing: scsiback_do_del_lun(info, state, &vir); + break; + default: + break; + } break; case VSCSIBACK_OP_UPDATEDEV_STATE: @@ -1071,15 +1078,14 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, /* modify vscsi-devs/dev-x/state */ if (xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateConnected)) { - pr_err("xen-pvscsi: xenbus_printf error %s\n", - str); + pr_err("xenbus_printf error %s\n", str); scsiback_del_translation_entry(info, &vir); xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateClosed); } } break; - /*When it is necessary, processing is added here.*/ + /* When it is necessary, processing is added here. */ default: break; } @@ -1196,7 +1202,7 @@ static int scsiback_probe(struct xenbus_device *dev, struct vscsibk_info *info = kzalloc(sizeof(struct vscsibk_info), GFP_KERNEL); - DPRINTK("%p %d\n", dev, dev->otherend_id); + pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); if (!info) { xenbus_dev_fatal(dev, -ENOMEM, "allocating backend structure"); @@ -1227,7 +1233,7 @@ static int scsiback_probe(struct xenbus_device *dev, return 0; fail: - pr_warn("xen-pvscsi: %s failed\n", __func__); + pr_warn("%s failed\n", __func__); scsiback_remove(dev); return err; @@ -1432,7 +1438,7 @@ check_len: } snprintf(&tport->tport_name[0], VSCSI_NAMELEN, "%s", &name[off]); - pr_debug("xen-pvscsi: Allocated emulated Target %s Address: %s\n", + pr_debug("Allocated emulated Target %s Address: %s\n", scsiback_dump_proto_id(tport), name); return &tport->tport_wwn; @@ -1443,7 +1449,7 @@ static void scsiback_drop_tport(struct se_wwn *wwn) struct scsiback_tport *tport = container_of(wwn, struct scsiback_tport, tport_wwn); - pr_debug("xen-pvscsi: Deallocating emulated Target %s Address: %s\n", + pr_debug("Deallocating emulated Target %s Address: %s\n", scsiback_dump_proto_id(tport), tport->tport_name); kfree(tport); @@ -1470,8 +1476,8 @@ static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg) static int scsiback_check_stop_free(struct se_cmd *se_cmd) { /* - * Do not release struct se_cmd's containing a valid TMR - * pointer. These will be released directly in scsiback_device_action() + * Do not release struct se_cmd's containing a valid TMR pointer. + * These will be released directly in scsiback_device_action() * with transport_generic_free_cmd(). */ if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) @@ -1637,7 +1643,7 @@ static int scsiback_make_nexus(struct scsiback_tpg *tpg, return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer */ tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(tv_nexus->tvn_se_sess)) { @@ -1705,7 +1711,7 @@ static int scsiback_drop_nexus(struct scsiback_tpg *tpg) return -EBUSY; } - pr_debug("xen-pvscsi: Removing I_T Nexus to emulated %s Initiator Port: %s\n", + pr_debug("Removing I_T Nexus to emulated %s Initiator Port: %s\n", scsiback_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); @@ -1751,7 +1757,7 @@ static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg, unsigned char i_port[VSCSI_NAMELEN], *ptr, *port_ptr; int ret; /* - * Shutdown the active I_T nexus if 'NULL' is passed.. + * Shutdown the active I_T nexus if 'NULL' is passed. */ if (!strncmp(page, "NULL", 4)) { ret = scsiback_drop_nexus(tpg); @@ -1922,7 +1928,7 @@ static void scsiback_drop_tpg(struct se_portal_group *se_tpg) */ scsiback_drop_nexus(tpg); /* - * Deregister the se_tpg from TCM.. + * Deregister the se_tpg from TCM. */ core_tpg_deregister(se_tpg); kfree(tpg); @@ -1992,7 +1998,7 @@ static int scsiback_register_configfs(void) struct target_fabric_configfs *fabric; int ret; - pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n", + pr_debug("fabric module %s on %s/%s on "UTS_RELEASE"\n", VSCSI_VERSION, utsname()->sysname, utsname()->machine); /* * Register the top level struct config_item_type with TCM core @@ -2029,7 +2035,7 @@ static int scsiback_register_configfs(void) * Setup our local pointer to *fabric */ scsiback_fabric_configfs = fabric; - pr_debug("xen-pvscsi: Set fabric -> scsiback_fabric_configfs\n"); + pr_debug("Set fabric -> scsiback_fabric_configfs\n"); return 0; }; @@ -2040,7 +2046,7 @@ static void scsiback_deregister_configfs(void) target_fabric_configfs_deregister(scsiback_fabric_configfs); scsiback_fabric_configfs = NULL; - pr_debug("xen-pvscsi: Cleared scsiback_fabric_configfs\n"); + pr_debug("Cleared scsiback_fabric_configfs\n"); }; static const struct xenbus_device_id scsiback_ids[] = { @@ -2091,7 +2097,7 @@ out_unregister_xenbus: xenbus_unregister_driver(&scsiback_driver); out_cache_destroy: kmem_cache_destroy(scsiback_cachep); - pr_err("xen-pvscsi: %s: error %d\n", __func__, ret); + pr_err("%s: error %d\n", __func__, ret); return ret; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index ca744102b666..96b2011d25f3 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -52,17 +52,25 @@ struct xenbus_map_node { struct list_head next; union { - struct vm_struct *area; /* PV */ - struct page *page; /* HVM */ + struct { + struct vm_struct *area; + } pv; + struct { + struct page *pages[XENBUS_MAX_RING_PAGES]; + void *addr; + } hvm; }; - grant_handle_t handle; + grant_handle_t handles[XENBUS_MAX_RING_PAGES]; + unsigned int nr_handles; }; static DEFINE_SPINLOCK(xenbus_valloc_lock); static LIST_HEAD(xenbus_valloc_pages); struct xenbus_ring_ops { - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*map)(struct xenbus_device *dev, + grant_ref_t *gnt_refs, unsigned int nr_grefs, + void **vaddr); int (*unmap)(struct xenbus_device *dev, void *vaddr); }; @@ -355,17 +363,39 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, /** * xenbus_grant_ring * @dev: xenbus device - * @ring_mfn: mfn of ring to grant - - * Grant access to the given @ring_mfn to the peer of the given device. Return - * a grant reference on success, or -errno on error. On error, the device will - * switch to XenbusStateClosing, and the error will be saved in the store. + * @vaddr: starting virtual address of the ring + * @nr_pages: number of pages to be granted + * @grefs: grant reference array to be filled in + * + * Grant access to the given @vaddr to the peer of the given device. + * Then fill in @grefs with grant references. Return 0 on success, or + * -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. */ -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + unsigned int nr_pages, grant_ref_t *grefs) { - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); - if (err < 0) - xenbus_dev_fatal(dev, err, "granting access to ring page"); + int err; + int i, j; + + for (i = 0; i < nr_pages; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + err = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_mfn(addr), 0); + if (err < 0) { + xenbus_dev_fatal(dev, err, + "granting access to ring page"); + goto fail; + } + grefs[i] = err; + } + + return 0; + +fail: + for (j = 0; j < i; j++) + gnttab_end_foreign_access_ref(grefs[j], 0); return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); @@ -419,62 +449,130 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); /** * xenbus_map_ring_valloc * @dev: xenbus device - * @gnt_ref: grant reference + * @gnt_refs: grant reference array + * @nr_grefs: number of grant references * @vaddr: pointer to address to be filled out by mapping * - * Based on Rusty Russell's skeleton driver's map_page. - * Map a page of memory into this domain from another domain's grant table. - * xenbus_map_ring_valloc allocates a page of virtual address space, maps the - * page to that address, and sets *vaddr to that address. - * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM on error. If an error is returned, device will switch to + * Map @nr_grefs pages of memory into this domain from another + * domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs + * pages of virtual address space, maps the pages to that address, and + * sets *vaddr to that address. Returns 0 on success, and GNTST_* + * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on + * error. If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_ref, vaddr); + return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); +/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned + * long), e.g. 32-on-64. Caller is responsible for preparing the + * right array to feed into this function */ +static int __xenbus_map_ring(struct xenbus_device *dev, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + grant_handle_t *handles, + phys_addr_t *addrs, + unsigned int flags, + bool *leaked) +{ + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + int i, j; + int err = GNTST_okay; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + for (i = 0; i < nr_grefs; i++) { + memset(&map[i], 0, sizeof(map[i])); + gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i], + dev->otherend_id); + handles[i] = INVALID_GRANT_HANDLE; + } + + gnttab_batch_map(map, i); + + for (i = 0; i < nr_grefs; i++) { + if (map[i].status != GNTST_okay) { + err = map[i].status; + xenbus_dev_fatal(dev, map[i].status, + "mapping in shared page %d from domain %d", + gnt_refs[i], dev->otherend_id); + goto fail; + } else + handles[i] = map[i].handle; + } + + return GNTST_okay; + + fail: + for (i = j = 0; i < nr_grefs; i++) { + if (handles[i] != INVALID_GRANT_HANDLE) { + memset(&unmap[j], 0, sizeof(unmap[j])); + gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i], + GNTMAP_host_map, handles[i]); + j++; + } + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j)) + BUG(); + + *leaked = false; + for (i = 0; i < j; i++) { + if (unmap[i].status != GNTST_okay) { + *leaked = true; + break; + } + } + + return err; +} + static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map | GNTMAP_contains_pte, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; struct xenbus_map_node *node; struct vm_struct *area; - pte_t *pte; + pte_t *ptes[XENBUS_MAX_RING_PAGES]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + int err = GNTST_okay; + int i; + bool leaked; *vaddr = NULL; + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE, &pte); + area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes); if (!area) { kfree(node); return -ENOMEM; } - op.host_addr = arbitrary_virt_to_machine(pte).maddr; + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr; - gnttab_batch_map(&op, 1); - - if (op.status != GNTST_okay) { - free_vm_area(area); - kfree(node); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; - } + err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, + phys_addrs, + GNTMAP_host_map | GNTMAP_contains_pte, + &leaked); + if (err) + goto failed; - node->handle = op.handle; - node->area = area; + node->nr_handles = nr_grefs; + node->pv.area = area; spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -482,14 +580,33 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, *vaddr = area->addr; return 0; + +failed: + if (!leaked) + free_vm_area(area); + else + pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); + + kfree(node); + return err; } static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + grant_ref_t *gnt_ref, + unsigned int nr_grefs, + void **vaddr) { struct xenbus_map_node *node; + int i; int err; void *addr; + bool leaked = false; + /* Why do we need two arrays? See comment of __xenbus_map_ring */ + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + unsigned long addrs[XENBUS_MAX_RING_PAGES]; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; *vaddr = NULL; @@ -497,15 +614,32 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, + false /* lowmem */); if (err) goto out_err; - addr = pfn_to_kaddr(page_to_pfn(node->page)); + for (i = 0; i < nr_grefs; i++) { + unsigned long pfn = page_to_pfn(node->hvm.pages[i]); + phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn); + addrs[i] = (unsigned long)pfn_to_kaddr(pfn); + } + + err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, + phys_addrs, GNTMAP_host_map, &leaked); + node->nr_handles = nr_grefs; - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); if (err) - goto out_err_free_ballooned_pages; + goto out_free_ballooned_pages; + + addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP, + PAGE_KERNEL); + if (!addr) { + err = -ENOMEM; + goto out_xenbus_unmap_ring; + } + + node->hvm.addr = addr; spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -514,8 +648,16 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, *vaddr = addr; return 0; - out_err_free_ballooned_pages: - free_xenballooned_pages(1, &node->page); + out_xenbus_unmap_ring: + if (!leaked) + xenbus_unmap_ring(dev, node->handles, node->nr_handles, + addrs); + else + pr_alert("leaking %p size %u page(s)", + addr, nr_grefs); + out_free_ballooned_pages: + if (!leaked) + free_xenballooned_pages(nr_grefs, node->hvm.pages); out_err: kfree(node); return err; @@ -525,35 +667,37 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, /** * xenbus_map_ring * @dev: xenbus device - * @gnt_ref: grant reference - * @handle: pointer to grant handle to be filled - * @vaddr: address to be mapped to + * @gnt_refs: grant reference array + * @nr_grefs: number of grant reference + * @handles: pointer to grant handle to be filled + * @vaddrs: addresses to be mapped to + * @leaked: fail to clean up a failed map, caller should not free vaddr * - * Map a page of memory into this domain from another domain's grant table. + * Map pages of memory into this domain from another domain's grant table. * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the page to the specified address. + * this yourself!). It only maps in the pages to the specified address. * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. + * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to + * XenbusStateClosing and the first error message will be saved in XenStore. + * Further more if we fail to map the ring, caller should check @leaked. + * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller + * should not free the address space of @vaddr. */ -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) +int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, grant_handle_t *handles, + unsigned long *vaddrs, bool *leaked) { - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, - dev->otherend_id); + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + int i; - gnttab_batch_map(&op, 1); + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = (unsigned long)vaddrs[i]; - return op.status; + return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles, + phys_addrs, GNTMAP_host_map, leaked); } EXPORT_SYMBOL_GPL(xenbus_map_ring); @@ -579,14 +723,15 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; unsigned int level; + int i; + bool leaked = false; + int err; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { - if (node->area->addr == vaddr) { + if (node->pv.area->addr == vaddr) { list_del(&node->next); goto found; } @@ -601,22 +746,41 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) return GNTST_bad_virt_addr; } - op.handle = node->handle; - op.host_addr = arbitrary_virt_to_machine( - lookup_address((unsigned long)vaddr, &level)).maddr; + for (i = 0; i < node->nr_handles; i++) { + unsigned long addr; + + memset(&unmap[i], 0, sizeof(unmap[i])); + addr = (unsigned long)vaddr + (PAGE_SIZE * i); + unmap[i].host_addr = arbitrary_virt_to_machine( + lookup_address(addr, &level)).maddr; + unmap[i].dev_bus_addr = 0; + unmap[i].handle = node->handles[i]; + } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) BUG(); - if (op.status == GNTST_okay) - free_vm_area(node->area); + err = GNTST_okay; + leaked = false; + for (i = 0; i < node->nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + leaked = true; + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + node->handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + if (!leaked) + free_vm_area(node->pv.area); else - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - node->handle, op.status); + pr_alert("leaking VM area %p size %u page(s)", + node->pv.area, node->nr_handles); kfree(node); - return op.status; + return err; } static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) @@ -624,10 +788,12 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) int rv; struct xenbus_map_node *node; void *addr; + unsigned long addrs[XENBUS_MAX_RING_PAGES]; + int i; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { - addr = pfn_to_kaddr(page_to_pfn(node->page)); + addr = node->hvm.addr; if (addr == vaddr) { list_del(&node->next); goto found; @@ -643,12 +809,16 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) return GNTST_bad_virt_addr; } - rv = xenbus_unmap_ring(dev, node->handle, addr); + for (i = 0; i < node->nr_handles; i++) + addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i])); + rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, + addrs); if (!rv) - free_xenballooned_pages(1, &node->page); + vunmap(vaddr); else - WARN(1, "Leaking %p\n", vaddr); + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, + node->nr_handles); kfree(node); return rv; @@ -657,29 +827,44 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) /** * xenbus_unmap_ring * @dev: xenbus device - * @handle: grant handle - * @vaddr: addr to unmap + * @handles: grant handle array + * @nr_handles: number of handles in the array + * @vaddrs: addresses to unmap * - * Unmap a page of memory in this domain that was imported from another domain. + * Unmap memory in this domain that was imported from another domain. * Returns 0 on success and returns GNTST_* on error * (see xen/include/interface/grant_table.h). */ int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) + grant_handle_t *handles, unsigned int nr_handles, + unsigned long *vaddrs) { - struct gnttab_unmap_grant_ref op; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + int i; + int err; - gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle); + if (nr_handles > XENBUS_MAX_RING_PAGES) + return -EINVAL; - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + for (i = 0; i < nr_handles; i++) + gnttab_set_unmap_op(&unmap[i], vaddrs[i], + GNTMAP_host_map, handles[i]); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) BUG(); - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); + err = GNTST_okay; + for (i = 0; i < nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } - return op.status; + return err; } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c new file mode 100644 index 000000000000..58a5389aec89 --- /dev/null +++ b/drivers/xen/xlate_mmu.c @@ -0,0 +1,143 @@ +/* + * MMU operations common to all auto-translated physmap guests. + * + * Copyright (C) 2015 Citrix Systems R&D Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/interface/xen.h> +#include <xen/interface/memory.h> + +/* map fgmfn of domid to lpfn in the current domain */ +static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn, + unsigned int domid) +{ + int rc; + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = domid, + .size = 1, + .space = XENMAPSPACE_gmfn_foreign, + }; + xen_ulong_t idx = fgmfn; + xen_pfn_t gpfn = lpfn; + int err = 0; + + set_xen_guest_handle(xatp.idxs, &idx); + set_xen_guest_handle(xatp.gpfns, &gpfn); + set_xen_guest_handle(xatp.errs, &err); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + return rc < 0 ? rc : err; +} + +struct remap_data { + xen_pfn_t *fgmfn; /* foreign domain's gmfn */ + pgprot_t prot; + domid_t domid; + struct vm_area_struct *vma; + int index; + struct page **pages; + struct xen_remap_mfn_info *info; + int *err_ptr; + int mapped; +}; + +static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + struct remap_data *info = data; + struct page *page = info->pages[info->index++]; + unsigned long pfn = page_to_pfn(page); + pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); + int rc; + + rc = map_foreign_page(pfn, *info->fgmfn, info->domid); + *info->err_ptr++ = rc; + if (!rc) { + set_pte_at(info->vma->vm_mm, addr, ptep, pte); + info->mapped++; + } + info->fgmfn++; + + return 0; +} + +int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) +{ + int err; + struct remap_data data; + unsigned long range = nr << PAGE_SHIFT; + + /* Kept here for the purpose of making sure code doesn't break + x86 PVOPS */ + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); + + data.fgmfn = mfn; + data.prot = prot; + data.domid = domid; + data.vma = vma; + data.pages = pages; + data.index = 0; + data.err_ptr = err_ptr; + data.mapped = 0; + + err = apply_to_page_range(vma->vm_mm, addr, range, + remap_pte_fn, &data); + return err < 0 ? err : data.mapped; +} +EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array); + +int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, + int nr, struct page **pages) +{ + int i; + + for (i = 0; i < nr; i++) { + struct xen_remove_from_physmap xrp; + unsigned long pfn; + + pfn = page_to_pfn(pages[i]); + + xrp.domid = DOMID_SELF; + xrp.gpfn = pfn; + (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); + } + return 0; +} +EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index f68719f405af..a48378958062 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -67,7 +67,7 @@ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 -#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_xsm_op 27 #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op 29 #define __HYPERVISOR_callback_op 30 @@ -75,7 +75,11 @@ #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 #define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 83338210ee04..c643e6a94c9a 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -27,13 +27,58 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); struct vm_area_struct; + +/* + * xen_remap_domain_mfn_array() - map an array of foreign frames + * @vma: VMA to map the pages into + * @addr: Address at which to map the pages + * @gfn: Array of GFNs to map + * @nr: Number entries in the GFN array + * @err_ptr: Returns per-GFN error status. + * @prot: page protection mask + * @domid: Domain owning the pages + * @pages: Array of pages if this domain has an auto-translated physmap + * + * @gfn and @err_ptr may point to the same buffer, the GFNs will be + * overwritten by the error codes after they are mapped. + * + * Returns the number of successfully mapped frames, or a -ve error + * code. + */ +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *gfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages); + +/* xen_remap_domain_mfn_range() - map a range of foreign frames + * @vma: VMA to map the pages into + * @addr: Address at which to map the pages + * @gfn: First GFN to map. + * @nr: Number frames to map + * @prot: page protection mask + * @domid: Domain owning the pages + * @pages: Array of pages if this domain has an auto-translated physmap + * + * Returns the number of successfully mapped frames, or a -ve error + * code. + */ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t mfn, int nr, + xen_pfn_t gfn, int nr, pgprot_t prot, unsigned domid, struct page **pages); int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int numpgs, struct page **pages); +int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *gfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages); +int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, + int nr, struct page **pages); bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index b0f1c9e5d687..289c0b5f08fe 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -46,6 +46,10 @@ #include <xen/interface/io/xenbus.h> #include <xen/interface/io/xs_wire.h> +#define XENBUS_MAX_RING_PAGE_ORDER 4 +#define XENBUS_MAX_RING_PAGES (1U << XENBUS_MAX_RING_PAGE_ORDER) +#define INVALID_GRANT_HANDLE (~0U) + /* Register callback to watch this node. */ struct xenbus_watch { @@ -199,15 +203,19 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr); +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + unsigned int nr_pages, grant_ref_t *grefs); +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, void **vaddr); +int xenbus_map_ring(struct xenbus_device *dev, + grant_ref_t *gnt_refs, unsigned int nr_grefs, + grant_handle_t *handles, unsigned long *vaddrs, + bool *leaked); int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr); + grant_handle_t *handles, unsigned int nr_handles, + unsigned long *vaddrs); int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); int xenbus_free_evtchn(struct xenbus_device *dev, int port); diff --git a/scripts/xen-hypercalls.sh b/scripts/xen-hypercalls.sh new file mode 100644 index 000000000000..676d9226814f --- /dev/null +++ b/scripts/xen-hypercalls.sh @@ -0,0 +1,12 @@ +#!/bin/sh +out="$1" +shift +in="$@" + +for i in $in; do + eval $CPP $LINUXINCLUDE -dD -imacros "$i" -x c /dev/null +done | \ +awk '$1 == "#define" && $2 ~ /__HYPERVISOR_[a-z][a-z_0-9]*/ { v[$3] = $2 } + END { print "/* auto-generated by scripts/xen-hypercall.sh */" + for (i in v) if (!(v[i] in v)) + print "HYPERCALL("substr(v[i], 14)")"}' | sort -u >$out |