From 32c5fc10e79a7053ac5728b01a0bff55cbcb9d49 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Mon, 2 Nov 2009 16:50:33 +0000 Subject: mm: remove incorrect swap_count() from try_to_unuse() In try_to_unuse(), swcount is a local copy of *swap_map, including the SWAP_HAS_CACHE bit; but a wrong comparison against swap_count(*swap_map), which masks off the SWAP_HAS_CACHE bit, succeeded where it should fail. That had the effect of resetting the mm from which to start searching for the next swap page, to an irrelevant mm instead of to an mm in which this swap page had been found: which may increase search time by ~20%. But we're used to swapoff being slow, so never noticed the slowdown. Remove that one spurious use of swap_count(): Bo Liu thought it merely redundant, Hugh rewrote the description since it was measurably wrong. Signed-off-by: Bo Liu Signed-off-by: Hugh Dickins Reviewed-by: KAMEZAWA Hiroyuki Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/swapfile.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/swapfile.c b/mm/swapfile.c index a1bc6b9af9a2..9c590eef7912 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1151,8 +1151,7 @@ static int try_to_unuse(unsigned int type) } else retval = unuse_mm(mm, entry, page); - if (set_start_mm && - swap_count(*swap_map) < swcount) { + if (set_start_mm && *swap_map < swcount) { mmput(new_start_mm); atomic_inc(&mm->mm_users); new_start_mm = mm; -- cgit v1.2.1 From d178f27fc5150d680d9df865ea9dfe3269cf00a6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 9 Nov 2009 15:58:23 +0000 Subject: ksm: cond_resched in unstable tree KSM needs a cond_resched() for CONFIG_PREEMPT_NONE, in its unbounded search of the unstable tree. The stable tree cases already have one, and originally there was one down inside get_user_pages(); but I missed it when I converted to follow_page() instead. Signed-off-by: Hugh Dickins Acked-by: Izik Eidus Signed-off-by: Linus Torvalds --- mm/ksm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/ksm.c b/mm/ksm.c index bef1af4f77e3..5575f8628fef 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1012,6 +1012,7 @@ static struct rmap_item *unstable_tree_search_insert(struct page *page, struct rmap_item *tree_rmap_item; int ret; + cond_resched(); tree_rmap_item = rb_entry(*new, struct rmap_item, node); page2[0] = get_mergeable_page(tree_rmap_item); if (!page2[0]) -- cgit v1.2.1 From 5ebd4c22897dce65845807a9bd3a31cc4e142b53 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Date: Wed, 28 Oct 2009 18:55:36 +0100 Subject: highmem: Fix race in debug_kmap_atomic() which could cause warn_count to underflow debug_kmap_atomic() tries to prevent ever printing more than 10 warnings, but it does so by testing whether an unsigned integer is equal to 0. However, if the warning is caused by a nested IRQ, then this counter may underflow and the stream of warnings will never end. Fix that by using a signed integer instead. 
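To see why the unsigned counter lets the warnings run forever once a nested warning sneaks in between the test and the decrement, consider this stand-alone user-space sketch; it is not kernel code, and old_warn()/new_warn() are made-up stand-ins for the before/after behaviour of debug_kmap_atomic()'s counter handling:

/*
 * Stand-alone user-space sketch -- not kernel code.  old_warn() and
 * new_warn() are made-up stand-ins for debug_kmap_atomic()'s counter
 * handling before and after the fix; the "nested" flag simulates a
 * warning raised by a nested IRQ between the test and the decrement.
 */
#include <stdio.h>
#include <limits.h>

static unsigned int old_count = 1;      /* old code: unsigned, tested with == 0 */
static int new_count = 1;               /* fixed code: signed, tested with < 0 */

static void old_warn(int nested)
{
        if (old_count == 0)
                return;
        if (nested)
                old_warn(0);            /* nested warning drives the count to 0 */
        old_count--;                    /* 0 - 1 wraps around to UINT_MAX */
}

static void new_warn(int nested)
{
        if (new_count < 0)
                return;
        if (nested)
                new_warn(0);
        new_count--;                    /* may reach -1, but the test still stops us */
}

int main(void)
{
        old_warn(1);
        printf("old counter: %u (wrapped to UINT_MAX: %s)\n",
               old_count, old_count == UINT_MAX ? "yes" : "no");

        new_warn(1);
        new_warn(0);                    /* later calls return immediately */
        printf("new counter: %d\n", new_count);
        return 0;
}

The wrapped counter is never equal to 0 again, so the old test can no longer silence the warnings; the signed counter settles at -1 and every later call bails out.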
Signed-off-by: Soeren Sandmann Pedersen Cc: Linus Torvalds Cc: a.p.zijlstra@chello.nl Cc: # .31.x LKML-Reference: Signed-off-by: Ingo Molnar --- mm/highmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/highmem.c b/mm/highmem.c index 25878cc49daa..33587de6b871 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -426,9 +426,9 @@ void __init page_address_init(void) void debug_kmap_atomic(enum km_type type) { - static unsigned warn_count = 10; + static int warn_count = 10; - if (unlikely(warn_count == 0)) + if (unlikely(warn_count < 0)) return; if (unlikely(in_interrupt())) { -- cgit v1.2.1 From d4515646699b6ad7b1a98ceb871296b957f3ef47 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Date: Wed, 28 Oct 2009 18:56:35 +0100 Subject: highmem: Fix debug_kmap_atomic() to also handle KM_IRQ_PTE, KM_NMI, and KM_NMI_PTE Previously calling debug_kmap_atomic() with these types would cause spurious warnings. (triggered by SysProf using perf events) Signed-off-by: Soeren Sandmann Pedersen Cc: Linus Torvalds Cc: a.p.zijlstra@chello.nl Cc: # .31.x LKML-Reference: Signed-off-by: Ingo Molnar --- mm/highmem.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/highmem.c b/mm/highmem.c index 33587de6b871..9c1e627f282e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -432,10 +432,15 @@ void debug_kmap_atomic(enum km_type type) return; if (unlikely(in_interrupt())) { - if (in_irq()) { + if (in_nmi()) { + if (type != KM_NMI && type != KM_NMI_PTE) { + WARN_ON(1); + warn_count--; + } + } else if (in_irq()) { if (type != KM_IRQ0 && type != KM_IRQ1 && type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && - type != KM_BOUNCE_READ) { + type != KM_BOUNCE_READ && type != KM_IRQ_PTE) { WARN_ON(1); warn_count--; } @@ -452,7 +457,9 @@ void debug_kmap_atomic(enum km_type type) } if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || - type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ || + type == KM_IRQ_PTE || type == KM_NMI || + type == KM_NMI_PTE ) { if (!irqs_disabled()) { WARN_ON(1); warn_count--; -- cgit v1.2.1 From c62b17a58ab5e97534ff6487241addd5fcc606de Mon Sep 17 00:00:00 2001 From: Romit Dasgupta Date: Thu, 12 Nov 2009 13:08:11 +0100 Subject: Thaw refrigerated bdi flusher threads before invoking kthread_stop on them Unfreezes the bdi flusher task when the said task needs to exit. Steps to reproduce this. 1) Mount a file system from MMC/SD card. 2) Unmount the file system. This creates a flusher task. 3) Attempt suspend to RAM. System is unresponsive. This is because the bdi flusher thread is already in the refrigerator and will remain so until it is thawed. The MMC driver suspend routine call stack will ultimately issue a 'kthread_stop' on the bdi flusher thread and will block until the flusher thread is exited. Since the bdi flusher thread is in the refrigerator it never cleans up until thawed. Signed-off-by: Romit Dasgupta Signed-off-by: Jens Axboe --- mm/backing-dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 11aee09dd2a6..67a33a5a1a93 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -604,10 +604,14 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Finally, kill the kernel threads. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. + * safe anymore, since the bdi is gone from visibility. 
Force + * unfreeze of the thread before calling kthread_stop(), otherwise + * it would never exet if it is currently stuck in the refrigerator. */ - list_for_each_entry(wb, &bdi->wb_list, list) + list_for_each_entry(wb, &bdi->wb_list, list) { + wb->task->flags &= ~PF_FROZEN; kthread_stop(wb->task); + } } /* -- cgit v1.2.1 From cc4a6851466039a8a688c843962a05689059ff3b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 11 Nov 2009 14:26:14 -0800 Subject: page allocator: always wake kswapd when restarting an allocation attempt after direct reclaim failed If a direct reclaim makes no forward progress, it considers whether it should go OOM or not. Whether OOM is triggered or not, it may retry the allocation afterwards. In times past, this would always wake kswapd as well but currently, kswapd is not woken up after direct reclaim fails. For order-0 allocations, this makes little difference but if there is a heavy mix of higher-order allocations that direct reclaim is failing for, it might mean that kswapd is not rewoken for higher orders as much as it did previously. This patch wakes up kswapd when an allocation is being retried after a direct reclaim failure. It would be expected that kswapd is already awake, but this has the effect of telling kswapd to reclaim at the higher order as well. Signed-off-by: Mel Gorman Reviewed-by: Christoph Lameter Reviewed-by: Pekka Enberg Reviewed-by: KOSAKI Motohiro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cdcedf661616..250d0552a92d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1817,9 +1817,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; +restart: wake_all_kswapd(order, zonelist, high_zoneidx); -restart: /* * OK, we're below the kswapd watermark and have kicked background * reclaim. Now things get more complex, so set up alloc_flags according -- cgit v1.2.1 From 9d0ed60fe9cd1fbf57f755cd27a23ae9114d7210 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 11 Nov 2009 14:26:17 -0800 Subject: page allocator: Do not allow interrupts to use ALLOC_HARDER Commit 341ce06f69abfafa31b9468410a13dbd60e2b237 ("page allocator: calculate the alloc_flags for allocation only once") altered watermark logic slightly by allowing rt_tasks that are handling an interrupt to set ALLOC_HARDER. This patch brings the watermark logic more in line with 2.6.30. This change results in a reduction of the number high-order GFP_ATOMIC allocation failures reported. See http://www.gossamer-threads.com/lists/linux/kernel/1144153 [rientjes@google.com: Spotted the problem] Signed-off-by: Mel Gorman Reviewed-by: Pekka Enberg Reviewed-by: Rik van Riel Reviewed-by: KOSAKI Motohiro Cc: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 250d0552a92d..2bc2ac63f41e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1769,7 +1769,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 
*/ alloc_flags &= ~ALLOC_CPUSET; - } else if (unlikely(rt_task(p))) + } else if (unlikely(rt_task(p)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) { -- cgit v1.2.1 From e00e431612c3a6e437a01f2129fd3843da0c982a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 11 Nov 2009 14:26:26 -0800 Subject: memcg: fix wrong pointer initialization at page migration when memcg is disabled. Lee Schermerhorn reported that he saw bad pointer dereference in mem_cgroup_end_migration() when he disabled memcg by boot option. memcg's page migration logic works as mem_cgroup_prepare_migration(page, &ptr); do page migration mem_cgroup_end_migration(page, ptr); Now, ptr is not initialized in prepare_migration when memcg is disabled by boot option. This causes panic in end_migration. This patch fixes it. Reported-by: Lee Schermerhorn Cc: Balbir Singh Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/migrate.c b/mm/migrate.c index 1a4bf4813780..7dbcb22316d2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -602,7 +602,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, struct page *newpage = get_new_page(page, private, &result); int rcu_locked = 0; int charge = 0; - struct mem_cgroup *mem; + struct mem_cgroup *mem = NULL; if (!newpage) return -ENOMEM; -- cgit v1.2.1 From 833af8427be4b217b5bc522f61afdbd3f1d282c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Nov 2009 15:35:18 +0900 Subject: percpu: restructure pcpu_extend_area_map() to fix bugs and improve readability pcpu_extend_area_map() had the following two bugs. * It should return 1 if pcpu_lock was dropped and reacquired but it returned 0. This could lead to oops if free_percpu() races with area map extension. * pcpu_mem_free() was called under pcpu_lock. pcpu_mem_free() might end up calling vfree() which isn't IRQ safe. This could lead to deadlock through lock order inversion via IRQ. In addition, Linus pointed out that the temporary lock dropping and subtle three-way return value of pcpu_extend_area_map() was very ugly and suggested to split the function into two - pcpu_need_to_extend() and pcpu_extend_area_map(). This patch restructures pcpu_extend_area_map() as suggested and fixes the two bugs. Signed-off-by: Tejun Heo Acked-by: Linus Torvalds Cc: Ingo Molnar --- mm/percpu.c | 121 ++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 81 insertions(+), 40 deletions(-) (limited to 'mm') diff --git a/mm/percpu.c b/mm/percpu.c index d90797160c2a..5adfc268b408 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -355,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) } /** - * pcpu_extend_area_map - extend area map for allocation - * @chunk: target chunk + * pcpu_need_to_extend - determine whether chunk area map needs to be extended + * @chunk: chunk of interest * - * Extend area map of @chunk so that it can accomodate an allocation. - * A single allocation can split an area into three areas, so this - * function makes sure that @chunk->map has at least two extra slots. + * Determine whether area map of @chunk needs to be extended to + * accomodate a new allocation. * * CONTEXT: - * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired - * if area map is extended. + * pcpu_lock. 
* * RETURNS: - * 0 if noop, 1 if successfully extended, -errno on failure. + * New target map allocation length if extension is necessary, 0 + * otherwise. */ -static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags) +static int pcpu_need_to_extend(struct pcpu_chunk *chunk) { int new_alloc; - int *new; - size_t size; - /* has enough? */ if (chunk->map_alloc >= chunk->map_used + 2) return 0; - spin_unlock_irqrestore(&pcpu_lock, *flags); - new_alloc = PCPU_DFL_MAP_ALLOC; while (new_alloc < chunk->map_used + 2) new_alloc *= 2; - new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); - if (!new) { - spin_lock_irqsave(&pcpu_lock, *flags); + return new_alloc; +} + +/** + * pcpu_extend_area_map - extend area map of a chunk + * @chunk: chunk of interest + * @new_alloc: new target allocation length of the area map + * + * Extend area map of @chunk to have @new_alloc entries. + * + * CONTEXT: + * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) +{ + int *old = NULL, *new = NULL; + size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); + unsigned long flags; + + new = pcpu_mem_alloc(new_size); + if (!new) return -ENOMEM; - } - /* - * Acquire pcpu_lock and switch to new area map. Only free - * could have happened inbetween, so map_used couldn't have - * grown. - */ - spin_lock_irqsave(&pcpu_lock, *flags); - BUG_ON(new_alloc < chunk->map_used + 2); + /* acquire pcpu_lock and switch to new area map */ + spin_lock_irqsave(&pcpu_lock, flags); + + if (new_alloc <= chunk->map_alloc) + goto out_unlock; - size = chunk->map_alloc * sizeof(chunk->map[0]); - memcpy(new, chunk->map, size); + old_size = chunk->map_alloc * sizeof(chunk->map[0]); + memcpy(new, chunk->map, old_size); /* * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is * one of the first chunks and still using static map. */ if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) - pcpu_mem_free(chunk->map, size); + old = chunk->map; chunk->map_alloc = new_alloc; chunk->map = new; + new = NULL; + +out_unlock: + spin_unlock_irqrestore(&pcpu_lock, flags); + + /* + * pcpu_mem_free() might end up calling vfree() which uses + * IRQ-unsafe lock and thus can't be called under pcpu_lock. 
+ */ + pcpu_mem_free(old, old_size); + pcpu_mem_free(new, new_size); + return 0; } @@ -1049,7 +1073,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; - int slot, off; + int slot, off, new_alloc; unsigned long flags; if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { @@ -1064,14 +1088,25 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) /* serve reserved allocations from the reserved chunk if available */ if (reserved && pcpu_reserved_chunk) { chunk = pcpu_reserved_chunk; - if (size > chunk->contig_hint || - pcpu_extend_area_map(chunk, &flags) < 0) { - err = "failed to extend area map of reserved chunk"; + + if (size > chunk->contig_hint) { + err = "alloc from reserved chunk failed"; goto fail_unlock; } + + while ((new_alloc = pcpu_need_to_extend(chunk))) { + spin_unlock_irqrestore(&pcpu_lock, flags); + if (pcpu_extend_area_map(chunk, new_alloc) < 0) { + err = "failed to extend area map of reserved chunk"; + goto fail_unlock_mutex; + } + spin_lock_irqsave(&pcpu_lock, flags); + } + off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; + err = "alloc from reserved chunk failed"; goto fail_unlock; } @@ -1083,14 +1118,20 @@ restart: if (size > chunk->contig_hint) continue; - switch (pcpu_extend_area_map(chunk, &flags)) { - case 0: - break; - case 1: - goto restart; /* pcpu_lock dropped, restart */ - default: - err = "failed to extend area map"; - goto fail_unlock; + new_alloc = pcpu_need_to_extend(chunk); + if (new_alloc) { + spin_unlock_irqrestore(&pcpu_lock, flags); + if (pcpu_extend_area_map(chunk, + new_alloc) < 0) { + err = "failed to extend area map"; + goto fail_unlock_mutex; + } + spin_lock_irqsave(&pcpu_lock, flags); + /* + * pcpu_lock has been dropped, need to + * restart cpu_slot list walking. + */ + goto restart; } off = pcpu_alloc_area(chunk, size, align); -- cgit v1.2.1 From e13193319d3a5545c82ed4b724bffd16f87873e3 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 17 Nov 2009 14:06:18 -0800 Subject: mm/memory_hotplug: fix section mismatch With CONFIG_MEMORY_HOTPLUG I got following warning: WARNING: vmlinux.o(.text+0x1276b0): Section mismatch in reference from the function hotadd_new_pgdat() to the function .meminit.text:free_area_init_node() The function hotadd_new_pgdat() references the function __meminit free_area_init_node(). This is often because hotadd_new_pgdat lacks a __meminit annotation or the annotation of free_area_init_node is wrong. Use __ref to fix this. 
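For context, the annotations work roughly like this: __meminit-style code is emitted into a separate section that can be discarded when memory hotplug is not configured, and __ref marks a caller whose reference into such a section is known to be safe, so the section-mismatch check leaves it alone. A stand-alone sketch of that pattern, using made-up MY_MEMINIT/MY_REF macros rather than the kernel's real ones:

/*
 * Stand-alone sketch, not the kernel's real __meminit/__ref macros.
 * MY_MEMINIT drops a function into an init-style section; MY_REF marks a
 * caller whose reference into that section is intentional (the real __ref
 * puts the caller in .ref.text, which modpost's mismatch check whitelists).
 */
#include <stdio.h>

#define MY_MEMINIT __attribute__((__section__(".meminit.text"), __noinline__))
#define MY_REF     __attribute__((__section__(".ref.text"), __noinline__))

static void MY_MEMINIT init_only_setup(void)
{
        puts("running code placed in .meminit.text");
}

static void MY_REF hotadd_style_caller(void)
{
        /* Intentional call into the init-style section. */
        init_only_setup();
}

int main(void)
{
        hotadd_style_caller();
        return 0;
}

User space has no modpost, so the sketch only mirrors the section placement; the point is that hotadd_new_pgdat() keeps its legitimate call into __meminit code, and the warning is silenced by annotating the caller rather than by dropping the annotation on free_area_init_node().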
Signed-off-by: Hidetoshi Seto Cc: Minchan Kim Cc: Mel Gorman Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 821dee596377..380aef45c2cf 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -447,7 +447,8 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) } #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ -static pg_data_t *hotadd_new_pgdat(int nid, u64 start) +/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) { struct pglist_data *pgdat; unsigned long zones_size[MAX_NR_ZONES] = {0}; -- cgit v1.2.1 From 6ad696d2cf535772dff659298ec7e7260e344595 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 17 Nov 2009 14:06:22 -0800 Subject: mm: allow memory hotplug and hibernation in the same kernel Allow memory hotplug and hibernation in the same kernel Memory hotplug and hibernation were exclusive in Kconfig. This is obviously a problem for distribution kernels who want to support both in the same image. After some discussions with Rafael and others the only problem is with parallel memory hotadd or removal while a hibernation operation is in process. It was also working for s390 before. This patch removes the Kconfig level exclusion, and simply makes the memory add / remove functions grab the pm_mutex to exclude against hibernation. Fixes a regression - old kernels didn't exclude memory hotadd and hibernation. Signed-off-by: Andi Kleen Cc: Gerald Schaefer Cc: KOSAKI Motohiro Cc: Yasunori Goto Acked-by: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/Kconfig | 5 +---- mm/memory_hotplug.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/mm/Kconfig b/mm/Kconfig index fd3386242cf0..44cf6f0a3a6d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -128,12 +128,9 @@ config SPARSEMEM_VMEMMAP config MEMORY_HOTPLUG bool "Allow for memory hot-add" depends on SPARSEMEM || X86_64_ACPI_NUMA - depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG + depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) -comment "Memory hotplug is currently incompatible with Software Suspend" - depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390 - config MEMORY_HOTPLUG_SPARSE def_bool y depends on SPARSEMEM && MEMORY_HOTPLUG diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 380aef45c2cf..2047465cd27c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -485,14 +486,18 @@ int __ref add_memory(int nid, u64 start, u64 size) struct resource *res; int ret; + lock_system_sleep(); + res = register_memory_resource(start, size); + ret = -EEXIST; if (!res) - return -EEXIST; + goto out; if (!node_online(nid)) { pgdat = hotadd_new_pgdat(nid, start); + ret = -ENOMEM; if (!pgdat) - return -ENOMEM; + goto out; new_pgdat = 1; } @@ -515,7 +520,8 @@ int __ref add_memory(int nid, u64 start, u64 size) BUG_ON(ret); } - return ret; + goto out; + error: /* rollback pgdat allocation and others */ if (new_pgdat) @@ -523,6 +529,8 @@ error: if (res) release_memory_resource(res); +out: + unlock_system_sleep(); return ret; } EXPORT_SYMBOL_GPL(add_memory); @@ -759,6 +767,8 @@ int offline_pages(unsigned long start_pfn, if 
(!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; + lock_system_sleep(); + zone = page_zone(pfn_to_page(start_pfn)); node = zone_to_nid(zone); nr_pages = end_pfn - start_pfn; @@ -766,7 +776,7 @@ int offline_pages(unsigned long start_pfn, /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) - return ret; + goto out; arg.start_pfn = start_pfn; arg.nr_pages = nr_pages; @@ -844,6 +854,7 @@ repeat: writeback_set_ratelimit(); memory_notify(MEM_OFFLINE, &arg); + unlock_system_sleep(); return 0; failed_removal: @@ -853,6 +864,8 @@ failed_removal: /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn); +out: + unlock_system_sleep(); return ret; } -- cgit v1.2.1
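Both add_memory() and offline_pages() now follow the same shape: take the hibernation exclusion at entry and funnel every exit, including early failures, through a single label that releases it. A stand-alone sketch of that shape with stubbed helpers (not the real PM or resource API):

/*
 * Stand-alone sketch of the entry-lock / single-exit-label shape used by
 * the add_memory() change above.  The helpers are stubs made up for the
 * example, not the kernel's real lock_system_sleep()/resource API.
 */
#include <stdio.h>
#include <errno.h>

static void lock_system_sleep_stub(void)   { puts("hibernation excluded"); }
static void unlock_system_sleep_stub(void) { puts("hibernation allowed again"); }

static int register_memory_resource_stub(long long start)
{
        return start >= 0;              /* pretend negative start addresses fail */
}

static int add_memory_sketch(long long start)
{
        int ret;

        lock_system_sleep_stub();

        ret = -EEXIST;
        if (!register_memory_resource_stub(start))
                goto out;               /* early failure still drops the exclusion */

        /* ... pgdat allocation and section onlining would go here ... */
        ret = 0;
out:
        unlock_system_sleep_stub();
        return ret;
}

int main(void)
{
        printf("add_memory_sketch(0x1000) -> %d\n", add_memory_sketch(0x1000));
        printf("add_memory_sketch(-1)     -> %d\n", add_memory_sketch(-1));
        return 0;
}

Routing every return through the single out: label is what guarantees that the lock_system_sleep() taken at the top of add_memory() and offline_pages() is paired with exactly one unlock_system_sleep() on success and on every error path.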