diff options
author | Rashmica Gupta <rashmica.g@gmail.com> | 2018-08-10 13:38:36 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2018-08-15 18:16:55 +1000 |
commit | c7c01e575d75a722edf8f33bac6c61eca3ab3086 (patch) | |
tree | bc3cdd4883b9dd97a2df937a5818c45b92b487fa | |
parent | be51039216fdd84cf6a623df985c06a02f7c020c (diff) | |
download | linux-next-c7c01e575d75a722edf8f33bac6c61eca3ab3086.tar.gz |
resource: merge resources on a node when hot-adding memory
When hot-removing memory release_mem_region_adjustable() splits
iomem resources if they are not the exact size of the memory being
hot-deleted. Adding this memory back to the kernel adds a new
resource.
Eg a node has memory 0x0 - 0xfffffffff. Offlining and hot-removing
1GB from 0xf40000000 results in the single resource 0x0-0xfffffffff being
split into two resources: 0x0-0xf3fffffff and 0xf80000000-0xfffffffff.
When we hot-add the memory back we now have three resources:
0x0-0xf3fffffff, 0xf40000000-0xf7fffffff, and 0xf80000000-0xfffffffff.
Now if we try to remove some memory that overlaps these resources,
like 2GB from 0xf40000000, release_mem_region_adjustable() fails as it
expects the chunk of memory to be within the boundaries of a single
resource.
This patch adds a function request_resource_and_merge(). This is called
instead of request_resource_conflict() when registering a resource in
add_memory(). It calls request_resource_conflict() and if hot-removing is
enabled (if it isn't we won't get resource fragmentation) we attempt to
merge contiguous resources on the node.
Link: http://lkml.kernel.org/r/20180809025409.31552-1-rashmica.g@gmail.com
Signed-off-by: Rashmica Gupta <rashmica.g@gmail.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Yaowei Bai <baiyaowei@cmss.chinamobile.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: <yasu.isimatu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
-rw-r--r-- | drivers/xen/balloon.c | 3 | ||||
-rw-r--r-- | include/linux/ioport.h | 2 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 2 | ||||
-rw-r--r-- | kernel/resource.c | 120 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 20 |
5 files changed, 135 insertions, 12 deletions
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index e12bb256036f..559e77a20a4d 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -395,7 +395,8 @@ static enum bp_state reserve_additional_memory(void) * callers drop the mutex before trying again. */ mutex_unlock(&balloon_mutex); - rc = add_memory_resource(nid, resource, memhp_auto_online); + rc = add_memory_resource(nid, resource->start, resource_size(resource), + memhp_auto_online); mutex_lock(&balloon_mutex); if (rc) { diff --git a/include/linux/ioport.h b/include/linux/ioport.h index da0ebaec25f0..f5b93a711e86 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -189,6 +189,8 @@ extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t, resource_size_t), void *alignf_data); +extern struct resource *request_resource_and_merge(struct resource *parent, + struct resource *new, int nid); struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 34a28227068d..233f066a2cb6 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -323,7 +323,7 @@ extern void __ref free_area_init_core_hotplug(int nid); extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); -extern int add_memory_resource(int nid, struct resource *resource, bool online); +extern int add_memory_resource(int nid, u64 start, u64 size, bool online); extern int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, diff --git a/kernel/resource.c b/kernel/resource.c index 30e1bc68503b..a31d3f5bccb7 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1621,3 +1621,123 @@ static int __init strict_iomem(char *str) } __setup("iomem=", strict_iomem); + +#ifdef CONFIG_MEMORY_HOTPLUG +#ifdef CONFIG_MEMORY_HOTREMOVE +/* + * Attempt to merge resource and it's sibling + */ +static int merge_resources(struct resource *res) +{ + struct resource *next; + struct resource *tmp; + uint64_t size; + int ret = -EINVAL; + + next = res->sibling; + + /* + * Not sure how to handle two different children. So only attempt + * to merge two resources if neither have children, only one has a + * child or if both have the same child. + */ + if ((res->child && next->child) && (res->child != next->child)) + return ret; + + if (res->end + 1 != next->start) + return ret; + + if (res->flags != next->flags) + return ret; + + /* Update sibling and child of resource */ + res->sibling = next->sibling; + tmp = res->child; + if (!res->child) + res->child = next->child; + + size = next->end - res->start + 1; + ret = __adjust_resource(res, res->start, size); + if (ret) { + /* Failed so restore resource to original state */ + res->sibling = next; + res->child = tmp; + return ret; + } + + free_resource(next); + + return ret; +} + +/* + * Attempt to merge resources on the node + */ +static void merge_node_resources(int nid, struct resource *parent) +{ + struct resource *res; + uint64_t start_addr; + uint64_t end_addr; + int ret; + + start_addr = node_start_pfn(nid) << PAGE_SHIFT; + end_addr = node_end_pfn(nid) << PAGE_SHIFT; + + write_lock(&resource_lock); + + /* Get the first resource */ + res = parent->child; + + while (res) { + /* Check that the resource is within the node */ + if (res->start < start_addr) { + res = res->sibling; + continue; + } + /* Exit if sibling resource is past end of node */ + if (res->sibling->end >= end_addr) + break; + + ret = merge_resources(res); + if (!ret) + continue; + res = res->sibling; + } + write_unlock(&resource_lock); +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ + +/** + * request_resource_and_merge() - request an I/O or memory resource for hot-add + * @parent: parent resource descriptor + * @new: resource descriptor desired by caller + * @nid: node id of the node we want the resource on + * + * If no conflict resource then attempt to merge resources on the node. + * + * This is intended to cleanup the fragmentation of resources that occurs when + * hot-removing memory (see release_mem_region_adjustable). If hot-removing is + * not enabled then there is no point trying to merge resources. + * + * Note that the inability to merge resources is not an error. + * + * Return: NULL for successful request of resource and conflict resource if + * there was a conflict. + */ +struct resource *request_resource_and_merge(struct resource *parent, + struct resource *new, int nid) +{ + struct resource *conflict; + + conflict = request_resource_conflict(parent, new); + + if (conflict) + return conflict; + +#ifdef CONFIG_MEMORY_HOTREMOVE + merge_node_resources(nid, parent); +#endif /* CONFIG_MEMORY_HOTREMOVE */ + + return NULL; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9eea6e809a4e..5d50057e7716 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -97,7 +97,7 @@ void mem_hotplug_done(void) } /* add this memory to iomem resource */ -static struct resource *register_memory_resource(u64 start, u64 size) +static struct resource *register_memory_resource(int nid, u64 start, u64 size) { struct resource *res, *conflict; res = kzalloc(sizeof(struct resource), GFP_KERNEL); @@ -108,7 +108,7 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->start = start; res->end = start + size - 1; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - conflict = request_resource_conflict(&iomem_resource, res); + conflict = request_resource_and_merge(&iomem_resource, res, nid); if (conflict) { if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { pr_debug("Device unaddressable memory block " @@ -122,11 +122,15 @@ static struct resource *register_memory_resource(u64 start, u64 size) return res; } -static void release_memory_resource(struct resource *res) +static void release_memory_resource(struct resource *res, u64 start, u64 size) { if (!res) return; +#ifdef CONFIG_MEMORY_HOTREMOVE + release_mem_region_adjustable(&iomem_resource, start, size); +#else release_resource(res); +#endif kfree(res); return; } @@ -1114,13 +1118,9 @@ static int online_memory_block(struct memory_block *mem, void *arg) /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ int __ref add_memory_resource(int nid, struct resource *res, bool online) { - u64 start, size; bool new_node = false; int ret; - start = res->start; - size = resource_size(res); - ret = check_hotplug_memory_range(start, size); if (ret) return ret; @@ -1187,13 +1187,13 @@ int __ref add_memory(int nid, u64 start, u64 size) struct resource *res; int ret; - res = register_memory_resource(start, size); + res = register_memory_resource(nid, start, size); if (IS_ERR(res)) return PTR_ERR(res); - ret = add_memory_resource(nid, res, memhp_auto_online); + ret = add_memory_resource(nid, start, size, memhp_auto_online); if (ret < 0) - release_memory_resource(res); + release_memory_resource(res, start, size); return ret; } EXPORT_SYMBOL_GPL(add_memory); |