summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRashmica Gupta <rashmica.g@gmail.com>2018-08-10 13:38:36 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2018-08-15 18:16:55 +1000
commitc7c01e575d75a722edf8f33bac6c61eca3ab3086 (patch)
treebc3cdd4883b9dd97a2df937a5818c45b92b487fa
parentbe51039216fdd84cf6a623df985c06a02f7c020c (diff)
downloadlinux-next-c7c01e575d75a722edf8f33bac6c61eca3ab3086.tar.gz
resource: merge resources on a node when hot-adding memory
When hot-removing memory, release_mem_region_adjustable() splits iomem resources if they are not the exact size of the memory being hot-deleted. Adding this memory back to the kernel adds a new resource. E.g., a node has memory 0x0-0xfffffffff. Offlining and hot-removing 1GB from 0xf40000000 results in the single resource 0x0-0xfffffffff being split into two resources: 0x0-0xf3fffffff and 0xf80000000-0xfffffffff. When we hot-add the memory back, we now have three resources: 0x0-0xf3fffffff, 0xf40000000-0xf7fffffff, and 0xf80000000-0xfffffffff. Now if we try to remove some memory that overlaps these resources, like 2GB from 0xf40000000, release_mem_region_adjustable() fails as it expects the chunk of memory to be within the boundaries of a single resource. This patch adds a function request_resource_and_merge(). This is called instead of request_resource_conflict() when registering a resource in add_memory(). It calls request_resource_conflict() and, if hot-removing is enabled (if it isn't, we won't get resource fragmentation), we attempt to merge contiguous resources on the node. Link: http://lkml.kernel.org/r/20180809025409.31552-1-rashmica.g@gmail.com Signed-off-by: Rashmica Gupta <rashmica.g@gmail.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Toshi Kani <toshi.kani@hpe.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Borislav Petkov <bp@suse.de> Cc: Brijesh Singh <brijesh.singh@amd.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Yaowei Bai <baiyaowei@cmss.chinamobile.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mathieu Malaterre <malat@debian.org> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: <yasu.isimatu@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
-rw-r--r--drivers/xen/balloon.c3
-rw-r--r--include/linux/ioport.h2
-rw-r--r--include/linux/memory_hotplug.h2
-rw-r--r--kernel/resource.c120
-rw-r--r--mm/memory_hotplug.c20
5 files changed, 135 insertions, 12 deletions
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index e12bb256036f..559e77a20a4d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -395,7 +395,8 @@ static enum bp_state reserve_additional_memory(void)
* callers drop the mutex before trying again.
*/
mutex_unlock(&balloon_mutex);
- rc = add_memory_resource(nid, resource, memhp_auto_online);
+ rc = add_memory_resource(nid, resource->start, resource_size(resource),
+ memhp_auto_online);
mutex_lock(&balloon_mutex);
if (rc) {
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index da0ebaec25f0..f5b93a711e86 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -189,6 +189,8 @@ extern int allocate_resource(struct resource *root, struct resource *new,
resource_size_t,
resource_size_t),
void *alignf_data);
+extern struct resource *request_resource_and_merge(struct resource *parent,
+ struct resource *new, int nid);
struct resource *lookup_resource(struct resource *root, resource_size_t start);
int adjust_resource(struct resource *res, resource_size_t start,
resource_size_t size);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 34a28227068d..233f066a2cb6 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -323,7 +323,7 @@ extern void __ref free_area_init_core_hotplug(int nid);
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
-extern int add_memory_resource(int nid, struct resource *resource, bool online);
+extern int add_memory_resource(int nid, u64 start, u64 size, bool online);
extern int arch_add_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap, bool want_memblock);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
diff --git a/kernel/resource.c b/kernel/resource.c
index 30e1bc68503b..a31d3f5bccb7 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1621,3 +1621,123 @@ static int __init strict_iomem(char *str)
}
__setup("iomem=", strict_iomem);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * Attempt to merge a resource with its next sibling.
+ *
+ * The two resources are merged only when they are physically contiguous
+ * (res->end + 1 == next->start) and carry identical flags. On success @res
+ * is grown to cover both ranges, the sibling is unlinked and freed, and 0 is
+ * returned. On failure nothing is changed and a negative errno is returned
+ * (-EINVAL when the pair is not mergeable, otherwise whatever
+ * __adjust_resource() reported).
+ *
+ * The sibling/child links are rewritten here without taking any lock, so the
+ * caller is expected to already hold resource_lock for writing.
+ */
+static int merge_resources(struct resource *res)
+{
+	struct resource *next;
+	struct resource *old_child;
+	struct resource *child;
+	uint64_t size;
+	int ret = -EINVAL;
+
+	next = res->sibling;
+
+	/* Nothing to merge with if @res is the last sibling */
+	if (!next)
+		return ret;
+
+	/*
+	 * Not sure how to handle two different children. So only attempt
+	 * to merge two resources if neither have children, only one has a
+	 * child or if both have the same child.
+	 */
+	if ((res->child && next->child) && (res->child != next->child))
+		return ret;
+
+	if (res->end + 1 != next->start)
+		return ret;
+
+	if (res->flags != next->flags)
+		return ret;
+
+	/*
+	 * Unlink the sibling and adopt its children before resizing, so that
+	 * __adjust_resource() validates the new range against the final
+	 * sibling and child lists.
+	 */
+	res->sibling = next->sibling;
+	old_child = res->child;
+	if (!res->child)
+		res->child = next->child;
+
+	size = next->end - res->start + 1;
+	ret = __adjust_resource(res, res->start, size);
+	if (ret) {
+		/* Failed so restore resource to original state */
+		res->sibling = next;
+		res->child = old_child;
+		return ret;
+	}
+
+	/*
+	 * Children taken over from the sibling still point at it as their
+	 * parent; repoint them at @res before the sibling is freed so they
+	 * are not left with a dangling ->parent.
+	 */
+	if (!old_child) {
+		for (child = res->child; child; child = child->sibling)
+			child->parent = res;
+	}
+
+	free_resource(next);
+
+	return ret;
+}
+
+/*
+ * Attempt to merge contiguous resources on the node.
+ *
+ * Walks the direct children of @parent under resource_lock (held for
+ * writing) and tries to merge each child that starts at or after the
+ * node's first address with its next sibling, stopping once the sibling
+ * would end at or beyond the node's end address. Failure to merge a given
+ * pair is not an error; the walk simply moves on to the next child.
+ */
+static void merge_node_resources(int nid, struct resource *parent)
+{
+	struct resource *res;
+	uint64_t start_addr;
+	uint64_t end_addr;
+
+	/*
+	 * Widen before shifting: the PFN is an unsigned long, so shifting it
+	 * first would truncate addresses above 4GB on 32-bit kernels.
+	 */
+	start_addr = (uint64_t)node_start_pfn(nid) << PAGE_SHIFT;
+	end_addr = (uint64_t)node_end_pfn(nid) << PAGE_SHIFT;
+
+	write_lock(&resource_lock);
+
+	/* Get the first resource */
+	res = parent->child;
+
+	/* A merge needs a pair, so stop once there is no next sibling */
+	while (res && res->sibling) {
+		/* Check that the resource is within the node */
+		if (res->start < start_addr) {
+			res = res->sibling;
+			continue;
+		}
+		/* Exit if sibling resource is past end of node */
+		if (res->sibling->end >= end_addr)
+			break;
+
+		/*
+		 * On success @res has absorbed its sibling; retry with the
+		 * same @res so a run of several fragments collapses into one.
+		 */
+		if (!merge_resources(res))
+			continue;
+		res = res->sibling;
+	}
+	write_unlock(&resource_lock);
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+/**
+ * request_resource_and_merge() - request a resource for hot-add, then defragment
+ * @parent: parent resource descriptor
+ * @new: resource descriptor desired by caller
+ * @nid: node id of the node we want the resource on
+ *
+ * Requests @new under @parent via request_resource_conflict(). When the
+ * request succeeds and CONFIG_MEMORY_HOTREMOVE is enabled, the resources on
+ * node @nid are then merged where possible, undoing the fragmentation that
+ * hot-removing memory leaves behind (see release_mem_region_adjustable).
+ * Without hot-remove support no such fragmentation can occur, so no merge is
+ * attempted.
+ *
+ * Being unable to merge anything is not treated as an error.
+ *
+ * Return: NULL if the resource was requested successfully, otherwise the
+ * conflicting resource.
+ */
+struct resource *request_resource_and_merge(struct resource *parent,
+					    struct resource *new, int nid)
+{
+	struct resource *busy = request_resource_conflict(parent, new);
+
+	if (!busy) {
+#ifdef CONFIG_MEMORY_HOTREMOVE
+		merge_node_resources(nid, parent);
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+	}
+
+	return busy;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9eea6e809a4e..5d50057e7716 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -97,7 +97,7 @@ void mem_hotplug_done(void)
}
/* add this memory to iomem resource */
-static struct resource *register_memory_resource(u64 start, u64 size)
+static struct resource *register_memory_resource(int nid, u64 start, u64 size)
{
struct resource *res, *conflict;
res = kzalloc(sizeof(struct resource), GFP_KERNEL);
@@ -108,7 +108,7 @@ static struct resource *register_memory_resource(u64 start, u64 size)
res->start = start;
res->end = start + size - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- conflict = request_resource_conflict(&iomem_resource, res);
+ conflict = request_resource_and_merge(&iomem_resource, res, nid);
if (conflict) {
if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
pr_debug("Device unaddressable memory block "
@@ -122,11 +122,15 @@ static struct resource *register_memory_resource(u64 start, u64 size)
return res;
}
-static void release_memory_resource(struct resource *res)
+static void release_memory_resource(struct resource *res, u64 start, u64 size)
{
if (!res)
return;
+#ifdef CONFIG_MEMORY_HOTREMOVE
+ release_mem_region_adjustable(&iomem_resource, start, size);
+#else
release_resource(res);
+#endif
kfree(res);
return;
}
@@ -1114,13 +1118,9 @@ static int online_memory_block(struct memory_block *mem, void *arg)
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
int __ref add_memory_resource(int nid, struct resource *res, bool online)
{
- u64 start, size;
bool new_node = false;
int ret;
- start = res->start;
- size = resource_size(res);
-
ret = check_hotplug_memory_range(start, size);
if (ret)
return ret;
@@ -1187,13 +1187,13 @@ int __ref add_memory(int nid, u64 start, u64 size)
struct resource *res;
int ret;
- res = register_memory_resource(start, size);
+ res = register_memory_resource(nid, start, size);
if (IS_ERR(res))
return PTR_ERR(res);
- ret = add_memory_resource(nid, res, memhp_auto_online);
+ ret = add_memory_resource(nid, start, size, memhp_auto_online);
if (ret < 0)
- release_memory_resource(res);
+ release_memory_resource(res, start, size);
return ret;
}
EXPORT_SYMBOL_GPL(add_memory);