-rw-r--r--  Documentation/vm/slub.rst   |  64
-rw-r--r--  MAINTAINERS                 |   1
-rw-r--r--  include/linux/slab.h        |  15
-rw-r--r--  include/linux/slub_def.h    |   1
-rw-r--r--  include/linux/stackdepot.h  |  26
-rw-r--r--  init/Kconfig                |   1
-rw-r--r--  lib/Kconfig.debug           |   1
-rw-r--r--  lib/slub_kunit.c            |  10
-rw-r--r--  lib/stackdepot.c            |  67
-rw-r--r--  mm/page_owner.c             |   9
-rw-r--r--  mm/slab.c                   |  29
-rw-r--r--  mm/slab.h                   |   5
-rw-r--r--  mm/slab_common.c            |  23
-rw-r--r--  mm/slub.c                   | 174
14 files changed, 283 insertions(+), 143 deletions(-)
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
index d3028554b1e9..43063ade737a 100644
--- a/Documentation/vm/slub.rst
+++ b/Documentation/vm/slub.rst
@@ -384,5 +384,69 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
40,60`` range will plot only samples collected between 40th and
60th seconds).
+
+DebugFS files for SLUB
+======================
+
+When the user tracking debug option is enabled, debugfs files describing the
+current state of SLUB caches are available, typically under
+/sys/kernel/debug/slab/<cache>/ (they are created only for caches with user
+tracking enabled). There are two types of these files, containing the
+following debug information:
+
+1. alloc_traces::
+
+ Prints information about unique allocation traces of the currently
+ allocated objects. The output is sorted by frequency of each trace.
+
+ Information in the output:
+ Number of objects, allocating function, minimal/average/maximal jiffies since alloc,
+ pid range of the allocating processes, cpu mask of allocating cpus, and stack trace.
+
+ Example:::
+
+ 1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1::
+ __slab_alloc+0x6d/0x90
+ kmem_cache_alloc_trace+0x2eb/0x300
+ populate_error_injection_list+0x97/0x110
+ init_error_injection+0x1b/0x71
+ do_one_initcall+0x5f/0x2d0
+ kernel_init_freeable+0x26f/0x2d7
+ kernel_init+0xe/0x118
+ ret_from_fork+0x22/0x30
+
+
+2. free_traces::
+
+ Prints information about unique freeing traces of the currently allocated
+ objects. The freeing traces thus come from the previous life-cycle of the
+ objects and are reported as not available for objects allocated for the first
+ time. The output is sorted by frequency of each trace.
+
+ Information in the output:
+ Number of objects, freeing function, minimal/average/maximal jiffies since free,
+ pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.
+
+ Example:::
+
+ 1980 <not-available> age=4294912290 pid=0 cpus=0
+ 51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
+ kfree+0x2db/0x420
+ acpi_ut_update_ref_count+0x6a6/0x782
+ acpi_ut_update_object_reference+0x1ad/0x234
+ acpi_ut_remove_reference+0x7d/0x84
+ acpi_rs_get_prt_method_data+0x97/0xd6
+ acpi_get_irq_routing_table+0x82/0xc4
+ acpi_pci_irq_find_prt_entry+0x8e/0x2e0
+ acpi_pci_irq_lookup+0x3a/0x1e0
+ acpi_pci_irq_enable+0x77/0x240
+ pcibios_enable_device+0x39/0x40
+ do_pci_enable_device.part.0+0x5d/0xe0
+ pci_enable_device_flags+0xfc/0x120
+ pci_enable_device+0x13/0x20
+ virtio_pci_probe+0x9e/0x170
+ local_pci_probe+0x48/0x80
+ pci_device_probe+0x105/0x1c0
+
Christoph Lameter, May 30, 2007
Sergey Senozhatsky, October 23, 2015
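
The debugfs files documented above are created only for caches that store user
tracking information. As a minimal sketch (not part of this patch; the cache
name and size are made up for illustration), a cache created with
SLAB_STORE_USER, or any cache when booting with slub_debug=U, gets such an
alloc_traces/free_traces pair:

    #include <linux/slab.h>

    /* Hypothetical cache used for illustration only. With SLAB_STORE_USER set
     * (and SLUB debugging compiled in), the files
     * /sys/kernel/debug/slab/example_cache/{alloc_traces,free_traces}
     * become available once debugfs is mounted.
     */
    static struct kmem_cache *example_cache;

    static int __init example_init(void)
    {
            example_cache = kmem_cache_create("example_cache", 128, 0,
                                              SLAB_STORE_USER, NULL);
            return example_cache ? 0 : -ENOMEM;
    }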
diff --git a/MAINTAINERS b/MAINTAINERS
index 6618e9b91b6c..7af43b32e06d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18163,6 +18163,7 @@ M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
M: Andrew Morton <akpm@linux-foundation.org>
M: Vlastimil Babka <vbabka@suse.cz>
R: Roman Gushchin <roman.gushchin@linux.dev>
+R: Hyeonggon Yoo <42.hyeyoo@gmail.com>
L: linux-mm@kvack.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 373b3ef99f4e..58bb9392775d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -112,6 +112,13 @@
#define SLAB_KASAN 0
#endif
+/*
+ * Ignore user-specified debugging flags.
+ * Intended for caches created for self-tests, so that only the flags
+ * specified in the code are used and any other flags are ignored.
+ */
+#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U)
+
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
@@ -190,7 +197,7 @@ void kmem_dump_obj(void *object);
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
- * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ * Setting ARCH_DMA_MINALIGN in arch headers allows that.
*/
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
@@ -210,9 +217,9 @@ void kmem_dump_obj(void *object);
#endif
/*
- * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
- * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
- * aligned pointers.
+ * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
+ * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
+ * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
*/
#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 33c5c0e3bd8d..f9c68a9dac04 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -105,7 +105,6 @@ struct kmem_cache {
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
- struct kmem_cache_order_objects max;
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 17f992fe6355..bc2797955de9 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -20,18 +20,36 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
gfp_t gfp_flags, bool can_alloc);
/*
- * Every user of stack depot has to call this during its own init when it's
- * decided that it will be calling stack_depot_save() later.
+ * Every user of stack depot has to call stack_depot_init() during its own init
+ * when it's decided that it will be calling stack_depot_save() later. This is
+ * recommended for e.g. modules initialized later in the boot process, when
+ * slab_is_available() is true.
*
* The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
* enabled as part of mm_init(), for subsystems where it's known at compile time
* that stack depot will be used.
+ *
+ * Another alternative is to call stack_depot_want_early_init(), when the
+ * decision to use stack depot is taken e.g. when evaluating kernel boot
+ * parameters, which precedes the enablement point in mm_init().
+ *
+ * stack_depot_init() and stack_depot_want_early_init() can be called regardless
+ * of CONFIG_STACKDEPOT and are no-ops when disabled. The actual save/fetch/print
+ * functions should only be called from code that makes sure CONFIG_STACKDEPOT
+ * is enabled.
*/
+#ifdef CONFIG_STACKDEPOT
int stack_depot_init(void);
-#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
-static inline int stack_depot_early_init(void) { return stack_depot_init(); }
+void __init stack_depot_want_early_init(void);
+
+/* This is supposed to be called only from mm_init() */
+int __init stack_depot_early_init(void);
#else
+static inline int stack_depot_init(void) { return 0; }
+
+static inline void stack_depot_want_early_init(void) { }
+
static inline int stack_depot_early_init(void) { return 0; }
#endif
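
To illustrate the stack_depot_want_early_init() path described in the comment
above, here is a hedged sketch of a boot-parameter handler opting into early
initialization, mirroring the page_owner change later in this diff; the
parameter name and flag variable are hypothetical:

    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/stackdepot.h>

    static bool my_feature_enabled;     /* hypothetical feature flag */

    static int __init early_my_feature_param(char *buf)
    {
            int ret = kstrtobool(buf, &my_feature_enabled);

            /*
             * The decision is taken while early params are parsed, i.e.
             * before mm_init(), so request the memblock-based early init
             * rather than calling stack_depot_init() later.
             */
            if (my_feature_enabled)
                    stack_depot_want_early_init();

            return ret;
    }
    early_param("my_feature", early_my_feature_param);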
diff --git a/init/Kconfig b/init/Kconfig
index ddcbefe535e9..adc57f989d87 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1875,6 +1875,7 @@ config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
+ select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
result in significant savings in code size. This also disables
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a30d5279efda..388fb808ff18 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -710,6 +710,7 @@ config DEBUG_SLAB
config SLUB_DEBUG_ON
bool "SLUB debugging on by default"
depends on SLUB && SLUB_DEBUG
+ select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
default n
help
Boot with debugging on by default. SLUB boots by default with
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 8662dc6cb509..7a0564d7cb7a 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -12,7 +12,7 @@ static int slab_errors;
static void test_clobber_zone(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
- SLAB_RED_ZONE, NULL);
+ SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
@@ -30,7 +30,7 @@ static void test_clobber_zone(struct kunit *test)
static void test_next_pointer(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
- SLAB_POISON, NULL);
+ SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
unsigned long tmp;
unsigned long *ptr_addr;
@@ -75,7 +75,7 @@ static void test_next_pointer(struct kunit *test)
static void test_first_word(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
- SLAB_POISON, NULL);
+ SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
@@ -90,7 +90,7 @@ static void test_first_word(struct kunit *test)
static void test_clobber_50th_byte(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
- SLAB_POISON, NULL);
+ SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
@@ -106,7 +106,7 @@ static void test_clobber_50th_byte(struct kunit *test)
static void test_clobber_redzone_free(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
- SLAB_RED_ZONE, NULL);
+ SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index bf5ba9af0500..5ca0d086ef4a 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -66,6 +66,9 @@ struct stack_record {
unsigned long entries[]; /* Variable-sized array of entries. */
};
+static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
+static bool __stack_depot_early_init_passed __initdata;
+
static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
static int depot_index;
@@ -162,38 +165,58 @@ static int __init is_stack_depot_disabled(char *str)
}
early_param("stack_depot_disable", is_stack_depot_disabled);
-/*
- * __ref because of memblock_alloc(), which will not be actually called after
- * the __init code is gone, because at that point slab_is_available() is true
- */
-__ref int stack_depot_init(void)
+void __init stack_depot_want_early_init(void)
+{
+ /* Too late to request early init now */
+ WARN_ON(__stack_depot_early_init_passed);
+
+ __stack_depot_want_early_init = true;
+}
+
+int __init stack_depot_early_init(void)
+{
+ size_t size;
+
+ /* This is supposed to be called only once, from mm_init() */
+ if (WARN_ON(__stack_depot_early_init_passed))
+ return 0;
+
+ __stack_depot_early_init_passed = true;
+
+ if (!__stack_depot_want_early_init || stack_depot_disable)
+ return 0;
+
+ size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+ pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n",
+ size);
+ stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
+
+ if (!stack_table) {
+ pr_err("Stack Depot hash table allocation failed, disabling\n");
+ stack_depot_disable = true;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int stack_depot_init(void)
{
static DEFINE_MUTEX(stack_depot_init_mutex);
+ int ret = 0;
mutex_lock(&stack_depot_init_mutex);
if (!stack_depot_disable && !stack_table) {
- size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
- int i;
-
- if (slab_is_available()) {
- pr_info("Stack Depot allocating hash table with kvmalloc\n");
- stack_table = kvmalloc(size, GFP_KERNEL);
- } else {
- pr_info("Stack Depot allocating hash table with memblock_alloc\n");
- stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
- }
- if (stack_table) {
- for (i = 0; i < STACK_HASH_SIZE; i++)
- stack_table[i] = NULL;
- } else {
+ pr_info("Stack Depot allocating hash table with kvcalloc\n");
+ stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL);
+ if (!stack_table) {
pr_err("Stack Depot hash table allocation failed, disabling\n");
stack_depot_disable = true;
- mutex_unlock(&stack_depot_init_mutex);
- return -ENOMEM;
+ ret = -ENOMEM;
}
}
mutex_unlock(&stack_depot_init_mutex);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
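
For the late-init path that now uses kvcalloc(), a minimal sketch (assuming
CONFIG_STACKDEPOT is enabled and slab is already available; the function is
illustrative only) of saving and fetching one trace, which is essentially what
SLUB's SLAB_STORE_USER handling below does:

    #include <linux/gfp.h>
    #include <linux/printk.h>
    #include <linux/stackdepot.h>
    #include <linux/stacktrace.h>

    static void example_record_and_dump(void)
    {
            unsigned long entries[16];
            unsigned long *saved;
            unsigned int nr, i;
            depot_stack_handle_t handle;

            stack_depot_init();     /* idempotent; no-op once the table exists */

            nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
            handle = stack_depot_save(entries, nr, GFP_KERNEL);
            if (!handle)
                    return;         /* depot disabled or allocation failed */

            nr = stack_depot_fetch(handle, &saved);
            for (i = 0; i < nr; i++)
                    pr_info("%pS\n", (void *)saved[i]);
    }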
diff --git a/mm/page_owner.c b/mm/page_owner.c
index fb3a05fdebdb..2743062e92c2 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -45,7 +45,12 @@ static void init_early_allocated_pages(void);
static int __init early_page_owner_param(char *buf)
{
- return kstrtobool(buf, &page_owner_enabled);
+ int ret = kstrtobool(buf, &page_owner_enabled);
+
+ if (page_owner_enabled)
+ stack_depot_want_early_init();
+
+ return ret;
}
early_param("page_owner", early_page_owner_param);
@@ -83,8 +88,6 @@ static __init void init_page_owner(void)
if (!page_owner_enabled)
return;
- stack_depot_init();
-
register_dummy_stack();
register_failure_stack();
register_early_stack();
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..a301f266efd1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -619,18 +619,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
return 0;
}
-static inline void *alternate_node_alloc(struct kmem_cache *cachep,
- gfp_t flags)
-{
- return NULL;
-}
-
-static inline void *____cache_alloc_node(struct kmem_cache *cachep,
- gfp_t flags, int nodeid)
-{
- return NULL;
-}
-
static inline gfp_t gfp_exact_node(gfp_t flags)
{
return flags & ~__GFP_NOFAIL;
@@ -638,9 +626,6 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
#else /* CONFIG_NUMA */
-static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
-static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
-
static struct alien_cache *__alloc_alien_cache(int node, int entries,
int batch, gfp_t gfp)
{
@@ -796,7 +781,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
int slab_node = slab_nid(virt_to_slab(objp));
int node = numa_mem_id();
/*
- * Make sure we are not freeing a object from another node to the array
+ * Make sure we are not freeing an object from another node to the array
* cache on this cpu.
*/
if (likely(node == slab_node))
@@ -847,7 +832,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
/*
* The kmem_cache_nodes don't come and go as CPUs
- * come and go. slab_mutex is sufficient
+ * come and go. slab_mutex provides sufficient
* protection here.
*/
cachep->node[node] = n;
@@ -860,7 +845,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
* Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
* will be allocated off-node since memory is not yet online for the new node.
- * When hotplugging memory or a cpu, existing node are not replaced if
+ * When hotplugging memory or a cpu, existing nodes are not replaced if
* already in use.
*
* Must hold slab_mutex.
@@ -1061,7 +1046,7 @@ int slab_prepare_cpu(unsigned int cpu)
* offline.
*
* Even if all the cpus of a node are down, we don't free the
- * kmem_cache_node of any cache. This to avoid a race between cpu_down, and
+ * kmem_cache_node of any cache. This is to avoid a race between cpu_down, and
* a kmalloc allocation from another cpu for memory from the node of
* the cpu going down. The kmem_cache_node structure is usually allocated from
* kmem_cache_create() and gets destroyed at kmem_cache_destroy().
@@ -1905,7 +1890,7 @@ static bool set_on_slab_cache(struct kmem_cache *cachep,
* @flags: SLAB flags
*
* Returns a ptr to the cache on success, NULL on failure.
- * Cannot be called within a int, but can be interrupted.
+ * Cannot be called within an interrupt, but can be interrupted.
* The @ctor is run when new pages are allocated by the cache.
*
* The flags are
@@ -3056,6 +3041,8 @@ out:
}
#ifdef CONFIG_NUMA
+static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
+
/*
* Try allocating on another node if PFA_SPREAD_SLAB or a mempolicy is set.
*
@@ -3151,7 +3138,7 @@ retry:
}
/*
- * A interface to enable slab creation on nodeid
+ * An interface to enable slab creation on nodeid
*/
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..db9fb5c8dae7 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -331,7 +331,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_ACCOUNT)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
- SLAB_TEMPORARY | SLAB_ACCOUNT)
+ SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
@@ -350,7 +350,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
- SLAB_ACCOUNT)
+ SLAB_ACCOUNT | \
+ SLAB_NO_USER_FLAGS)
bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..d1f3133847ad 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -24,6 +24,7 @@
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
+#include <linux/stackdepot.h>
#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
@@ -314,9 +315,13 @@ kmem_cache_create_usercopy(const char *name,
* If no slub_debug was enabled globally, the static key is not yet
* enabled by setup_slub_debug(). Enable it if the cache is being
* created with any of the debugging flags passed explicitly.
+ * It's also possible that this is the first cache created with
+ * SLAB_STORE_USER and we should init stack_depot for it.
*/
if (flags & SLAB_DEBUG_FLAGS)
static_branch_enable(&slub_debug_enabled);
+ if (flags & SLAB_STORE_USER)
+ stack_depot_init();
#endif
mutex_lock(&slab_mutex);
@@ -858,6 +863,8 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
return;
}
flags |= SLAB_ACCOUNT;
+ } else if (IS_ENABLED(CONFIG_ZONE_DMA) && (type == KMALLOC_DMA)) {
+ flags |= SLAB_CACHE_DMA;
}
kmalloc_caches[type][idx] = create_kmalloc_cache(
@@ -886,7 +893,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
/*
* Including KMALLOC_CGROUP if CONFIG_MEMCG_KMEM defined
*/
- for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
+ for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) {
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
if (!kmalloc_caches[type][i])
new_kmalloc_cache(i, type, flags);
@@ -907,20 +914,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
/* Kmalloc array is now usable */
slab_state = UP;
-
-#ifdef CONFIG_ZONE_DMA
- for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
- struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
-
- if (s) {
- kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
- kmalloc_info[i].name[KMALLOC_DMA],
- kmalloc_info[i].size,
- SLAB_CACHE_DMA | flags, 0,
- kmalloc_info[i].size);
- }
- }
-#endif
}
#endif /* !CONFIG_SLOB */
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..e5535020e0fd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -26,6 +26,7 @@
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
+#include <linux/stackdepot.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/kfence.h>
@@ -37,6 +38,7 @@
#include <linux/memcontrol.h>
#include <linux/random.h>
#include <kunit/test.h>
+#include <linux/sort.h>
#include <linux/debugfs.h>
#include <trace/events/kmem.h>
@@ -264,8 +266,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define TRACK_ADDRS_COUNT 16
struct track {
unsigned long addr; /* Called from address */
-#ifdef CONFIG_STACKTRACE
- unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
+#ifdef CONFIG_STACKDEPOT
+ depot_stack_handle_t handle;
#endif
int cpu; /* Was running on cpu */
int pid; /* Pid context */
@@ -724,57 +726,51 @@ static struct track *get_track(struct kmem_cache *s, void *object,
return kasan_reset_tag(p + alloc);
}
-static void set_track(struct kmem_cache *s, void *object,
+static void noinline set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr)
{
struct track *p = get_track(s, object, alloc);
- if (addr) {
-#ifdef CONFIG_STACKTRACE
- unsigned int nr_entries;
-
- metadata_access_enable();
- nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
- TRACK_ADDRS_COUNT, 3);
- metadata_access_disable();
+#ifdef CONFIG_STACKDEPOT
+ unsigned long entries[TRACK_ADDRS_COUNT];
+ unsigned int nr_entries;
- if (nr_entries < TRACK_ADDRS_COUNT)
- p->addrs[nr_entries] = 0;
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+ p->handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
#endif
- p->addr = addr;
- p->cpu = smp_processor_id();
- p->pid = current->pid;
- p->when = jiffies;
- } else {
- memset(p, 0, sizeof(struct track));
- }
+
+ p->addr = addr;
+ p->cpu = smp_processor_id();
+ p->pid = current->pid;
+ p->when = jiffies;
}
static void init_tracking(struct kmem_cache *s, void *object)
{
+ struct track *p;
+
if (!(s->flags & SLAB_STORE_USER))
return;
- set_track(s, object, TRACK_FREE, 0UL);
- set_track(s, object, TRACK_ALLOC, 0UL);
+ p = get_track(s, object, TRACK_ALLOC);
+ memset(p, 0, 2*sizeof(struct track));
}
static void print_track(const char *s, struct track *t, unsigned long pr_time)
{
+ depot_stack_handle_t handle __maybe_unused;
+
if (!t->addr)
return;
pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
-#ifdef CONFIG_STACKTRACE
- {
- int i;
- for (i = 0; i < TRACK_ADDRS_COUNT; i++)
- if (t->addrs[i])
- pr_err("\t%pS\n", (void *)t->addrs[i]);
- else
- break;
- }
+#ifdef CONFIG_STACKDEPOT
+ handle = READ_ONCE(t->handle);
+ if (handle)
+ stack_depot_print(handle);
+ else
+ pr_err("object allocation/free stack trace missing\n");
#endif
}
@@ -1021,7 +1017,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
}
/* Check the pad bytes at the end of a slab page */
-static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
+static void slab_pad_check(struct kmem_cache *s, struct slab *slab)
{
u8 *start;
u8 *fault;
@@ -1031,21 +1027,21 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
int remainder;
if (!(s->flags & SLAB_POISON))
- return 1;
+ return;
start = slab_address(slab);
length = slab_size(slab);
end = start + length;
remainder = length % s->size;
if (!remainder)
- return 1;
+ return;
pad = end - remainder;
metadata_access_enable();
fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
metadata_access_disable();
if (!fault)
- return 1;
+ return;
while (end > fault && end[-1] == POISON_INUSE)
end--;
@@ -1054,7 +1050,6 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
print_section(KERN_ERR, "Padding ", pad, remainder);
restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
- return 0;
}
static int check_object(struct kmem_cache *s, struct slab *slab,
@@ -1268,8 +1263,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
}
/* Object debug checks for alloc/free paths */
-static void setup_object_debug(struct kmem_cache *s, struct slab *slab,
- void *object)
+static void setup_object_debug(struct kmem_cache *s, void *object)
{
if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
return;
@@ -1534,6 +1528,8 @@ static int __init setup_slub_debug(char *str)
global_slub_debug_changed = true;
} else {
slab_list_specified = true;
+ if (flags & SLAB_STORE_USER)
+ stack_depot_want_early_init();
}
}
@@ -1551,6 +1547,8 @@ static int __init setup_slub_debug(char *str)
}
out:
slub_debug = global_flags;
+ if (slub_debug & SLAB_STORE_USER)
+ stack_depot_want_early_init();
if (slub_debug != 0 || slub_debug_string)
static_branch_enable(&slub_debug_enabled);
else
@@ -1584,6 +1582,9 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
slab_flags_t block_flags;
slab_flags_t slub_debug_local = slub_debug;
+ if (flags & SLAB_NO_USER_FLAGS)
+ return flags;
+
/*
* If the slab cache is for debugging (e.g. kmemleak) then
* don't store user (stack trace) information by default,
@@ -1628,8 +1629,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
return flags | slub_debug_local;
}
#else /* !CONFIG_SLUB_DEBUG */
-static inline void setup_object_debug(struct kmem_cache *s,
- struct slab *slab, void *object) {}
+static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
static inline
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
@@ -1641,8 +1641,7 @@ static inline int free_debug_processing(
void *head, void *tail, int bulk_cnt,
unsigned long addr) { return 0; }
-static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab)
- { return 1; }
+static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -1772,10 +1771,9 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
return *head != NULL;
}
-static void *setup_object(struct kmem_cache *s, struct slab *slab,
- void *object)
+static void *setup_object(struct kmem_cache *s, void *object)
{
- setup_object_debug(s, slab, object);
+ setup_object_debug(s, object);
object = kasan_init_slab_obj(s, object);
if (unlikely(s->ctor)) {
kasan_unpoison_object_data(s, object);
@@ -1894,13 +1892,13 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
/* First entry is used as the base of the freelist */
cur = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
- cur = setup_object(s, slab, cur);
+ cur = setup_object(s, cur);
slab->freelist = cur;
for (idx = 1; idx < slab->objects; idx++) {
next = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
- next = setup_object(s, slab, next);
+ next = setup_object(s, next);
set_freepointer(s, cur, next);
cur = next;
}
@@ -1939,7 +1937,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
*/
alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
- alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
+ alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM;
slab = alloc_slab_page(alloc_gfp, node, oo);
if (unlikely(!slab)) {
@@ -1971,11 +1969,11 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if (!shuffle) {
start = fixup_red_left(s, start);
- start = setup_object(s, slab, start);
+ start = setup_object(s, start);
slab->freelist = start;
for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
next = p + s->size;
- next = setup_object(s, slab, next);
+ next = setup_object(s, next);
set_freepointer(s, p, next);
p = next;
}
@@ -2910,7 +2908,6 @@ redo:
*/
if (!node_isset(node, slab_nodes)) {
node = NUMA_NO_NODE;
- goto redo;
} else {
stat(s, ALLOC_NODE_MISMATCH);
goto deactivate_slab;
@@ -4165,8 +4162,6 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->oo = oo_make(order, size);
s->min = oo_make(get_order(size), size);
- if (oo_objects(s->oo) > oo_objects(s->max))
- s->max = s->oo;
return !!oo_objects(s->oo);
}
@@ -4344,18 +4339,26 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
objp = fixup_red_left(s, objp);
trackp = get_track(s, objp, TRACK_ALLOC);
kpp->kp_ret = (void *)trackp->addr;
-#ifdef CONFIG_STACKTRACE
- for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
- kpp->kp_stack[i] = (void *)trackp->addrs[i];
- if (!kpp->kp_stack[i])
- break;
- }
+#ifdef CONFIG_STACKDEPOT
+ {
+ depot_stack_handle_t handle;
+ unsigned long *entries;
+ unsigned int nr_entries;
- trackp = get_track(s, objp, TRACK_FREE);
- for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
- kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
- if (!kpp->kp_free_stack[i])
- break;
+ handle = READ_ONCE(trackp->handle);
+ if (handle) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
+ kpp->kp_stack[i] = (void *)entries[i];
+ }
+
+ trackp = get_track(s, objp, TRACK_FREE);
+ handle = READ_ONCE(trackp->handle);
+ if (handle) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
+ kpp->kp_free_stack[i] = (void *)entries[i];
+ }
}
#endif
#endif
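
On the consumer side, a hypothetical helper (not part of the patch) can simply
walk the kp_stack/kp_free_stack arrays filled above, assuming the struct was
zero-initialized by the caller as kmem_dump_obj() does:

    /* Hypothetical printer for a filled struct kmem_obj_info: unused slots
     * remain NULL, so stop at the first NULL entry or at KS_ADDRS_COUNT.
     */
    static void example_print_stacks(const struct kmem_obj_info *kpp)
    {
            int i;

            for (i = 0; i < KS_ADDRS_COUNT && kpp->kp_stack[i]; i++)
                    pr_info(" alloc %pS\n", kpp->kp_stack[i]);
            for (i = 0; i < KS_ADDRS_COUNT && kpp->kp_free_stack[i]; i++)
                    pr_info(" free  %pS\n", kpp->kp_free_stack[i]);
    }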
@@ -5057,6 +5060,7 @@ EXPORT_SYMBOL(validate_slab_cache);
*/
struct location {
+ depot_stack_handle_t handle;
unsigned long count;
unsigned long addr;
long long sum_time;
@@ -5109,9 +5113,13 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
{
long start, end, pos;
struct location *l;
- unsigned long caddr;
+ unsigned long caddr, chandle;
unsigned long age = jiffies - track->when;
+ depot_stack_handle_t handle = 0;
+#ifdef CONFIG_STACKDEPOT
+ handle = READ_ONCE(track->handle);
+#endif
start = -1;
end = t->count;
@@ -5126,7 +5134,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
break;
caddr = t->loc[pos].addr;
- if (track->addr == caddr) {
+ chandle = t->loc[pos].handle;
+ if ((track->addr == caddr) && (handle == chandle)) {
l = &t->loc[pos];
l->count++;
@@ -5151,6 +5160,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
if (track->addr < caddr)
end = pos;
+ else if (track->addr == caddr && handle < chandle)
+ end = pos;
else
start = pos;
}
@@ -5173,6 +5184,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
l->max_time = age;
l->min_pid = track->pid;
l->max_pid = track->pid;
+ l->handle = handle;
cpumask_clear(to_cpumask(l->cpus));
cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
nodes_clear(l->nodes);
@@ -6082,6 +6094,21 @@ static int slab_debugfs_show(struct seq_file *seq, void *v)
seq_printf(seq, " nodes=%*pbl",
nodemask_pr_args(&l->nodes));
+#ifdef CONFIG_STACKDEPOT
+ {
+ depot_stack_handle_t handle;
+ unsigned long *entries;
+ unsigned int nr_entries, j;
+
+ handle = READ_ONCE(l->handle);
+ if (handle) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ seq_puts(seq, "\n");
+ for (j = 0; j < nr_entries; j++)
+ seq_printf(seq, " %pS\n", (void *)entries[j]);
+ }
+ }
+#endif
seq_puts(seq, "\n");
}
@@ -6106,6 +6133,17 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
return NULL;
}
+static int cmp_loc_by_count(const void *a, const void *b, const void *data)
+{
+ struct location *loc1 = (struct location *)a;
+ struct location *loc2 = (struct location *)b;
+
+ if (loc1->count > loc2->count)
+ return -1;
+ else
+ return 1;
+}
+
static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
{
struct loc_track *t = seq->private;
@@ -6167,6 +6205,10 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
spin_unlock_irqrestore(&n->list_lock, flags);
}
+ /* Sort locations by count */
+ sort_r(t->loc, t->count, sizeof(struct location),
+ cmp_loc_by_count, NULL, NULL);
+
bitmap_free(obj_map);
return 0;
}