-rw-r--r--  Documentation/vm/slub.rst   |  64
-rw-r--r--  MAINTAINERS                 |   1
-rw-r--r--  include/linux/slab.h        |  15
-rw-r--r--  include/linux/slub_def.h    |   1
-rw-r--r--  include/linux/stackdepot.h  |  26
-rw-r--r--  init/Kconfig                |   1
-rw-r--r--  lib/Kconfig.debug           |   1
-rw-r--r--  lib/slub_kunit.c            |  10
-rw-r--r--  lib/stackdepot.c            |  67
-rw-r--r--  mm/page_owner.c             |   9
-rw-r--r--  mm/slab.c                   |  29
-rw-r--r--  mm/slab.h                   |   5
-rw-r--r--  mm/slab_common.c            |  23
-rw-r--r--  mm/slub.c                   | 174
14 files changed, 283 insertions, 143 deletions
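
Note: the stackdepot changes below (include/linux/stackdepot.h, lib/stackdepot.c) split initialization into three paths: selecting STACKDEPOT_ALWAYS_INIT at build time, requesting early (memblock-based) allocation via stack_depot_want_early_init() before mm_init(), or calling stack_depot_init() once slab_is_available(). The following is a minimal sketch of a consumer of that contract; the "demo_trace" parameter and demo_* names are hypothetical and not part of this series, only the stackdepot/stacktrace calls are existing kernel APIs.

/*
 * Sketch of a hypothetical stack depot user following the new init contract.
 * Names prefixed demo_ are illustrative only.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/*
 * Boot parameters are parsed before mm_init(), so only request early init
 * here; the hash table is then allocated from memblock in
 * stack_depot_early_init().
 */
static int __init demo_trace_param(char *buf)
{
        stack_depot_want_early_init();
        return 0;
}
early_param("demo_trace", demo_trace_param);

/*
 * A consumer that only decides later (when slab_is_available() is true)
 * would instead call stack_depot_init() once before its first save.
 */
static depot_stack_handle_t demo_record_stack(void)
{
        unsigned long entries[16];
        unsigned int nr;

        /* Skip the two innermost frames, then store the trace once. */
        nr = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
        return stack_depot_save(entries, nr, GFP_NOWAIT);
}

Either path is a no-op when CONFIG_STACKDEPOT is disabled, per the new header comment.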
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
index d3028554b1e9..43063ade737a 100644
--- a/Documentation/vm/slub.rst
+++ b/Documentation/vm/slub.rst
@@ -384,5 +384,69 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
    40,60`` range will plot only samples collected between 40th and
    60th seconds).
 
+
+DebugFS files for SLUB
+======================
+
+For more information about current state of SLUB caches with the user tracking
+debug option enabled, debugfs files are available, typically under
+/sys/kernel/debug/slab/<cache>/ (created only for caches with enabled user
+tracking). There are 2 types of these files with the following debug
+information:
+
+1. alloc_traces::
+
+    Prints information about unique allocation traces of the currently
+    allocated objects. The output is sorted by frequency of each trace.
+
+    Information in the output:
+    Number of objects, allocating function, minimal/average/maximal jiffies since alloc,
+    pid range of the allocating processes, cpu mask of allocating cpus, and stack trace.
+
+    Example:::
+
+        1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1::
+        __slab_alloc+0x6d/0x90
+        kmem_cache_alloc_trace+0x2eb/0x300
+        populate_error_injection_list+0x97/0x110
+        init_error_injection+0x1b/0x71
+        do_one_initcall+0x5f/0x2d0
+        kernel_init_freeable+0x26f/0x2d7
+        kernel_init+0xe/0x118
+        ret_from_fork+0x22/0x30
+
+
+2. free_traces::
+
+    Prints information about unique freeing traces of the currently allocated
+    objects. The freeing traces thus come from the previous life-cycle of the
+    objects and are reported as not available for objects allocated for the first
+    time. The output is sorted by frequency of each trace.
+
+    Information in the output:
+    Number of objects, freeing function, minimal/average/maximal jiffies since free,
+    pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.
+
+    Example:::
+
+        1980 <not-available> age=4294912290 pid=0 cpus=0
+        51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
+        kfree+0x2db/0x420
+        acpi_ut_update_ref_count+0x6a6/0x782
+        acpi_ut_update_object_reference+0x1ad/0x234
+        acpi_ut_remove_reference+0x7d/0x84
+        acpi_rs_get_prt_method_data+0x97/0xd6
+        acpi_get_irq_routing_table+0x82/0xc4
+        acpi_pci_irq_find_prt_entry+0x8e/0x2e0
+        acpi_pci_irq_lookup+0x3a/0x1e0
+        acpi_pci_irq_enable+0x77/0x240
+        pcibios_enable_device+0x39/0x40
+        do_pci_enable_device.part.0+0x5d/0xe0
+        pci_enable_device_flags+0xfc/0x120
+        pci_enable_device+0x13/0x20
+        virtio_pci_probe+0x9e/0x170
+        local_pci_probe+0x48/0x80
+        pci_device_probe+0x105/0x1c0
+
 Christoph Lameter, May 30, 2007
 Sergey Senozhatsky, October 23, 2015
diff --git a/MAINTAINERS b/MAINTAINERS
index 6618e9b91b6c..7af43b32e06d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18163,6 +18163,7 @@ M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
 M: Andrew Morton <akpm@linux-foundation.org>
 M: Vlastimil Babka <vbabka@suse.cz>
 R: Roman Gushchin <roman.gushchin@linux.dev>
+R: Hyeonggon Yoo <42.hyeyoo@gmail.com>
 L: linux-mm@kvack.org
 S: Maintained
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 373b3ef99f4e..58bb9392775d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -112,6 +112,13 @@
 #define SLAB_KASAN		0
 #endif
 
+/*
+ * Ignore user specified debugging flags.
+ * Intended for caches created for self-tests so they have only flags
+ * specified in the code and other flags are ignored.
+ */
+#define SLAB_NO_USER_FLAGS	((slab_flags_t __force)0x10000000U)
+
 /* The following flags affect the page allocator grouping pages by mobility */
 /* Objects are reclaimable */
 #define SLAB_RECLAIM_ACCOUNT	((slab_flags_t __force)0x00020000U)
@@ -190,7 +197,7 @@ void kmem_dump_obj(void *object);
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
- * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ * Setting ARCH_DMA_MINALIGN in arch headers allows that.
  */
 #if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
 #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
@@ -210,9 +217,9 @@ void kmem_dump_obj(void *object);
 #endif
 
 /*
- * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
- * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
- * aligned pointers.
+ * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
+ * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
+ * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
  */
 #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
 #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 33c5c0e3bd8d..f9c68a9dac04 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -105,7 +105,6 @@ struct kmem_cache {
 	struct kmem_cache_order_objects oo;
 
 	/* Allocation and freeing of slabs */
-	struct kmem_cache_order_objects max;
 	struct kmem_cache_order_objects min;
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 17f992fe6355..bc2797955de9 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -20,18 +20,36 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					gfp_t gfp_flags, bool can_alloc);
 
 /*
- * Every user of stack depot has to call this during its own init when it's
- * decided that it will be calling stack_depot_save() later.
+ * Every user of stack depot has to call stack_depot_init() during its own init
+ * when it's decided that it will be calling stack_depot_save() later. This is
+ * recommended for e.g. modules initialized later in the boot process, when
+ * slab_is_available() is true.
  *
  * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
  * enabled as part of mm_init(), for subsystems where it's known at compile time
  * that stack depot will be used.
+ *
+ * Another alternative is to call stack_depot_want_early_init(), when the
+ * decision to use stack depot is taken e.g. when evaluating kernel boot
+ * parameters, which precedes the enablement point in mm_init().
+ *
+ * stack_depot_init() and stack_depot_want_early_init() can be called regardless
+ * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print
+ * functions should only be called from code that makes sure CONFIG_STACKDEPOT
+ * is enabled.
  */
+#ifdef CONFIG_STACKDEPOT
 int stack_depot_init(void);
 
-#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
-static inline int stack_depot_early_init(void) { return stack_depot_init(); }
+void __init stack_depot_want_early_init(void);
+
+/* This is supposed to be called only from mm_init() */
+int __init stack_depot_early_init(void);
 #else
+static inline int stack_depot_init(void) { return 0; }
+
+static inline void stack_depot_want_early_init(void) { }
+
 static inline int stack_depot_early_init(void) { return 0; }
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index ddcbefe535e9..adc57f989d87 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1875,6 +1875,7 @@ config SLUB_DEBUG
 	default y
 	bool "Enable SLUB debugging support" if EXPERT
 	depends on SLUB && SYSFS
+	select STACKDEPOT if STACKTRACE_SUPPORT
 	help
 	  SLUB has extensive debug support features. Disabling these can
 	  result in significant savings in code size. This also disables
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a30d5279efda..388fb808ff18 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -710,6 +710,7 @@ config DEBUG_SLAB
 config SLUB_DEBUG_ON
 	bool "SLUB debugging on by default"
 	depends on SLUB && SLUB_DEBUG
+	select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
 	default n
 	help
 	  Boot with debugging on by default. SLUB boots by default with
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 8662dc6cb509..7a0564d7cb7a 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -12,7 +12,7 @@ static int slab_errors;
 static void test_clobber_zone(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
-				SLAB_RED_ZONE, NULL);
+				SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kasan_disable_current();
@@ -30,7 +30,7 @@ static void test_clobber_zone(struct kunit *test)
 static void test_next_pointer(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
-				SLAB_POISON, NULL);
+				SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 	unsigned long tmp;
 	unsigned long *ptr_addr;
@@ -75,7 +75,7 @@ static void test_next_pointer(struct kunit *test)
 static void test_first_word(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
-				SLAB_POISON, NULL);
+				SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kmem_cache_free(s, p);
@@ -90,7 +90,7 @@ static void test_first_word(struct kunit *test)
 static void test_clobber_50th_byte(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
-				SLAB_POISON, NULL);
+				SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kmem_cache_free(s, p);
@@ -106,7 +106,7 @@ static void test_clobber_50th_byte(struct kunit *test)
 static void test_clobber_redzone_free(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
-				SLAB_RED_ZONE, NULL);
+				SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kasan_disable_current();
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index bf5ba9af0500..5ca0d086ef4a 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -66,6 +66,9 @@ struct stack_record {
 	unsigned long entries[];	/* Variable-sized array of entries. */
 };
 
+static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
+static bool __stack_depot_early_init_passed __initdata;
+
 static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
 
 static int depot_index;
@@ -162,38 +165,58 @@ static int __init is_stack_depot_disabled(char *str)
 }
 early_param("stack_depot_disable", is_stack_depot_disabled);
 
-/*
- * __ref because of memblock_alloc(), which will not be actually called after
- * the __init code is gone, because at that point slab_is_available() is true
- */
-__ref int stack_depot_init(void)
+void __init stack_depot_want_early_init(void)
+{
+	/* Too late to request early init now */
+	WARN_ON(__stack_depot_early_init_passed);
+
+	__stack_depot_want_early_init = true;
+}
+
+int __init stack_depot_early_init(void)
+{
+	size_t size;
+
+	/* This is supposed to be called only once, from mm_init() */
+	if (WARN_ON(__stack_depot_early_init_passed))
+		return 0;
+
+	__stack_depot_early_init_passed = true;
+
+	if (!__stack_depot_want_early_init || stack_depot_disable)
+		return 0;
+
+	size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+	pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n",
+		size);
+	stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
+
+	if (!stack_table) {
+		pr_err("Stack Depot hash table allocation failed, disabling\n");
+		stack_depot_disable = true;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int stack_depot_init(void)
 {
 	static DEFINE_MUTEX(stack_depot_init_mutex);
+	int ret = 0;
 
 	mutex_lock(&stack_depot_init_mutex);
 	if (!stack_depot_disable && !stack_table) {
-		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
-		int i;
-
-		if (slab_is_available()) {
-			pr_info("Stack Depot allocating hash table with kvmalloc\n");
-			stack_table = kvmalloc(size, GFP_KERNEL);
-		} else {
-			pr_info("Stack Depot allocating hash table with memblock_alloc\n");
-			stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
-		}
-		if (stack_table) {
-			for (i = 0; i < STACK_HASH_SIZE; i++)
-				stack_table[i] = NULL;
-		} else {
+		pr_info("Stack Depot allocating hash table with kvcalloc\n");
+		stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL);
+		if (!stack_table) {
 			pr_err("Stack Depot hash table allocation failed, disabling\n");
 			stack_depot_disable = true;
-			mutex_unlock(&stack_depot_init_mutex);
-			return -ENOMEM;
+			ret = -ENOMEM;
 		}
 	}
 	mutex_unlock(&stack_depot_init_mutex);
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(stack_depot_init);
diff --git a/mm/page_owner.c b/mm/page_owner.c
index fb3a05fdebdb..2743062e92c2 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -45,7 +45,12 @@ static void init_early_allocated_pages(void);
 
 static int __init early_page_owner_param(char *buf)
 {
-	return kstrtobool(buf, &page_owner_enabled);
+	int ret = kstrtobool(buf, &page_owner_enabled);
+
+	if (page_owner_enabled)
+		stack_depot_want_early_init();
+
+	return ret;
 }
 early_param("page_owner", early_page_owner_param);
 
@@ -83,8 +88,6 @@ static __init void init_page_owner(void)
 	if (!page_owner_enabled)
 		return;
 
-	stack_depot_init();
-
 	register_dummy_stack();
 	register_failure_stack();
 	register_early_stack();
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..a301f266efd1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -619,18 +619,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	return 0;
 }
 
-static inline void *alternate_node_alloc(struct kmem_cache *cachep,
-		gfp_t flags)
-{
-	return NULL;
-}
-
-static inline void *____cache_alloc_node(struct kmem_cache *cachep,
-		gfp_t flags, int nodeid)
-{
-	return NULL;
-}
-
 static inline gfp_t gfp_exact_node(gfp_t flags)
 {
 	return flags & ~__GFP_NOFAIL;
@@ -638,9 +626,6 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
 
 #else	/* CONFIG_NUMA */
 
-static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
-static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
-
 static struct alien_cache *__alloc_alien_cache(int node, int entries,
 						int batch, gfp_t gfp)
 {
@@ -796,7 +781,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	int slab_node = slab_nid(virt_to_slab(objp));
 	int node = numa_mem_id();
 	/*
-	 * Make sure we are not freeing a object from another node to the array
+	 * Make sure we are not freeing an object from another node to the array
 	 * cache on this cpu.
 	 */
 	if (likely(node == slab_node))
@@ -847,7 +832,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
 		/*
 		 * The kmem_cache_nodes don't come and go as CPUs
-		 * come and go. slab_mutex is sufficient
+		 * come and go. slab_mutex provides sufficient
 		 * protection here.
 		 */
 		cachep->node[node] = n;
@@ -860,7 +845,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
  * Allocates and initializes node for a node on each slab cache, used for
  * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
  * will be allocated off-node since memory is not yet online for the new node.
- * When hotplugging memory or a cpu, existing node are not replaced if
+ * When hotplugging memory or a cpu, existing nodes are not replaced if
  * already in use.
  *
  * Must hold slab_mutex.
@@ -1061,7 +1046,7 @@ int slab_prepare_cpu(unsigned int cpu)
  * offline.
  *
  * Even if all the cpus of a node are down, we don't free the
- * kmem_cache_node of any cache. This to avoid a race between cpu_down, and
+ * kmem_cache_node of any cache. This is to avoid a race between cpu_down, and
 * a kmalloc allocation from another cpu for memory from the node of
 * the cpu going down. The kmem_cache_node structure is usually allocated from
 * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
@@ -1905,7 +1890,7 @@ static bool set_on_slab_cache(struct kmem_cache *cachep,
 * @flags: SLAB flags
 *
 * Returns a ptr to the cache on success, NULL on failure.
- * Cannot be called within a int, but can be interrupted.
+ * Cannot be called within an int, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
@@ -3056,6 +3041,8 @@ out:
 }
 
 #ifdef CONFIG_NUMA
+static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
+
 /*
 * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
 *
@@ -3151,7 +3138,7 @@ retry:
 }
 
 /*
- * A interface to enable slab creation on nodeid
+ * An interface to enable slab creation on nodeid
 */
 static void *____cache_alloc_node(struct kmem_cache *cachep,
 				gfp_t flags, int nodeid)
diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..db9fb5c8dae7 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -331,7 +331,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 			  SLAB_ACCOUNT)
 #elif defined(CONFIG_SLUB)
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
-			  SLAB_TEMPORARY | SLAB_ACCOUNT)
+			  SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS)
 #else
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
 #endif
@@ -350,7 +350,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 			      SLAB_NOLEAKTRACE | \
 			      SLAB_RECLAIM_ACCOUNT | \
 			      SLAB_TEMPORARY | \
-			      SLAB_ACCOUNT)
+			      SLAB_ACCOUNT | \
+			      SLAB_NO_USER_FLAGS)
 
 bool __kmem_cache_empty(struct kmem_cache *);
 int __kmem_cache_shutdown(struct kmem_cache *);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..d1f3133847ad 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -24,6 +24,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/memcontrol.h>
+#include <linux/stackdepot.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kmem.h>
@@ -314,9 +315,13 @@ kmem_cache_create_usercopy(const char *name,
 	 * If no slub_debug was enabled globally, the static key is not yet
 	 * enabled by setup_slub_debug(). Enable it if the cache is being
 	 * created with any of the debugging flags passed explicitly.
+	 * It's also possible that this is the first cache created with
+	 * SLAB_STORE_USER and we should init stack_depot for it.
 	 */
 	if (flags & SLAB_DEBUG_FLAGS)
 		static_branch_enable(&slub_debug_enabled);
+	if (flags & SLAB_STORE_USER)
+		stack_depot_init();
 #endif
 
 	mutex_lock(&slab_mutex);
@@ -858,6 +863,8 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
 			return;
 		}
 		flags |= SLAB_ACCOUNT;
+	} else if (IS_ENABLED(CONFIG_ZONE_DMA) && (type == KMALLOC_DMA)) {
+		flags |= SLAB_CACHE_DMA;
 	}
 
 	kmalloc_caches[type][idx] = create_kmalloc_cache(
@@ -886,7 +893,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 	/*
 	 * Including KMALLOC_CGROUP if CONFIG_MEMCG_KMEM defined
 	 */
-	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
+	for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) {
 		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
 			if (!kmalloc_caches[type][i])
 				new_kmalloc_cache(i, type, flags);
@@ -907,20 +914,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 
 	/* Kmalloc array is now usable */
 	slab_state = UP;
-
-#ifdef CONFIG_ZONE_DMA
-	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
-		struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
-
-		if (s) {
-			kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
-				kmalloc_info[i].name[KMALLOC_DMA],
-				kmalloc_info[i].size,
-				SLAB_CACHE_DMA | flags, 0,
-				kmalloc_info[i].size);
-		}
-	}
-#endif
 }
 #endif /* !CONFIG_SLOB */
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..e5535020e0fd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -26,6 +26,7 @@
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
+#include <linux/stackdepot.h>
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
 #include <linux/kfence.h>
@@ -37,6 +38,7 @@
 #include <linux/memcontrol.h>
 #include <linux/random.h>
 #include <kunit/test.h>
+#include <linux/sort.h>
 #include <linux/debugfs.h>
 #include <trace/events/kmem.h>
 
@@ -264,8 +266,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
 #define TRACK_ADDRS_COUNT 16
 struct track {
 	unsigned long addr;	/* Called from address */
-#ifdef CONFIG_STACKTRACE
-	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
+#ifdef CONFIG_STACKDEPOT
+	depot_stack_handle_t handle;
 #endif
 	int cpu;		/* Was running on cpu */
 	int pid;		/* Pid context */
@@ -724,57 +726,51 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 	return kasan_reset_tag(p + alloc);
 }
 
-static void set_track(struct kmem_cache *s, void *object,
+static void noinline set_track(struct kmem_cache *s, void *object,
 			enum track_item alloc, unsigned long addr)
 {
 	struct track *p = get_track(s, object, alloc);
 
-	if (addr) {
-#ifdef CONFIG_STACKTRACE
-		unsigned int nr_entries;
-
-		metadata_access_enable();
-		nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
-					      TRACK_ADDRS_COUNT, 3);
-		metadata_access_disable();
+#ifdef CONFIG_STACKDEPOT
+	unsigned long entries[TRACK_ADDRS_COUNT];
+	unsigned int nr_entries;
 
-		if (nr_entries < TRACK_ADDRS_COUNT)
-			p->addrs[nr_entries] = 0;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+	p->handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
 #endif
-		p->addr = addr;
-		p->cpu = smp_processor_id();
-		p->pid = current->pid;
-		p->when = jiffies;
-	} else {
-		memset(p, 0, sizeof(struct track));
-	}
+
+	p->addr = addr;
+	p->cpu = smp_processor_id();
+	p->pid = current->pid;
+	p->when = jiffies;
 }
 
 static void init_tracking(struct kmem_cache *s, void *object)
 {
+	struct track *p;
+
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
-	set_track(s, object, TRACK_FREE, 0UL);
-	set_track(s, object, TRACK_ALLOC, 0UL);
+	p = get_track(s, object, TRACK_ALLOC);
+	memset(p, 0, 2*sizeof(struct track));
 }
 
 static void print_track(const char *s, struct track *t, unsigned long pr_time)
 {
+	depot_stack_handle_t handle __maybe_unused;
+
 	if (!t->addr)
 		return;
 
 	pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
 	       s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
-#ifdef CONFIG_STACKTRACE
-	{
-		int i;
-		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
-			if (t->addrs[i])
-				pr_err("\t%pS\n", (void *)t->addrs[i]);
-			else
-				break;
-	}
+#ifdef CONFIG_STACKDEPOT
+	handle = READ_ONCE(t->handle);
+	if (handle)
+		stack_depot_print(handle);
+	else
+		pr_err("object allocation/free stack trace missing\n");
 #endif
 }
 
@@ -1021,7 +1017,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 }
 
 /* Check the pad bytes at the end of a slab page */
-static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
+static void slab_pad_check(struct kmem_cache *s, struct slab *slab)
 {
 	u8 *start;
 	u8 *fault;
@@ -1031,21 +1027,21 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
 	int remainder;
 
 	if (!(s->flags & SLAB_POISON))
-		return 1;
+		return;
 
 	start = slab_address(slab);
 	length = slab_size(slab);
 	end = start + length;
 	remainder = length % s->size;
 	if (!remainder)
-		return 1;
+		return;
 
 	pad = end - remainder;
 	metadata_access_enable();
 	fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
 	metadata_access_disable();
 	if (!fault)
-		return 1;
+		return;
 
 	while (end > fault && end[-1] == POISON_INUSE)
 		end--;
@@ -1054,7 +1050,6 @@ static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
 	print_section(KERN_ERR, "Padding ", pad, remainder);
 
 	restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
-	return 0;
 }
 
 static int check_object(struct kmem_cache *s, struct slab *slab,
@@ -1268,8 +1263,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
 }
 
 /* Object debug checks for alloc/free paths */
-static void setup_object_debug(struct kmem_cache *s, struct slab *slab,
-				void *object)
+static void setup_object_debug(struct kmem_cache *s, void *object)
 {
 	if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
 		return;
@@ -1534,6 +1528,8 @@ static int __init setup_slub_debug(char *str)
 			global_slub_debug_changed = true;
 		} else {
 			slab_list_specified = true;
+			if (flags & SLAB_STORE_USER)
+				stack_depot_want_early_init();
 		}
 	}
 
@@ -1551,6 +1547,8 @@ static int __init setup_slub_debug(char *str)
 	}
 out:
 	slub_debug = global_flags;
+	if (slub_debug & SLAB_STORE_USER)
+		stack_depot_want_early_init();
 	if (slub_debug != 0 || slub_debug_string)
 		static_branch_enable(&slub_debug_enabled);
 	else
@@ -1584,6 +1582,9 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 	slab_flags_t block_flags;
 	slab_flags_t slub_debug_local = slub_debug;
 
+	if (flags & SLAB_NO_USER_FLAGS)
+		return flags;
+
 	/*
 	 * If the slab cache is for debugging (e.g. kmemleak) then
 	 * don't store user (stack trace) information by default,
@@ -1628,8 +1629,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 	return flags | slub_debug_local;
 }
 #else /* !CONFIG_SLUB_DEBUG */
-static inline void setup_object_debug(struct kmem_cache *s,
-			struct slab *slab, void *object) {}
+static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
 static inline
 void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
 
@@ -1641,8 +1641,7 @@ static inline int free_debug_processing(
 	void *head, void *tail, int bulk_cnt,
 	unsigned long addr) { return 0; }
 
-static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab)
-			{ return 1; }
+static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
 static inline int check_object(struct kmem_cache *s, struct slab *slab,
 			void *object, u8 val) { return 1; }
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -1772,10 +1771,9 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 	return *head != NULL;
 }
 
-static void *setup_object(struct kmem_cache *s, struct slab *slab,
-				void *object)
+static void *setup_object(struct kmem_cache *s, void *object)
 {
-	setup_object_debug(s, slab, object);
+	setup_object_debug(s, object);
 	object = kasan_init_slab_obj(s, object);
 	if (unlikely(s->ctor)) {
 		kasan_unpoison_object_data(s, object);
@@ -1894,13 +1892,13 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
 	/* First entry is used as the base of the freelist */
 	cur = next_freelist_entry(s, slab, &pos, start, page_limit,
 				freelist_count);
-	cur = setup_object(s, slab, cur);
+	cur = setup_object(s, cur);
 	slab->freelist = cur;
 
 	for (idx = 1; idx < slab->objects; idx++) {
 		next = next_freelist_entry(s, slab, &pos, start, page_limit,
 			freelist_count);
-		next = setup_object(s, slab, next);
+		next = setup_object(s, next);
 		set_freepointer(s, cur, next);
 		cur = next;
 	}
@@ -1939,7 +1937,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
 	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
-		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
+		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM;
 
 	slab = alloc_slab_page(alloc_gfp, node, oo);
 	if (unlikely(!slab)) {
@@ -1971,11 +1969,11 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	if (!shuffle) {
 		start = fixup_red_left(s, start);
-		start = setup_object(s, slab, start);
+		start = setup_object(s, start);
 		slab->freelist = start;
 		for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
 			next = p + s->size;
-			next = setup_object(s, slab, next);
+			next = setup_object(s, next);
 			set_freepointer(s, p, next);
 			p = next;
 		}
@@ -2910,7 +2908,6 @@ redo:
 	 */
 	if (!node_isset(node, slab_nodes)) {
 		node = NUMA_NO_NODE;
-		goto redo;
 	} else {
 		stat(s, ALLOC_NODE_MISMATCH);
 		goto deactivate_slab;
@@ -4165,8 +4162,6 @@ static int calculate_sizes(struct kmem_cache *s)
 	 */
 	s->oo = oo_make(order, size);
 	s->min = oo_make(get_order(size), size);
-	if (oo_objects(s->oo) > oo_objects(s->max))
-		s->max = s->oo;
 
 	return !!oo_objects(s->oo);
 }
@@ -4344,18 +4339,26 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 	objp = fixup_red_left(s, objp);
 	trackp = get_track(s, objp, TRACK_ALLOC);
 	kpp->kp_ret = (void *)trackp->addr;
-#ifdef CONFIG_STACKTRACE
-	for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
-		kpp->kp_stack[i] = (void *)trackp->addrs[i];
-		if (!kpp->kp_stack[i])
-			break;
-	}
+#ifdef CONFIG_STACKDEPOT
+	{
+		depot_stack_handle_t handle;
+		unsigned long *entries;
+		unsigned int nr_entries;
 
-	trackp = get_track(s, objp, TRACK_FREE);
-	for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
-		kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
-		if (!kpp->kp_free_stack[i])
-			break;
+		handle = READ_ONCE(trackp->handle);
+		if (handle) {
+			nr_entries = stack_depot_fetch(handle, &entries);
+			for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
+				kpp->kp_stack[i] = (void *)entries[i];
+		}
+
+		trackp = get_track(s, objp, TRACK_FREE);
+		handle = READ_ONCE(trackp->handle);
+		if (handle) {
+			nr_entries = stack_depot_fetch(handle, &entries);
+			for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
+				kpp->kp_free_stack[i] = (void *)entries[i];
+		}
 	}
 #endif
 #endif
@@ -5057,6 +5060,7 @@ EXPORT_SYMBOL(validate_slab_cache);
 */
 
 struct location {
+	depot_stack_handle_t handle;
 	unsigned long count;
 	unsigned long addr;
 	long long sum_time;
@@ -5109,9 +5113,13 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 {
 	long start, end, pos;
 	struct location *l;
-	unsigned long caddr;
+	unsigned long caddr, chandle;
 	unsigned long age = jiffies - track->when;
+	depot_stack_handle_t handle = 0;
 
+#ifdef CONFIG_STACKDEPOT
+	handle = READ_ONCE(track->handle);
+#endif
 	start = -1;
 	end = t->count;
 
@@ -5126,7 +5134,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 			break;
 
 		caddr = t->loc[pos].addr;
-		if (track->addr == caddr) {
+		chandle = t->loc[pos].handle;
+		if ((track->addr == caddr) && (handle == chandle)) {
 
 			l = &t->loc[pos];
 			l->count++;
@@ -5151,6 +5160,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 
 		if (track->addr < caddr)
 			end = pos;
+		else if (track->addr == caddr && handle < chandle)
+			end = pos;
 		else
 			start = pos;
 	}
@@ -5173,6 +5184,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 	l->max_time = age;
 	l->min_pid = track->pid;
 	l->max_pid = track->pid;
+	l->handle = handle;
 	cpumask_clear(to_cpumask(l->cpus));
 	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
 	nodes_clear(l->nodes);
@@ -6082,6 +6094,21 @@ static int slab_debugfs_show(struct seq_file *seq, void *v)
 			seq_printf(seq, " nodes=%*pbl",
 				 nodemask_pr_args(&l->nodes));
 
+#ifdef CONFIG_STACKDEPOT
+		{
+			depot_stack_handle_t handle;
+			unsigned long *entries;
+			unsigned int nr_entries, j;
+
+			handle = READ_ONCE(l->handle);
+			if (handle) {
+				nr_entries = stack_depot_fetch(handle, &entries);
+				seq_puts(seq, "\n");
+				for (j = 0; j < nr_entries; j++)
+					seq_printf(seq, " %pS\n", (void *)entries[j]);
+			}
+		}
+#endif
 		seq_puts(seq, "\n");
 	}
 
@@ -6106,6 +6133,17 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
 	return NULL;
 }
 
+static int cmp_loc_by_count(const void *a, const void *b, const void *data)
+{
+	struct location *loc1 = (struct location *)a;
+	struct location *loc2 = (struct location *)b;
+
+	if (loc1->count > loc2->count)
+		return -1;
+	else
+		return 1;
+}
+
 static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
 {
 	struct loc_track *t = seq->private;
@@ -6167,6 +6205,10 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
 		spin_unlock_irqrestore(&n->list_lock, flags);
 	}
 
+	/* Sort locations by count */
+	sort_r(t->loc, t->count, sizeof(struct location),
+		cmp_loc_by_count, NULL, NULL);
+
 	bitmap_free(obj_map);
 	return 0;
 }
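
Note: as documented in the Documentation/vm/slub.rst hunk at the top, the alloc_traces/free_traces debugfs files appear only for caches created with user tracking. Below is a minimal sketch of how a cache ends up with those files; the demo_cache name and init function are hypothetical, and booting with slub_debug=U (which now triggers stack_depot_want_early_init() in setup_slub_debug()) achieves the same without code changes.

/*
 * Sketch: a cache whose allocation/free stacks land in
 * /sys/kernel/debug/slab/demo_cache/{alloc_traces,free_traces}.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

static struct kmem_cache *demo_cache;

static int __init demo_cache_init(void)
{
        /*
         * SLAB_STORE_USER is part of SLAB_DEBUG_FLAGS, so the mm/slab_common.c
         * hunk above enables the slub_debug static key and calls
         * stack_depot_init() when this cache is created.
         */
        demo_cache = kmem_cache_create("demo_cache", 64, 0,
                                       SLAB_STORE_USER, NULL);
        return demo_cache ? 0 : -ENOMEM;
}

The entries in those files are now sorted by count via the sort_r() call added to slab_debug_trace_open().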