From 926a59b1dfe2580e1a00bb2ba1664e5472077cbd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:35 +0930 Subject: module: Annotate module version magic Due to the new lockdep checks in the coming patch, we go: [ 9.759380] ------------[ cut here ]------------ [ 9.759389] WARNING: CPU: 31 PID: 597 at ../kernel/module.c:216 each_symbol_section+0x121/0x130() [ 9.759391] Modules linked in: [ 9.759393] CPU: 31 PID: 597 Comm: modprobe Not tainted 4.0.0-rc1+ #65 [ 9.759393] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013 [ 9.759396] ffffffff817d8676 ffff880424567ca8 ffffffff8157e98b 0000000000000001 [ 9.759398] 0000000000000000 ffff880424567ce8 ffffffff8105fbc7 ffff880424567cd8 [ 9.759400] 0000000000000000 ffffffff810ec160 ffff880424567d40 0000000000000000 [ 9.759400] Call Trace: [ 9.759407] [] dump_stack+0x4f/0x7b [ 9.759410] [] warn_slowpath_common+0x97/0xe0 [ 9.759412] [] ? section_objs+0x60/0x60 [ 9.759414] [] warn_slowpath_null+0x1a/0x20 [ 9.759415] [] each_symbol_section+0x121/0x130 [ 9.759417] [] find_symbol+0x31/0x70 [ 9.759420] [] load_module+0x20f/0x2660 [ 9.759422] [] ? __do_page_fault+0x190/0x4e0 [ 9.759426] [] ? retint_restore_args+0x13/0x13 [ 9.759427] [] ? retint_restore_args+0x13/0x13 [ 9.759433] [] ? trace_hardirqs_on_caller+0x11d/0x1e0 [ 9.759437] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 9.759439] [] ? retint_restore_args+0x13/0x13 [ 9.759441] [] SyS_init_module+0xce/0x100 [ 9.759443] [] system_call_fastpath+0x12/0x17 [ 9.759445] ---[ end trace 9294429076a9c644 ]--- As per the comment this site should be fine, but lets wrap it in preempt_disable() anyhow to placate lockdep. Cc: Rusty Russell Acked-by: Paul E. McKenney Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..1150d5239205 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1169,11 +1169,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; - /* Since this should be found in kernel (which can't be removed), - * no locking is necessary. */ + /* + * Since this should be found in kernel (which can't be removed), no + * locking is necessary -- use preempt_disable() to placate lockdep. + */ + preempt_disable(); if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, - &crc, true, false)) + &crc, true, false)) { + preempt_enable(); BUG(); + } + preempt_enable(); return check_version(sechdrs, versindex, VMLINUX_SYMBOL_STR(module_layout), mod, crc, NULL); -- cgit v1.2.1 From 0be964be0d45084245673c971d72a4b51690231d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:35 +0930 Subject: module: Sanitize RCU usage and locking Currently the RCU usage in module is an inconsistent mess of RCU and RCU-sched, this is broken for CONFIG_PREEMPT where synchronize_rcu() does not imply synchronize_sched(). Most usage sites use preempt_{dis,en}able() which is RCU-sched, but (most of) the modification sites use synchronize_rcu(). With the exception of the module bug list, which actually uses RCU. Convert everything over to RCU-sched. Furthermore add lockdep asserts to all sites, because it's not at all clear to me the required locking is observed, esp. on exported functions. Cc: Rusty Russell Acked-by: "Paul E. 
McKenney" Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 1150d5239205..a15899e00ca9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -105,6 +105,22 @@ static LIST_HEAD(modules); struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ +static void module_assert_mutex(void) +{ + lockdep_assert_held(&module_mutex); +} + +static void module_assert_mutex_or_preempt(void) +{ +#ifdef CONFIG_LOCKDEP + if (unlikely(!debug_locks)) + return; + + WARN_ON(!rcu_read_lock_sched_held() && + !lockdep_is_held(&module_mutex)); +#endif +} + #ifdef CONFIG_MODULE_SIG #ifdef CONFIG_MODULE_SIG_FORCE static bool sig_enforce = true; @@ -318,6 +334,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, #endif }; + module_assert_mutex_or_preempt(); + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) return true; @@ -457,6 +475,8 @@ static struct module *find_module_all(const char *name, size_t len, { struct module *mod; + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) continue; @@ -1860,8 +1880,8 @@ static void free_module(struct module *mod) list_del_rcu(&mod->list); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); - /* Wait for RCU synchronizing before releasing mod->list and buglist. */ - synchronize_rcu(); + /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ + synchronize_sched(); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ @@ -3133,11 +3153,11 @@ static noinline int do_init_module(struct module *mod) mod->init_text_size = 0; /* * We want to free module_init, but be aware that kallsyms may be - * walking this with preempt disabled. In all the failure paths, - * we call synchronize_rcu/synchronize_sched, but we don't want - * to slow down the success path, so use actual RCU here. + * walking this with preempt disabled. In all the failure paths, we + * call synchronize_sched(), but we don't want to slow down the success + * path, so use actual RCU here. */ - call_rcu(&freeinit->rcu, do_free_init); + call_rcu_sched(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3395,8 +3415,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); wake_up_all(&module_wq); - /* Wait for RCU synchronizing before releasing mod->list. */ - synchronize_rcu(); + /* Wait for RCU-sched synchronizing before releasing mod->list. 
*/ + synchronize_sched(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ @@ -3663,6 +3683,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned int i; int ret; + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -3837,6 +3859,8 @@ struct module *__module_address(unsigned long addr) if (addr < module_addr_min || addr > module_addr_max) return NULL; + module_assert_mutex_or_preempt(); + list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; -- cgit v1.2.1 From 93c2e105f6bcee231c951ba0e56e84505c4b0483 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:37 +0930 Subject: module: Optimize __module_address() using a latched RB-tree Currently __module_address() is using a linear search through all modules in order to find the module corresponding to the provided address. With a lot of modules this can take a lot of time. One of the users of this is kernel_text_address() which is employed in many stack unwinders; which in turn are used by perf-callchain and ftrace (possibly from NMI context). So by optimizing __module_address() we optimize many stack unwinders which are used by both perf and tracing in performance sensitive code. Cc: Rusty Russell Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: "Paul E. McKenney" Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 5 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index a15899e00ca9..e0db5c31cb53 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -101,6 +101,108 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); + +/* + * Use a latched RB-tree for __module_address(); this allows us to use + * RCU-sched lookups of the address from any context. + * + * Because modules have two address ranges: init and core, we need two + * latch_tree_nodes entries. Therefore we need the back-pointer from + * mod_tree_node. + * + * Because init ranges are short lived we mark them unlikely and have placed + * them outside the critical cacheline in struct module. 
+ */ + +static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) +{ + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; + + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->module_init; + + return (unsigned long)mod->module_core; +} + +static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) +{ + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; + + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->init_size; + + return (unsigned long)mod->core_size; +} + +static __always_inline bool +mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) +{ + return __mod_tree_val(a) < __mod_tree_val(b); +} + +static __always_inline int +mod_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long start, end; + + start = __mod_tree_val(n); + if (val < start) + return -1; + + end = start + __mod_tree_size(n); + if (val >= end) + return 1; + + return 0; +} + +static const struct latch_tree_ops mod_tree_ops = { + .less = mod_tree_less, + .comp = mod_tree_comp, +}; + +static struct latch_tree_root mod_tree __cacheline_aligned; + +/* + * These modifications: insert, remove_init and remove; are serialized by the + * module_mutex. + */ +static void mod_tree_insert(struct module *mod) +{ + mod->mtn_core.mod = mod; + mod->mtn_init.mod = mod; + + latch_tree_insert(&mod->mtn_core.node, &mod_tree, &mod_tree_ops); + if (mod->init_size) + latch_tree_insert(&mod->mtn_init.node, &mod_tree, &mod_tree_ops); +} + +static void mod_tree_remove_init(struct module *mod) +{ + if (mod->init_size) + latch_tree_erase(&mod->mtn_init.node, &mod_tree, &mod_tree_ops); +} + +static void mod_tree_remove(struct module *mod) +{ + latch_tree_erase(&mod->mtn_core.node, &mod_tree, &mod_tree_ops); + mod_tree_remove_init(mod); +} + +static struct module *mod_tree_find(unsigned long addr) +{ + struct latch_tree_node *ltn; + + ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops); + if (!ltn) + return NULL; + + return container_of(ltn, struct mod_tree_node, node)->mod; +} + #ifdef CONFIG_KGDB_KDB struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ @@ -1878,6 +1980,7 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. 
*/ @@ -3145,6 +3248,7 @@ static noinline int do_init_module(struct module *mod) mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); mod->module_init = NULL; @@ -3215,6 +3319,7 @@ again: goto out; } list_add_rcu(&mod->list, &modules); + mod_tree_insert(mod); err = 0; out: @@ -3861,13 +3966,13 @@ struct module *__module_address(unsigned long addr) module_assert_mutex_or_preempt(); - list_for_each_entry_rcu(mod, &modules, list) { + mod = mod_tree_find(addr); + if (mod) { + BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) - return mod; + mod = NULL; } - return NULL; + return mod; } EXPORT_SYMBOL_GPL(__module_address); -- cgit v1.2.1 From 6c9692e2d6a2206d8fd75ea247daa47fb75e4a02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:37 +0930 Subject: module: Make the mod_tree stuff conditional on PERF_EVENTS || TRACING Andrew worried about the overhead on small systems; only use the fancy code when either perf or tracing is enabled. Cc: Rusty Russell Cc: Steven Rostedt Requested-by: Andrew Morton Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e0db5c31cb53..ac3044ceca3f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -102,6 +102,8 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +#ifdef CONFIG_MODULES_TREE_LOOKUP + /* * Use a latched RB-tree for __module_address(); this allows us to use * RCU-sched lookups of the address from any context. @@ -112,6 +114,10 @@ static LIST_HEAD(modules); * * Because init ranges are short lived we mark them unlikely and have placed * them outside the critical cacheline in struct module. + * + * This is conditional on PERF_EVENTS || TRACING because those can really hit + * __module_address() hard by doing a lot of stack unwinding; potentially from + * NMI context. 
*/ static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) @@ -192,7 +198,7 @@ static void mod_tree_remove(struct module *mod) mod_tree_remove_init(mod); } -static struct module *mod_tree_find(unsigned long addr) +static struct module *mod_find(unsigned long addr) { struct latch_tree_node *ltn; @@ -203,6 +209,26 @@ static struct module *mod_tree_find(unsigned long addr) return container_of(ltn, struct mod_tree_node, node)->mod; } +#else /* MODULES_TREE_LOOKUP */ + +static void mod_tree_insert(struct module *mod) { } +static void mod_tree_remove_init(struct module *mod) { } +static void mod_tree_remove(struct module *mod) { } + +static struct module *mod_find(unsigned long addr) +{ + struct module *mod; + + list_for_each_entry_rcu(mod, &modules, list) { + if (within_module(addr, mod)) + return mod; + } + + return NULL; +} + +#endif /* MODULES_TREE_LOOKUP */ + #ifdef CONFIG_KGDB_KDB struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ @@ -3966,7 +3992,7 @@ struct module *__module_address(unsigned long addr) module_assert_mutex_or_preempt(); - mod = mod_tree_find(addr); + mod = mod_find(addr); if (mod) { BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED) -- cgit v1.2.1 From b7df4d1b23bfca830f1076412d21524686c5a441 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:37 +0930 Subject: module: Use __module_address() for module_address_lookup() Use the generic __module_address() addr to struct module lookup instead of open coding it once more. Cc: Rusty Russell Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index ac3044ceca3f..293dfaf4ce52 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3671,19 +3671,15 @@ const char *module_address_lookup(unsigned long addr, char **modname, char *namebuf) { - struct module *mod; const char *ret = NULL; + struct module *mod; preempt_disable(); - list_for_each_entry_rcu(mod, &modules, list) { - if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) { - if (modname) - *modname = mod->name; - ret = get_ksymbol(mod, addr, size, offset); - break; - } + mod = __module_address(addr); + if (mod) { + if (modname) + *modname = mod->name; + ret = get_ksymbol(mod, addr, size, offset); } /* Make a copy in here where it's safe */ if (ret) { @@ -3691,6 +3687,7 @@ const char *module_address_lookup(unsigned long addr, ret = namebuf; } preempt_enable(); + return ret; } -- cgit v1.2.1 From 4f666546d047752c17265f4641cc9470c1cbaed4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:38 +0930 Subject: module: Rework module_addr_{min,max} __module_address() does an initial bound check before doing the {list/tree} iteration to find the actual module. The bound variables are nowhere near the mod_tree cacheline, in fact they're nowhere near one another. module_addr_min lives in .data while module_addr_max lives in .bss (smarty pants GCC thinks the explicit 0 assignment is a mistake). Rectify this by moving the two variables into a structure together with the latch_tree_root to guarantee they all share the same cacheline and avoid hitting two extra cachelines for the lookup. While reworking the bounds code, move the bound update from allocation to insertion time, this avoids updating the bounds for a few error paths. 
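As an illustration (not part of the patch itself), the shape this gives the lookup fast path can be sketched in stand-alone C; all names below are invented for the sketch and the latch-tree walk is stubbed out:

struct module;                                          /* opaque in this sketch */
extern struct module *mod_find(unsigned long addr);     /* tree or list walk */

/*
 * Bounds and tree root share one structure (and, in the kernel, one
 * cacheline via __cacheline_aligned), so most lookups are rejected
 * after touching a single cacheline and never walk the tree.
 */
struct sketch_mod_tree {
	void *root;                     /* stands in for struct latch_tree_root */
	unsigned long addr_min;
	unsigned long addr_max;
};

static struct sketch_mod_tree sketch_tree = {
	.addr_min = -1UL,               /* empty range until the first insert */
};

/* Updated at insertion time, under module_mutex, as the patch describes. */
static void sketch_update_bounds(unsigned long base, unsigned long size)
{
	if (base < sketch_tree.addr_min)
		sketch_tree.addr_min = base;
	if (base + size > sketch_tree.addr_max)
		sketch_tree.addr_max = base + size;
}

/* Fast-path shape of __module_address(): cheap reject, then real lookup. */
static struct module *sketch_module_address(unsigned long addr)
{
	if (addr < sketch_tree.addr_min || addr > sketch_tree.addr_max)
		return NULL;            /* cheap reject, no tree walk */

	return mod_find(addr);
}
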
Cc: Rusty Russell Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 80 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 28 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 293dfaf4ce52..c8da2a59ebf7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -170,7 +170,26 @@ static const struct latch_tree_ops mod_tree_ops = { .comp = mod_tree_comp, }; -static struct latch_tree_root mod_tree __cacheline_aligned; +static struct mod_tree_root { + struct latch_tree_root root; + unsigned long addr_min; + unsigned long addr_max; +} mod_tree __cacheline_aligned = { + .addr_min = -1UL, +}; + +#define module_addr_min mod_tree.addr_min +#define module_addr_max mod_tree.addr_max + +static noinline void __mod_tree_insert(struct mod_tree_node *node) +{ + latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops); +} + +static void __mod_tree_remove(struct mod_tree_node *node) +{ + latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops); +} /* * These modifications: insert, remove_init and remove; are serialized by the @@ -181,20 +200,20 @@ static void mod_tree_insert(struct module *mod) mod->mtn_core.mod = mod; mod->mtn_init.mod = mod; - latch_tree_insert(&mod->mtn_core.node, &mod_tree, &mod_tree_ops); + __mod_tree_insert(&mod->mtn_core); if (mod->init_size) - latch_tree_insert(&mod->mtn_init.node, &mod_tree, &mod_tree_ops); + __mod_tree_insert(&mod->mtn_init); } static void mod_tree_remove_init(struct module *mod) { if (mod->init_size) - latch_tree_erase(&mod->mtn_init.node, &mod_tree, &mod_tree_ops); + __mod_tree_remove(&mod->mtn_init); } static void mod_tree_remove(struct module *mod) { - latch_tree_erase(&mod->mtn_core.node, &mod_tree, &mod_tree_ops); + __mod_tree_remove(&mod->mtn_core); mod_tree_remove_init(mod); } @@ -202,7 +221,7 @@ static struct module *mod_find(unsigned long addr) { struct latch_tree_node *ltn; - ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops); + ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops); if (!ltn) return NULL; @@ -211,6 +230,8 @@ static struct module *mod_find(unsigned long addr) #else /* MODULES_TREE_LOOKUP */ +static unsigned long module_addr_min = -1UL, module_addr_max = 0; + static void mod_tree_insert(struct module *mod) { } static void mod_tree_remove_init(struct module *mod) { } static void mod_tree_remove(struct module *mod) { } @@ -229,6 +250,28 @@ static struct module *mod_find(unsigned long addr) #endif /* MODULES_TREE_LOOKUP */ +/* + * Bounds of module text, for speeding up __module_address. + * Protected by module_mutex. + */ +static void __mod_update_bounds(void *base, unsigned int size) +{ + unsigned long min = (unsigned long)base; + unsigned long max = min + size; + + if (min < module_addr_min) + module_addr_min = min; + if (max > module_addr_max) + module_addr_max = max; +} + +static void mod_update_bounds(struct module *mod) +{ + __mod_update_bounds(mod->module_core, mod->core_size); + if (mod->init_size) + __mod_update_bounds(mod->module_init, mod->init_size); +} + #ifdef CONFIG_KGDB_KDB struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ @@ -297,10 +340,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); -/* Bounds of module allocation, for speeding __module_address. - * Protected by module_mutex. 
*/ -static unsigned long module_addr_min = -1UL, module_addr_max = 0; - int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); @@ -2539,22 +2578,6 @@ void * __weak module_alloc(unsigned long size) return vmalloc_exec(size); } -static void *module_alloc_update_bounds(unsigned long size) -{ - void *ret = module_alloc(size); - - if (ret) { - mutex_lock(&module_mutex); - /* Update module bounds. */ - if ((unsigned long)ret < module_addr_min) - module_addr_min = (unsigned long)ret; - if ((unsigned long)ret + size > module_addr_max) - module_addr_max = (unsigned long)ret + size; - mutex_unlock(&module_mutex); - } - return ret; -} - #ifdef CONFIG_DEBUG_KMEMLEAK static void kmemleak_load_module(const struct module *mod, const struct load_info *info) @@ -2960,7 +2983,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc_update_bounds(mod->core_size); + ptr = module_alloc(mod->core_size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2974,7 +2997,7 @@ static int move_module(struct module *mod, struct load_info *info) mod->module_core = ptr; if (mod->init_size) { - ptr = module_alloc_update_bounds(mod->init_size); + ptr = module_alloc(mod->init_size); /* * The pointer to this block is stored in the module structure * which is inside the block. This block doesn't need to be @@ -3344,6 +3367,7 @@ again: err = -EEXIST; goto out; } + mod_update_bounds(mod); list_add_rcu(&mod->list, &modules); mod_tree_insert(mod); err = 0; -- cgit v1.2.1 From 05f408dddb013168759cdb4cbd0ba4e189a9504d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 27 May 2015 11:09:38 +0930 Subject: kernel/module.c: use generic module param operaters for sig_enforce We're directly checking and modifying sig_enforce when needed instead of using the generic helpers. This prevents us from generalizing this helper so that others can use it. Use indirect helpers to allow us to generalize this code a bit and to make it a bit more clear what this is doing. Cc: Rusty Russell Cc: Jani Nikula Cc: Andrew Morton Cc: Kees Cook Cc: Tejun Heo Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: cocci@systeme.lip6.fr Signed-off-by: Luis R. Rodriguez Signed-off-by: Rusty Russell --- kernel/module.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c8da2a59ebf7..9e8c9305bba9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -301,23 +301,25 @@ static bool sig_enforce = false; static int param_set_bool_enable_only(const char *val, const struct kernel_param *kp) { - int err; - bool test; + int err = 0; + bool new_value; + bool orig_value = *(bool *)kp->arg; struct kernel_param dummy_kp = *kp; - dummy_kp.arg = &test; + dummy_kp.arg = &new_value; err = param_set_bool(val, &dummy_kp); if (err) return err; /* Don't let them unset it once it's set! */ - if (!test && sig_enforce) + if (!new_value && orig_value) return -EROFS; - if (test) - sig_enforce = true; - return 0; + if (new_value) + err = param_set_bool(val, kp); + + return err; } static const struct kernel_param_ops param_ops_bool_enable_only = { -- cgit v1.2.1 From d19f05d8a8fa221e5d5f4eaca0f3ca5874c990d3 Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Wed, 27 May 2015 11:09:38 +0930 Subject: kernel/params.c: generalize bool_enable_only This takes out the bool_enable_only implementation from the module loading code and generalizes it so that others can make use of it. Cc: Rusty Russell Cc: Jani Nikula Cc: Andrew Morton Cc: Kees Cook Cc: Tejun Heo Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: cocci@systeme.lip6.fr Signed-off-by: Luis R. Rodriguez Signed-off-by: Rusty Russell --- kernel/module.c | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 9e8c9305bba9..9b0e36145474 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -298,37 +298,6 @@ static bool sig_enforce = true; #else static bool sig_enforce = false; -static int param_set_bool_enable_only(const char *val, - const struct kernel_param *kp) -{ - int err = 0; - bool new_value; - bool orig_value = *(bool *)kp->arg; - struct kernel_param dummy_kp = *kp; - - dummy_kp.arg = &new_value; - - err = param_set_bool(val, &dummy_kp); - if (err) - return err; - - /* Don't let them unset it once it's set! */ - if (!new_value && orig_value) - return -EROFS; - - if (new_value) - err = param_set_bool(val, kp); - - return err; -} - -static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_OPS_FL_NOARG, - .set = param_set_bool_enable_only, - .get = param_get_bool, -}; -#define param_check_bool_enable_only param_check_bool - module_param(sig_enforce, bool_enable_only, 0644); #endif /* !CONFIG_MODULE_SIG_FORCE */ #endif /* CONFIG_MODULE_SIG */ -- cgit v1.2.1 From 6727bb9c6abe836d88191ce13bfdd7a53c245e15 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 27 May 2015 11:09:39 +0930 Subject: kernel/module.c: avoid ifdefs for sig_enforce declaration There's no need to require an ifdef over the declaration of sig_enforce as IS_ENABLED() can be used. While at it, there's no harm in exposing this kernel parameter outside of CONFIG_MODULE_SIG as it'd be a no-op on non module sig kernels. Now, technically we should in theory be able to remove the #ifdef'ery over the declaration of the module parameter as we are also trusting the bool_enable_only code for CONFIG_MODULE_SIG kernels but for now remain paranoid and keep it. With time if no one can put a bullet through bool_enable_only and if there are no technical requirements over not exposing CONFIG_MODULE_SIG_FORCE with the measures in place by bool_enable_only we could remove this last ifdef. Cc: Rusty Russell Cc: Andrew Morton Cc: Kees Cook Cc: Tejun Heo Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: cocci@systeme.lip6.fr Signed-off-by: Luis R. Rodriguez Signed-off-by: Rusty Russell --- kernel/module.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 9b0e36145474..427b99f1a4b3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -292,15 +292,10 @@ static void module_assert_mutex_or_preempt(void) #endif } -#ifdef CONFIG_MODULE_SIG -#ifdef CONFIG_MODULE_SIG_FORCE -static bool sig_enforce = true; -#else -static bool sig_enforce = false; - +static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); +#ifndef CONFIG_MODULE_SIG_FORCE module_param(sig_enforce, bool_enable_only, 0644); #endif /* !CONFIG_MODULE_SIG_FORCE */ -#endif /* CONFIG_MODULE_SIG */ /* Block module loading/unloading? 
*/ int modules_disabled = 0; -- cgit v1.2.1 From b51d23e4e9fea6f264d39535c2a62d1f51e7ccc3 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 17 Jun 2015 06:18:52 +0930 Subject: module: add per-module param_lock Add a "param_lock" mutex to each module, and update params.c to use the correct built-in or module mutex while locking kernel params. Remove the kparam_block_sysfs_r/w() macros, replace them with direct calls to kernel_param_[un]lock(module). The kernel param code currently uses a single mutex to protect modification of any and all kernel params. While this generally works, there is one specific problem with it; a module callback function cannot safely load another module, i.e. with request_module() or even with indirect calls such as crypto_has_alg(). If the module to be loaded has any of its params configured (e.g. with a /etc/modprobe.d/* config file), then the attempt will result in a deadlock between the first module param callback waiting for modprobe, and modprobe trying to lock the single kernel param mutex to set the new module's param. This fixes that by using per-module mutexes, so that each individual module is protected against concurrent changes in its own kernel params, but is not blocked by changes to other module params. All built-in modules continue to use the built-in mutex, since they will always be loaded at runtime and references (e.g. request_module(), crypto_has_alg()) to them will never cause load-time param changing. This also simplifies the interface used by modules to block sysfs access to their params; while there are currently functions to block and unblock sysfs param access which are split up by read and write and expect a single kernel param to be passed, their actual operation is identical and applies to all params, not just the one passed to them; they simply lock and unlock the global param mutex. They are replaced with direct calls to kernel_param_[un]lock(THIS_MODULE), which locks THIS_MODULE's param_lock, or if the module is built-in, it locks the built-in mutex. Suggested-by: Rusty Russell Signed-off-by: Dan Streetman Signed-off-by: Rusty Russell --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 427b99f1a4b3..8ec33ce202a6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3442,6 +3442,8 @@ static int load_module(struct load_info *info, const char __user *uargs, if (err) goto unlink_mod; + mutex_init(&mod->param_lock); + /* Now we've got everything in the final locations, we can * find optional sections. */ err = find_module_sections(mod, info); -- cgit v1.2.1 From cf2fde7b39e9446e2af015215d7fb695781af0c1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Jun 2015 06:44:38 +0930 Subject: param: fix module param locks when !CONFIG_SYSFS. As Dan Streetman points out, the entire point of locking for is to stop sysfs accesses, so they're elided entirely in the !SYSFS case. 
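Pictured as code, the locking rule these last two commits describe looks roughly as follows; this is a conceptual sketch with invented names, not the actual kernel/params.c implementation:

#include <linux/mutex.h>

struct sketch_module {                  /* stands in for struct module */
	struct mutex param_lock;        /* the per-module lock this series adds */
};

#ifdef CONFIG_SYSFS
static DEFINE_MUTEX(builtin_param_lock);        /* built-in parameters only */

/* A module guards its own params; built-ins (mod == NULL) share one mutex. */
static struct mutex *sketch_param_mutex(struct sketch_module *mod)
{
	return mod ? &mod->param_lock : &builtin_param_lock;
}

static void sketch_param_lock(struct sketch_module *mod)
{
	mutex_lock(sketch_param_mutex(mod));
}

static void sketch_param_unlock(struct sketch_module *mod)
{
	mutex_unlock(sketch_param_mutex(mod));
}
#else
/* No sysfs, hence no concurrent param writers: locking is elided. */
static void sketch_param_lock(struct sketch_module *mod) { }
static void sketch_param_unlock(struct sketch_module *mod) { }
#endif
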
Reported-by: Stephen Rothwell
Signed-off-by: Rusty Russell
---
 kernel/module.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'kernel/module.c')

diff --git a/kernel/module.c b/kernel/module.c
index 8ec33ce202a6..b4994adf7187 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1820,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod)
 	mod_kobject_put(mod);
 }
 
+static void init_param_lock(struct module *mod)
+{
+	mutex_init(&mod->param_lock);
+}
 #else /* !CONFIG_SYSFS */
 
 static int mod_sysfs_setup(struct module *mod,
@@ -1842,6 +1846,9 @@ static void del_usage_links(struct module *mod)
 {
 }
 
+static void init_param_lock(struct module *mod)
+{
+}
 #endif /* CONFIG_SYSFS */
 
 static void mod_sysfs_teardown(struct module *mod)
@@ -3442,7 +3449,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	if (err)
 		goto unlink_mod;
 
-	mutex_init(&mod->param_lock);
+	init_param_lock(mod);
 
 	/* Now we've got everything in the final locations, we can
 	 * find optional sections. */
-- cgit v1.2.1
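Taken together, the visible interface change for module authors is that the kparam_block_sysfs_r/w() macros are gone and code calls kernel_param_lock()/kernel_param_unlock() on THIS_MODULE instead, as the per-module param_lock commit above describes. A usage sketch with a hypothetical "foo" driver and parameter (not taken from the tree) might look like:

#include <linux/module.h>
#include <linux/moduleparam.h>

static int foo_threshold = 10;                  /* hypothetical parameter */
module_param(foo_threshold, int, 0644);

static void foo_apply_settings(void)
{
	/* Was: kparam_block_sysfs_w(foo_threshold); now lock all our params. */
	kernel_param_lock(THIS_MODULE);
	/* ... work that must see a consistent foo_threshold ... */
	kernel_param_unlock(THIS_MODULE);
}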