diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-02-13 13:51:13 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-02-13 13:51:13 +1100 |
commit | ff2d1808851bfe9ecf612caf713191cd11f6ce90 (patch) | |
tree | f9d02e210f75eb565bed6508294ec466b05685bf | |
parent | ab8e8ee588a8cf3eadd8ed464a7950dc1880bd88 (diff) | |
parent | ec7c11f69882c03ec47650da86243284b04fdc31 (diff) | |
download | linux-next-ff2d1808851bfe9ecf612caf713191cd11f6ce90.tar.gz |
Merge branch 'akpm-current/current'
36 files changed, 501 insertions, 513 deletions
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst index 179f2a5625a0..a0ffdc8daef3 100644 --- a/Documentation/process/deprecated.rst +++ b/Documentation/process/deprecated.rst @@ -84,7 +84,7 @@ buffer. This could result in linear overflows beyond the end of the buffer, leading to all kinds of misbehaviors. While `CONFIG_FORTIFY_SOURCE=y` and various compiler flags help reduce the risk of using this function, there is no good reason to add new uses of -this function. The safe replacement is :c:func:`strscpy`. +this function. The safe replacement is stracpy() or strscpy(). strncpy() on NUL-terminated strings ----------------------------------- @@ -93,9 +93,9 @@ will be NUL terminated. This can lead to various linear read overflows and other misbehavior due to the missing termination. It also NUL-pads the destination buffer if the source contents are shorter than the destination buffer size, which may be a needless performance penalty for callers using -only NUL-terminated strings. The safe replacement is :c:func:`strscpy`. -(Users of :c:func:`strscpy` still needing NUL-padding will need an -explicit :c:func:`memset` added.) +only NUL-terminated strings. In this case, the safe replacement is +stracpy() or strscpy(). If, however, the destination buffer still needs +NUL-padding, the safe replacement is stracpy_pad(). If a caller is using non-NUL-terminated strings, :c:func:`strncpy()` can still be used, but destinations should be marked with the `__nonstring @@ -107,7 +107,7 @@ strlcpy() :c:func:`strlcpy` reads the entire source buffer first, possibly exceeding the given limit of bytes to copy. This is inefficient and can lead to linear read overflows if a source string is not NUL-terminated. The -safe replacement is :c:func:`strscpy`. +safe replacement is stracpy() or strscpy(). Variable Length Arrays (VLAs) ----------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 29e05f865b40..ad462f671f01 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9313,6 +9313,13 @@ F: include/uapi/linux/keyctl.h F: include/keys/ F: security/keys/ +KFIFO: +M: Stefani Seibold <stefani@seibold.net> +S: Maintained +F: lib/kfifo.c +F: include/linux/kfifo.h +F: samples/kfifo/ + KGDB / KDB /debug_core M: Jason Wessel <jason.wessel@windriver.com> M: Daniel Thompson <daniel.thompson@linaro.org> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fa4ea09593ab..629fdf13f846 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -217,6 +217,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -319,11 +329,23 @@ out: #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. + */ +} + /* * 64-bit: * diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 103acbbfcf9a..24c9642e8fc7 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes) * New allocation must be visible in all pgd before it can be found by * an NMI allocating from the pool. */ - vmalloc_sync_all(); + vmalloc_sync_mappings(); rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); if (rc) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index b9f474c11393..6503f5d0b749 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -21,6 +21,7 @@ #include <linux/mm.h> #include <linux/stat.h> #include <linux/slab.h> +#include <linux/xarray.h> #include <linux/atomic.h> #include <linux/uaccess.h> @@ -56,6 +57,13 @@ static struct bus_type memory_subsys = { .offline = memory_subsys_offline, }; +/* + * Memory blocks are cached in a local radix tree to avoid + * a costly linear search for the corresponding device on + * the subsystem bus. + */ +static DEFINE_XARRAY(memory_blocks); + static BLOCKING_NOTIFIER_HEAD(memory_chain); int register_memory_notifier(struct notifier_block *nb) @@ -549,22 +557,23 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) return 0; } -/* A reference for the returned memory block device is acquired. */ +/* + * A reference for the returned memory block device is acquired. + * + * Called under device_hotplug_lock. + */ static struct memory_block *find_memory_block_by_id(unsigned long block_id) { - struct device *dev; + struct memory_block *mem; - dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); - return dev ? to_memory_block(dev) : NULL; + mem = xa_load(&memory_blocks, block_id); + if (mem) + get_device(&mem->dev); + return mem; } /* - * For now, we have a linear search to go find the appropriate - * memory_block corresponding to a particular phys_index. If - * this gets to be a real problem, we can always use a radix - * tree or something here. - * - * This could be made generic for all device subsystems. + * Called under device_hotplug_lock. */ struct memory_block *find_memory_block(struct mem_section *section) { @@ -608,9 +617,16 @@ int register_memory(struct memory_block *memory) memory->dev.offline = memory->state == MEM_OFFLINE; ret = device_register(&memory->dev); - if (ret) + if (ret) { put_device(&memory->dev); - + return ret; + } + ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, + GFP_KERNEL)); + if (ret) { + put_device(&memory->dev); + device_unregister(&memory->dev); + } return ret; } @@ -668,6 +684,8 @@ static void unregister_memory(struct memory_block *memory) if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) return; + WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL); + /* drop the ref. we got via find_memory_block() */ put_device(&memory->dev); device_unregister(&memory->dev); @@ -817,6 +835,8 @@ void __init memory_dev_init(void) * * In case func() returns an error, walking is aborted and the error is * returned. + * + * Called under device_hotplug_lock. */ int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func) @@ -1287,12 +1287,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, * the ringbuffer empty. So in practice we should be ok, but it's * something to be aware of when touching this code. */ - if (until == 0) - aio_read_events(ctx, min_nr, nr, event, &ret); - else - wait_event_interruptible_hrtimeout(ctx->wait, - aio_read_events(ctx, min_nr, nr, event, &ret), - until); + wait_event_interruptible_hrtimeout(ctx->wait, + aio_read_events(ctx, min_nr, nr, event, &ret), + until); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index b8d28370cfd7..4299e100a05b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -996,10 +996,20 @@ grow_dev_page(struct block_device *bdev, sector_t block, end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits, size); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x01; +#endif goto done; } - if (!try_to_free_buffers(page)) + if (!try_to_free_buffers(page)) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x02; +#endif goto failed; + } +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x04; +#endif } /* @@ -1019,6 +1029,9 @@ grow_dev_page(struct block_device *bdev, sector_t block, spin_unlock(&inode->i_mapping->private_lock); done: ret = (block < end_block) ? 1 : -ENXIO; +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x08; +#endif failed: unlock_page(page); put_page(page); @@ -1074,6 +1087,12 @@ __getblk_slow(struct block_device *bdev, sector_t block, return NULL; } +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_stamp = jiffies; + current->getblk_executed = 0; + current->getblk_bh_count = 0; + current->getblk_bh_state = 0; +#endif for (;;) { struct buffer_head *bh; int ret; @@ -1085,6 +1104,24 @@ __getblk_slow(struct block_device *bdev, sector_t block, ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; + +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + if (!time_after(jiffies, current->getblk_stamp + 3 * HZ)) + continue; + printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx bdev_super_blocksize=%ld size=%u bdev_super_blocksize_bits=%d bdev_inode_blkbits=%d\n", + current->comm, current->pid, current->getblk_executed, + current->getblk_bh_count, current->getblk_bh_state, + IS_ERR_OR_NULL(bdev->bd_super) ? -1L : + bdev->bd_super->s_blocksize, size, + IS_ERR_OR_NULL(bdev->bd_super) ? -1 : + bdev->bd_super->s_blocksize_bits, + IS_ERR_OR_NULL(bdev->bd_inode) ? -1 : + bdev->bd_inode->i_blkbits); + current->getblk_executed = 0; + current->getblk_bh_count = 0; + current->getblk_bh_state = 0; + current->getblk_stamp = jiffies; +#endif } } @@ -3240,6 +3277,11 @@ EXPORT_SYMBOL(sync_dirty_buffer); */ static inline int buffer_busy(struct buffer_head *bh) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x80; + current->getblk_bh_count = atomic_read(&bh->b_count); + current->getblk_bh_state = bh->b_state; +#endif return atomic_read(&bh->b_count) | (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock))); } @@ -3278,11 +3320,18 @@ int try_to_free_buffers(struct page *page) int ret = 0; BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) + if (PageWriteback(page)) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x10; +#endif return 0; + } if (mapping == NULL) { /* can this still happen? */ ret = drop_buffers(page, &buffers_to_free); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x20; +#endif goto out; } @@ -3306,6 +3355,9 @@ int try_to_free_buffers(struct page *page) if (ret) cancel_dirty_page(page); spin_unlock(&mapping->private_lock); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x40; +#endif out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; diff --git a/fs/proc/page.c b/fs/proc/page.c index f909243d4a66..f3b39a7d2bf3 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,7 +115,10 @@ u64 stable_page_flags(struct page *page) * it differentiates a memory hole from a page with no flags */ if (!page) - return 1 << KPF_NOPAGE; + return BIT_ULL(KPF_NOPAGE); + + if (pfn_zone_device_reserved(page_to_pfn(page))) + return BIT_ULL(KPF_RESERVED); k = page->flags; u = 0; @@ -127,22 +130,22 @@ u64 stable_page_flags(struct page *page) * simple test in page_mapped() is not enough. */ if (!PageSlab(page) && page_mapped(page)) - u |= 1 << KPF_MMAP; + u |= BIT_ULL(KPF_MMAP); if (PageAnon(page)) - u |= 1 << KPF_ANON; + u |= BIT_ULL(KPF_ANON); if (PageKsm(page)) - u |= 1 << KPF_KSM; + u |= BIT_ULL(KPF_KSM); /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) - u |= 1 << KPF_COMPOUND_HEAD; + u |= BIT_ULL(KPF_COMPOUND_HEAD); if (PageTail(page)) - u |= 1 << KPF_COMPOUND_TAIL; + u |= BIT_ULL(KPF_COMPOUND_TAIL); if (PageHuge(page)) - u |= 1 << KPF_HUGE; + u |= BIT_ULL(KPF_HUGE); /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it @@ -153,14 +156,13 @@ u64 stable_page_flags(struct page *page) struct page *head = compound_head(page); if (PageLRU(head) || PageAnon(head)) - u |= 1 << KPF_THP; + u |= BIT_ULL(KPF_THP); else if (is_huge_zero_page(head)) { - u |= 1 << KPF_ZERO_PAGE; - u |= 1 << KPF_THP; + u |= BIT_ULL(KPF_ZERO_PAGE); + u |= BIT_ULL(KPF_THP); } } else if (is_zero_pfn(page_to_pfn(page))) - u |= 1 << KPF_ZERO_PAGE; - + u |= BIT_ULL(KPF_ZERO_PAGE); /* * Caveats on high order pages: page->_refcount will only be set @@ -168,23 +170,23 @@ u64 stable_page_flags(struct page *page) * SLOB won't set PG_slab at all on compound pages. */ if (PageBuddy(page)) - u |= 1 << KPF_BUDDY; + u |= BIT_ULL(KPF_BUDDY); else if (page_count(page) == 0 && is_free_buddy_page(page)) - u |= 1 << KPF_BUDDY; + u |= BIT_ULL(KPF_BUDDY); if (PageOffline(page)) - u |= 1 << KPF_OFFLINE; + u |= BIT_ULL(KPF_OFFLINE); if (PageTable(page)) - u |= 1 << KPF_PGTABLE; + u |= BIT_ULL(KPF_PGTABLE); if (page_is_idle(page)) - u |= 1 << KPF_IDLE; + u |= BIT_ULL(KPF_IDLE); u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); if (PageTail(page) && PageSlab(compound_head(page))) - u |= 1 << KPF_SLAB; + u |= BIT_ULL(KPF_SLAB); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); @@ -197,7 +199,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); if (PageSwapCache(page)) - u |= 1 << KPF_SWAPCACHE; + u |= BIT_ULL(KPF_SWAPCACHE); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index ee179a81b3da..55c1dc329bd2 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -147,6 +147,17 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * return error; } +static int ramfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + + inode = ramfs_get_inode(dir->i_sb, dir, mode, 0); + if (!inode) + return -ENOSPC; + d_tmpfile(dentry, inode); + return 0; +} + static const struct inode_operations ramfs_dir_inode_operations = { .create = ramfs_create, .lookup = simple_lookup, @@ -157,6 +168,7 @@ static const struct inode_operations ramfs_dir_inode_operations = { .rmdir = simple_rmdir, .mknod = ramfs_mknod, .rename = simple_rename, + .tmpfile = ramfs_tmpfile, }; /* diff --git a/include/linux/compat.h b/include/linux/compat.h index 11083d84eb23..df2475be134a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -248,15 +248,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -/* - * These functions operate on 32- or 64-bit specs depending on - * COMPAT_USE_64BIT_TIME, hence the void user pointer arguments. - */ -extern int compat_get_timespec(struct timespec *, const void __user *); -extern int compat_put_timespec(const struct timespec *, void __user *); -extern int compat_get_timeval(struct timeval *, const void __user *); -extern int compat_put_timeval(const struct timeval *, void __user *); - struct compat_iovec { compat_uptr_t iov_base; compat_size_t iov_len; @@ -416,26 +407,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginf int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); -static inline int old_timeval32_compare(struct old_timeval32 *lhs, - struct old_timeval32 *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_usec - rhs->tv_usec; -} - -static inline int old_timespec32_compare(struct old_timespec32 *lhs, - struct old_timespec32 *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); /* diff --git a/include/linux/ktime.h b/include/linux/ktime.h index b2bb44f87f5a..d1fb05135665 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -66,33 +66,15 @@ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) */ #define ktime_sub_ns(kt, nsval) ((kt) - (nsval)) -/* convert a timespec to ktime_t format: */ -static inline ktime_t timespec_to_ktime(struct timespec ts) -{ - return ktime_set(ts.tv_sec, ts.tv_nsec); -} - /* convert a timespec64 to ktime_t format: */ static inline ktime_t timespec64_to_ktime(struct timespec64 ts) { return ktime_set(ts.tv_sec, ts.tv_nsec); } -/* convert a timeval to ktime_t format: */ -static inline ktime_t timeval_to_ktime(struct timeval tv) -{ - return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); -} - -/* Map the ktime_t to timespec conversion to ns_to_timespec function */ -#define ktime_to_timespec(kt) ns_to_timespec((kt)) - /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec64(kt) ns_to_timespec64((kt)) -/* Map the ktime_t to timeval conversion to ns_to_timeval function */ -#define ktime_to_timeval(kt) ns_to_timeval((kt)) - /* Convert ktime_t to nanoseconds */ static inline s64 ktime_to_ns(const ktime_t kt) { @@ -216,25 +198,6 @@ static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** - * ktime_to_timespec_cond - convert a ktime_t variable to timespec - * format only if the variable contains data - * @kt: the ktime_t variable to convert - * @ts: the timespec variable to store the result in - * - * Return: %true if there was a successful conversion, %false if kt was 0. - */ -static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt, - struct timespec *ts) -{ - if (kt) { - *ts = ktime_to_timespec(kt); - return true; - } else { - return false; - } -} - -/** * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 * format only if the variable contains data * @kt: the ktime_t variable to convert diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 6fefb09af7c3..c676e33205d3 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -123,6 +123,7 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) } #ifdef CONFIG_ZONE_DEVICE +bool pfn_zone_device_reserved(unsigned long pfn); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); @@ -133,6 +134,11 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); #else +static inline bool pfn_zone_device_reserved(unsigned long pfn) +{ + return false; +} + static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 0918904c939d..f314790cb527 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -906,6 +906,7 @@ struct task_struct { #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; + unsigned long killed_time; #endif /* Filesystem information: */ struct fs_struct *fs; @@ -1265,6 +1266,7 @@ struct task_struct { #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; #endif + struct list_head oom_victim_list; #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif @@ -1285,6 +1287,13 @@ struct task_struct { unsigned long prev_lowest_stack; #endif +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + unsigned long getblk_stamp; + unsigned int getblk_executed; + unsigned int getblk_bh_count; + unsigned long getblk_bh_state; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/include/linux/string.h b/include/linux/string.h index 6dfbb2efa815..3b8e8b12dd37 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -35,6 +35,51 @@ ssize_t strscpy(char *, const char *, size_t); /* Wraps calls to strscpy()/memset(), no arch specific code required */ ssize_t strscpy_pad(char *dest, const char *src, size_t count); +/** + * stracpy - Copy a C-string into an array of char/u8/s8 or equivalent + * @dest: Where to copy the string, must be an array of char and not a pointer + * @src: String to copy, may be a pointer or const char array + * + * Helper for strscpy(). + * Copies a maximum of sizeof(@dest) bytes of @src with %NUL termination. + * + * Returns: + * * The number of characters copied (not including the trailing %NUL) + * * -E2BIG if @dest is a zero size array or @src was truncated. + */ +#define stracpy(dest, src) \ +({ \ + size_t count = ARRAY_SIZE(dest); \ + BUILD_BUG_ON(!(__same_type(dest, char[]) || \ + __same_type(dest, unsigned char[]) || \ + __same_type(dest, signed char[]))); \ + \ + strscpy(dest, src, count); \ +}) + +/** + * stracpy_pad - Copy a C-string into an array of char/u8/s8 with %NUL padding + * @dest: Where to copy the string, must be an array of char and not a pointer + * @src: String to copy, may be a pointer or const char array + * + * Helper for strscpy_pad(). + * Copies a maximum of sizeof(@dest) bytes of @src with %NUL termination + * and zero-pads the remaining size of @dest + * + * Returns: + * * The number of characters copied (not including the trailing %NUL) + * * -E2BIG if @dest is a zero size array or @src was truncated. + */ +#define stracpy_pad(dest, src) \ +({ \ + size_t count = ARRAY_SIZE(dest); \ + BUILD_BUG_ON(!(__same_type(dest, char[]) || \ + __same_type(dest, unsigned char[]) || \ + __same_type(dest, signed char[]))); \ + \ + strscpy_pad(dest, src, count); \ +}) + #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif diff --git a/include/linux/time32.h b/include/linux/time32.h index cad4c3186002..cf9320cd2d0b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -12,8 +12,6 @@ #include <linux/time64.h> #include <linux/timex.h> -#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1) - typedef s32 old_time32_t; struct old_timespec32 { @@ -73,162 +71,12 @@ struct __kernel_timex; int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *); int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *); -#if __BITS_PER_LONG == 64 - -/* timespec64 is defined as timespec here */ -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - return *(const struct timespec *)&ts64; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - return *(const struct timespec64 *)&ts; -} - -#else -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - struct timespec ret; - - ret.tv_sec = (time_t)ts64.tv_sec; - ret.tv_nsec = ts64.tv_nsec; - return ret; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - struct timespec64 ret; - - ret.tv_sec = ts.tv_sec; - ret.tv_nsec = ts.tv_nsec; - return ret; -} -#endif - -static inline int timespec_equal(const struct timespec *a, - const struct timespec *b) -{ - return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); -} - -/* - * lhs < rhs: return <0 - * lhs == rhs: return 0 - * lhs > rhs: return >0 - */ -static inline int timespec_compare(const struct timespec *lhs, const struct timespec *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - -/* - * Returns true if the timespec is norm, false if denorm: - */ -static inline bool timespec_valid(const struct timespec *ts) -{ - /* Dates before 1970 are bogus */ - if (ts->tv_sec < 0) - return false; - /* Can't have more nanoseconds then a second */ - if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) - return false; - return true; -} - -/** - * timespec_to_ns - Convert timespec to nanoseconds - * @ts: pointer to the timespec variable to be converted - * - * Returns the scalar nanosecond representation of the timespec - * parameter. - */ -static inline s64 timespec_to_ns(const struct timespec *ts) -{ - return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; -} - /** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -extern struct timespec ns_to_timespec(const s64 nsec); - -/** - * timespec_add_ns - Adds nanoseconds to a timespec - * @a: pointer to timespec to be incremented - * @ns: unsigned nanoseconds value to be added - * - * This must always be inlined because its used from the x86-64 vdso, - * which cannot call other kernel functions. - */ -static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) -{ - a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); - a->tv_nsec = ns; -} - -static inline unsigned long mktime(const unsigned int year, - const unsigned int mon, const unsigned int day, - const unsigned int hour, const unsigned int min, - const unsigned int sec) -{ - return mktime64(year, mon, day, hour, min, sec); -} - -static inline bool timeval_valid(const struct timeval *tv) -{ - /* Dates before 1970 are bogus */ - if (tv->tv_sec < 0) - return false; - - /* Can't have more microseconds then a second */ - if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) - return false; - - return true; -} - -/** - * timeval_to_ns - Convert timeval to nanoseconds - * @ts: pointer to the timeval variable to be converted - * - * Returns the scalar nanosecond representation of the timeval - * parameter. - */ -static inline s64 timeval_to_ns(const struct timeval *tv) -{ - return ((s64) tv->tv_sec * NSEC_PER_SEC) + - tv->tv_usec * NSEC_PER_USEC; -} - -/** - * ns_to_timeval - Convert nanoseconds to timeval + * ns_to_kernel_old_timeval - Convert nanoseconds to timeval * @nsec: the nanoseconds value to be converted * * Returns the timeval representation of the nsec parameter. */ -extern struct timeval ns_to_timeval(const s64 nsec); extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec); -/* - * Old names for the 32-bit time_t interfaces, these will be removed - * when everything uses the new names. - */ -#define compat_time_t old_time32_t -#define compat_timeval old_timeval32 -#define compat_timespec old_timespec32 -#define compat_itimerspec old_itimerspec32 -#define ns_to_compat_timeval ns_to_old_timeval32 -#define get_compat_itimerspec64 get_old_itimerspec32 -#define put_compat_itimerspec64 put_old_itimerspec32 -#define compat_get_timespec64 get_old_timespec32 -#define compat_put_timespec64 put_old_timespec32 - #endif diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h index cc59cc9e0e84..266017fc9ee9 100644 --- a/include/linux/timekeeping32.h +++ b/include/linux/timekeeping32.h @@ -11,36 +11,4 @@ static inline unsigned long get_seconds(void) return ktime_get_real_seconds(); } -static inline void getnstimeofday(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_real_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void ktime_get_ts(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getrawmonotonic(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_raw_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getboottime(struct timespec *ts) -{ - struct timespec64 ts64; - - getboottime64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - #endif diff --git a/include/linux/types.h b/include/linux/types.h index eb870ad42919..d3021c879179 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -65,11 +65,6 @@ typedef __kernel_ssize_t ssize_t; typedef __kernel_ptrdiff_t ptrdiff_t; #endif -#ifndef _TIME_T -#define _TIME_T -typedef __kernel_old_time_t time_t; -#endif - #ifndef _CLOCK_T #define _CLOCK_T typedef __kernel_clock_t clock_t; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ec3813236699..0507a162ccd0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ diff --git a/include/linux/wait.h b/include/linux/wait.h index feeb6be5cad6..55fcd2389d23 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -544,7 +544,7 @@ do { \ ({ \ int __ret = 0; \ might_sleep(); \ - if (!(condition)) \ + if (!(condition) && (timeout)) \ __ret = __wait_event_hrtimeout(wq_head, condition, timeout, \ TASK_UNINTERRUPTIBLE); \ __ret; \ @@ -570,7 +570,7 @@ do { \ ({ \ long __ret = 0; \ might_sleep(); \ - if (!(condition)) \ + if (!(condition) && (timeout)) \ __ret = __wait_event_hrtimeout(wq, condition, timeout, \ TASK_INTERRUPTIBLE); \ __ret; \ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index a5ab2973e8dc..c37e2280e6dd 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -127,18 +127,43 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_b ); #ifdef CONFIG_MEMCG -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin, +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_begin_template, - TP_PROTO(int order, gfp_t gfp_flags), + TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags), - TP_ARGS(order, gfp_flags) + TP_ARGS(cgroup_ino, order, gfp_flags), + + TP_STRUCT__entry( + __field(unsigned int, cgroup_ino) + __field(int, order) + __field(gfp_t, gfp_flags) + ), + + TP_fast_assign( + __entry->cgroup_ino = cgroup_ino; + __entry->order = order; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("cgroup_ino=%u order=%d gfp_flags=%s", + __entry->cgroup_ino, __entry->order, + show_gfp_flags(__entry->gfp_flags)) ); -DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin, +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, + mm_vmscan_memcg_reclaim_begin, - TP_PROTO(int order, gfp_t gfp_flags), + TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags), - TP_ARGS(order, gfp_flags) + TP_ARGS(cgroup_ino, order, gfp_flags) +); + +DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, + mm_vmscan_memcg_softlimit_reclaim_begin, + + TP_PROTO(unsigned int cgroup_ino, int order, gfp_t gfp_flags), + + TP_ARGS(cgroup_ino, order, gfp_flags) ); #endif /* CONFIG_MEMCG */ @@ -167,18 +192,40 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end ); #ifdef CONFIG_MEMCG -DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end, +DECLARE_EVENT_CLASS(mm_vmscan_memcg_reclaim_end_template, - TP_PROTO(unsigned long nr_reclaimed), + TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed), - TP_ARGS(nr_reclaimed) + TP_ARGS(cgroup_ino, nr_reclaimed), + + TP_STRUCT__entry( + __field(unsigned int, cgroup_ino) + __field(unsigned long, nr_reclaimed) + ), + + TP_fast_assign( + __entry->cgroup_ino = cgroup_ino; + __entry->nr_reclaimed = nr_reclaimed; + ), + + TP_printk("cgroup_ino=%u nr_reclaimed=%lu", + __entry->cgroup_ino, __entry->nr_reclaimed) ); -DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end, +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, + mm_vmscan_memcg_reclaim_end, - TP_PROTO(unsigned long nr_reclaimed), + TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed), - TP_ARGS(nr_reclaimed) + TP_ARGS(cgroup_ino, nr_reclaimed) +); + +DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, + mm_vmscan_memcg_softlimit_reclaim_end, + + TP_PROTO(unsigned int cgroup_ino, unsigned long nr_reclaimed), + + TP_ARGS(cgroup_ino, nr_reclaimed) ); #endif /* CONFIG_MEMCG */ diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h index 2f9c80595ba7..b5f7594eee7a 100644 --- a/include/uapi/asm-generic/posix_types.h +++ b/include/uapi/asm-generic/posix_types.h @@ -87,7 +87,9 @@ typedef struct { typedef __kernel_long_t __kernel_off_t; typedef long long __kernel_loff_t; typedef __kernel_long_t __kernel_old_time_t; +#ifndef __KERNEL__ typedef __kernel_long_t __kernel_time_t; +#endif typedef long long __kernel_time64_t; typedef __kernel_long_t __kernel_clock_t; typedef int __kernel_timer_t; diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index a655aa28dc6e..4f4b6e48e01c 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/time_types.h> +#ifndef __KERNEL__ #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC struct timespec { @@ -18,6 +19,17 @@ struct timeval { __kernel_suseconds_t tv_usec; /* microseconds */ }; +struct itimerspec { + struct timespec it_interval;/* timer period */ + struct timespec it_value; /* timer expiration */ +}; + +struct itimerval { + struct timeval it_interval;/* timer interval */ + struct timeval it_value; /* current value */ +}; +#endif + struct timezone { int tz_minuteswest; /* minutes west of Greenwich */ int tz_dsttime; /* type of dst correction */ @@ -31,16 +43,6 @@ struct timezone { #define ITIMER_VIRTUAL 1 #define ITIMER_PROF 2 -struct itimerspec { - struct timespec it_interval; /* timer period */ - struct timespec it_value; /* timer expiration */ -}; - -struct itimerval { - struct timeval it_interval; /* timer interval */ - struct timeval it_value; /* current value */ -}; - /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ diff --git a/ipc/sem.c b/ipc/sem.c index 4f4303f32077..3687b71151b3 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2384,11 +2384,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock - * isn't required. Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { diff --git a/kernel/compat.c b/kernel/compat.c index 95005f849c68..843dd17e6078 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -26,70 +26,6 @@ #include <linux/uaccess.h> -static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv) -{ - return (!access_ok(ctv, sizeof(*ctv)) || - __get_user(tv->tv_sec, &ctv->tv_sec) || - __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; -} - -static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv) -{ - return (!access_ok(ctv, sizeof(*ctv)) || - __put_user(tv->tv_sec, &ctv->tv_sec) || - __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; -} - -static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts) -{ - return (!access_ok(cts, sizeof(*cts)) || - __get_user(ts->tv_sec, &cts->tv_sec) || - __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; -} - -static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts) -{ - return (!access_ok(cts, sizeof(*cts)) || - __put_user(ts->tv_sec, &cts->tv_sec) || - __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; -} - -int compat_get_timeval(struct timeval *tv, const void __user *utv) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_from_user(tv, utv, sizeof(*tv)) ? -EFAULT : 0; - else - return __compat_get_timeval(tv, utv); -} -EXPORT_SYMBOL_GPL(compat_get_timeval); - -int compat_put_timeval(const struct timeval *tv, void __user *utv) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_to_user(utv, tv, sizeof(*tv)) ? -EFAULT : 0; - else - return __compat_put_timeval(tv, utv); -} -EXPORT_SYMBOL_GPL(compat_put_timeval); - -int compat_get_timespec(struct timespec *ts, const void __user *uts) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0; - else - return __compat_get_timespec(ts, uts); -} -EXPORT_SYMBOL_GPL(compat_get_timespec); - -int compat_put_timespec(const struct timespec *ts, void __user *uts) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0; - else - return __compat_put_timespec(ts, uts); -} -EXPORT_SYMBOL_GPL(compat_put_timespec); - #ifdef __ARCH_WANT_SYS_SIGPROCMASK /* diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 14a625c16cb3..69f54848af79 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -142,6 +142,47 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) touch_nmi_watchdog(); } +static void check_killed_task(struct task_struct *t, unsigned long timeout) +{ + unsigned long stamp = t->killed_time; + + /* + * Ensure the task is not frozen. + * Also, skip vfork and any other user process that freezer should skip. + */ + if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) + return; + /* + * Skip threads which are already inside do_exit(), for exit_mm() etc. + * might take many seconds. + */ + if (t->flags & PF_EXITING) + return; + if (!stamp) { + stamp = jiffies; + if (!stamp) + stamp++; + t->killed_time = stamp; + return; + } + if (time_is_after_jiffies(stamp + timeout * HZ)) + return; + trace_sched_process_hang(t); + if (sysctl_hung_task_panic) { + console_verbose(); + hung_task_call_panic = true; + } + /* + * This thread failed to terminate for more than + * sysctl_hung_task_timeout_secs seconds, complain: + */ + pr_err("INFO: task %s:%d can't die for more than %ld seconds.\n", + t->comm, t->pid, (jiffies - stamp) / HZ); + sched_show_task(t); + hung_task_show_lock = true; + touch_nmi_watchdog(); +} + /* * To avoid extending the RCU grace period for an unbounded amount of time, * periodically exit the critical section and enter a new one. @@ -193,6 +234,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) goto unlock; last_break = jiffies; } + /* Check threads which are about to terminate. */ + if (unlikely(fatal_signal_pending(t))) + check_killed_task(t, timeout); /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) check_hung_task(t, timeout); diff --git a/kernel/notifier.c b/kernel/notifier.c index 63d7501ac638..5989bbb93039 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { - vmalloc_sync_all(); + vmalloc_sync_mappings(); return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); diff --git a/kernel/relay.c b/kernel/relay.c index ade14fb7ce2e..07ee1a791d85 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -991,14 +991,14 @@ static void relay_file_read_consume(struct rchan_buf *buf, /* * relay_file_read_avail - boolean, are there unconsumed bytes available? */ -static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) +static int relay_file_read_avail(struct rchan_buf *buf) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t produced = buf->subbufs_produced; size_t consumed = buf->subbufs_consumed; - relay_file_read_consume(buf, read_pos, 0); + relay_file_read_consume(buf, 0, 0); consumed = buf->subbufs_consumed; @@ -1059,23 +1059,20 @@ static size_t relay_file_read_subbuf_avail(size_t read_pos, /** * relay_file_read_start_pos - find the first available byte to read - * @read_pos: file read position * @buf: relay channel buffer * - * If the @read_pos is in the middle of padding, return the + * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. */ -static size_t relay_file_read_start_pos(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_start_pos(struct rchan_buf *buf) { size_t read_subbuf, padding, padding_start, padding_end; size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t consumed = buf->subbufs_consumed % n_subbufs; + size_t read_pos = consumed * subbuf_size + buf->bytes_consumed; - if (!read_pos) - read_pos = consumed * subbuf_size + buf->bytes_consumed; read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; padding_start = (read_subbuf + 1) * subbuf_size - padding; @@ -1131,10 +1128,10 @@ static ssize_t relay_file_read(struct file *filp, do { void *from; - if (!relay_file_read_avail(buf, *ppos)) + if (!relay_file_read_avail(buf)) break; - read_start = relay_file_read_start_pos(*ppos, buf); + read_start = relay_file_read_start_pos(buf); avail = relay_file_read_subbuf_avail(read_start, buf); if (!avail) break; diff --git a/kernel/time/time.c b/kernel/time/time.c index cdd7386115ff..3985b2b32d08 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -449,49 +449,6 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0, } EXPORT_SYMBOL(mktime64); -/** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -struct timespec ns_to_timespec(const s64 nsec) -{ - struct timespec ts; - s32 rem; - - if (!nsec) - return (struct timespec) {0, 0}; - - ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); - if (unlikely(rem < 0)) { - ts.tv_sec--; - rem += NSEC_PER_SEC; - } - ts.tv_nsec = rem; - - return ts; -} -EXPORT_SYMBOL(ns_to_timespec); - -/** - * ns_to_timeval - Convert nanoseconds to timeval - * @nsec: the nanoseconds value to be converted - * - * Returns the timeval representation of the nsec parameter. - */ -struct timeval ns_to_timeval(const s64 nsec) -{ - struct timespec ts = ns_to_timespec(nsec); - struct timeval tv; - - tv.tv_sec = ts.tv_sec; - tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000; - - return tv; -} -EXPORT_SYMBOL(ns_to_timeval); - struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec) { struct timespec64 ts = ns_to_timespec64(nsec); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fd0c69404a3b..e4676b992eae 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1528,6 +1528,12 @@ config IO_STRICT_DEVMEM menu "$(SRCARCH) Debugging" +config DEBUG_AID_FOR_SYZBOT + bool "Additional debug code for syzbot" + default n + help + This option is intended for testing by syzbot. + source "arch/$(SRCARCH)/Kconfig.debug" endmenu diff --git a/mm/memremap.c b/mm/memremap.c index 09b5b7adc773..5d05bb29ee19 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -56,8 +56,16 @@ static void pgmap_array_delete(struct resource *res) static unsigned long pfn_first(struct dev_pagemap *pgmap) { - return PHYS_PFN(pgmap->res.start) + - vmem_altmap_offset(pgmap_altmap(pgmap)); + const struct resource *res = &pgmap->res; + struct vmem_altmap *altmap = pgmap_altmap(pgmap); + unsigned long pfn; + + if (altmap) + pfn = altmap->base_pfn + vmem_altmap_offset(altmap); + else + pfn = PHYS_PFN(res->start); + + return pfn; } static unsigned long pfn_end(struct dev_pagemap *pgmap) @@ -74,6 +82,26 @@ static unsigned long pfn_next(unsigned long pfn) return pfn + 1; } +/* + * This returns true if the page is reserved by ZONE_DEVICE driver. + */ +bool pfn_zone_device_reserved(unsigned long pfn) +{ + struct dev_pagemap *pgmap; + struct vmem_altmap *altmap; + bool ret = false; + + pgmap = get_dev_pagemap(pfn, NULL); + if (!pgmap) + return ret; + altmap = pgmap_altmap(pgmap); + if (altmap && pfn < (altmap->base_pfn + altmap->reserve)) + ret = true; + put_dev_pagemap(pgmap); + + return ret; +} + #define for_each_device_pfn(pfn, map) \ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn)) diff --git a/mm/mmap.c b/mm/mmap.c index 6756b8bb0033..7cc2562b99fd 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1852,6 +1852,22 @@ unacct_error: return error; } +static inline unsigned long gap_start_offset(struct vm_unmapped_area_info *info, + unsigned long addr) +{ + /* get gap_start offset to adjust gap address to the + * desired alignment + */ + return (info->align_offset - addr) & info->align_mask; +} + +static inline unsigned long gap_end_offset(struct vm_unmapped_area_info *info, + unsigned long addr) +{ + /* get gap_end offset to adjust gap address to the desired alignment */ + return (addr - info->align_offset) & info->align_mask; +} + unsigned long unmapped_area(struct vm_unmapped_area_info *info) { /* @@ -1866,10 +1882,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; - /* Adjust search length to account for worst case alignment overhead */ - length = info->length + info->align_mask; - if (length < info->length) - return -ENOMEM; + length = info->length; /* Adjust search limits by the desired length */ if (info->high_limit < length) @@ -1901,6 +1914,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) } gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; + gap_start += gap_start_offset(info, gap_start); check_current: /* Check if current node has a suitable gap */ if (gap_start > high_limit) @@ -1929,6 +1943,7 @@ check_current: struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_left) { gap_start = vm_end_gap(vma->vm_prev); + gap_start += gap_start_offset(info, gap_start); gap_end = vm_start_gap(vma); goto check_current; } @@ -1938,17 +1953,17 @@ check_current: check_highest: /* Check highest gap, which does not precede any rbtree node */ gap_start = mm->highest_vm_end; + gap_start += gap_start_offset(info, gap_start); gap_end = ULONG_MAX; /* Only for VM_BUG_ON below */ if (gap_start > high_limit) return -ENOMEM; found: /* We found a suitable gap. Clip it with the original low_limit. */ - if (gap_start < info->low_limit) + if (gap_start < info->low_limit) { gap_start = info->low_limit; - - /* Adjust gap address to the desired alignment */ - gap_start += (info->align_offset - gap_start) & info->align_mask; + gap_start += gap_start_offset(info, gap_start); + } VM_BUG_ON(gap_start + info->length > info->high_limit); VM_BUG_ON(gap_start + info->length > gap_end); @@ -1961,16 +1976,14 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; - /* Adjust search length to account for worst case alignment overhead */ - length = info->length + info->align_mask; - if (length < info->length) - return -ENOMEM; + length = info->length; /* * Adjust search limits by the desired length. * See implementation comment at top of unmapped_area(). */ gap_end = info->high_limit; + gap_end -= gap_end_offset(info, gap_end); if (gap_end < length) return -ENOMEM; high_limit = gap_end - length; @@ -2007,6 +2020,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) check_current: /* Check if current node has a suitable gap */ gap_end = vm_start_gap(vma); + gap_end -= gap_end_offset(info, gap_end); if (gap_end < low_limit) return -ENOMEM; if (gap_start <= high_limit && @@ -2041,13 +2055,14 @@ check_current: found: /* We found a suitable gap. Clip it with the original high_limit. */ - if (gap_end > info->high_limit) + if (gap_end > info->high_limit) { gap_end = info->high_limit; + gap_end -= gap_end_offset(info, gap_end); + } found_highest: /* Compute highest gap address at the desired alignment */ gap_end -= info->length; - gap_end -= (gap_end - info->align_offset) & info->align_mask; VM_BUG_ON(gap_end < info->low_limit); VM_BUG_ON(gap_end < gap_start); diff --git a/mm/nommu.c b/mm/nommu.c index bd2b4e5ef144..318df4e236c9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -370,10 +370,14 @@ void vm_unmap_aliases(void) EXPORT_SYMBOL_GPL(vm_unmap_aliases); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement a stub for vmalloc_sync_[un]mapping() if the architecture + * chose not to have one. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) +{ +} + +void __weak vmalloc_sync_unmappings(void) { } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dfc357614e56..d7cc37e3f91d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -378,36 +378,13 @@ static void select_bad_process(struct oom_control *oc) } } -static int dump_task(struct task_struct *p, void *arg) -{ - struct oom_control *oc = arg; - struct task_struct *task; - - if (oom_unkillable_task(p)) - return 0; - - /* p may not have freeable memory in nodemask */ - if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc)) - return 0; - task = find_lock_task_mm(p); - if (!task) { - /* - * This is a kthread or all of p's threads have already - * detached their mm's. There's no need to report - * them; they can't be oom killed anyway. - */ - return 0; +static int add_candidate_task(struct task_struct *p, void *arg) +{ + if (!oom_unkillable_task(p)) { + get_task_struct(p); + list_add_tail(&p->oom_victim_list, (struct list_head *) arg); } - - pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n", - task->pid, from_kuid(&init_user_ns, task_uid(task)), - task->tgid, task->mm->total_vm, get_mm_rss(task->mm), - mm_pgtables_bytes(task->mm), - get_mm_counter(task->mm, MM_SWAPENTS), - task->signal->oom_score_adj, task->comm); - task_unlock(task); - return 0; } @@ -423,19 +400,41 @@ static int dump_task(struct task_struct *p, void *arg) */ static void dump_tasks(struct oom_control *oc) { - pr_info("Tasks state (memory values in pages):\n"); - pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n"); + LIST_HEAD(list); + struct task_struct *p; + struct task_struct *t; if (is_memcg_oom(oc)) - mem_cgroup_scan_tasks(oc->memcg, dump_task, oc); + mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list); else { - struct task_struct *p; - rcu_read_lock(); for_each_process(p) - dump_task(p, oc); + add_candidate_task(p, &list); rcu_read_unlock(); } + pr_info("Tasks state (memory values in pages):\n"); + pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n"); + list_for_each_entry(p, &list, oom_victim_list) { + cond_resched(); + /* p may not have freeable memory in nodemask */ + if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc)) + continue; + /* All of p's threads might have already detached their mm's. */ + t = find_lock_task_mm(p); + if (!t) + continue; + pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n", + t->pid, from_kuid(&init_user_ns, task_uid(t)), + t->tgid, t->mm->total_vm, get_mm_rss(t->mm), + mm_pgtables_bytes(t->mm), + get_mm_counter(t->mm, MM_SWAPENTS), + t->signal->oom_score_adj, t->comm); + task_unlock(t); + } + list_for_each_entry_safe(p, t, &list, oom_victim_list) { + list_del(&p->oom_victim_list); + put_task_struct(p); + } } static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1f46c3b86f9f..6b8eeb0ecee5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1295,7 +1295,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) * First make sure the mappings are removed from all page-tables * before they are freed. */ - vmalloc_sync_all(); + vmalloc_sync_unmappings(); /* * TODO: to calculate a flush range without looping. @@ -3128,16 +3128,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, EXPORT_SYMBOL(remap_vmalloc_range); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose + * not to have one. * * The purpose of this function is to make sure the vmalloc area * mappings are identical in all page-tables in the system. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) { } +void __weak vmalloc_sync_unmappings(void) +{ +} static int f(pte_t *pte, unsigned long addr, void *data) { diff --git a/mm/vmscan.c b/mm/vmscan.c index c05eb9efec07..b1863de475fb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3300,8 +3300,10 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); - trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, - sc.gfp_mask); + trace_mm_vmscan_memcg_softlimit_reclaim_begin( + cgroup_ino(memcg->css.cgroup), + sc.order, + sc.gfp_mask); /* * NOTE: Although we can get the priority field, using it @@ -3312,7 +3314,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, */ shrink_lruvec(lruvec, &sc); - trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); + trace_mm_vmscan_memcg_softlimit_reclaim_end( + cgroup_ino(memcg->css.cgroup), + sc.nr_reclaimed); *nr_scanned = sc.nr_scanned; @@ -3347,7 +3351,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, set_task_reclaim_state(current, &sc.reclaim_state); - trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask); + trace_mm_vmscan_memcg_reclaim_begin( + cgroup_ino(memcg->css.cgroup), + 0, sc.gfp_mask); psi_memstall_enter(&pflags); noreclaim_flag = memalloc_noreclaim_save(); @@ -3357,7 +3363,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); + trace_mm_vmscan_memcg_reclaim_end( + cgroup_ino(memcg->css.cgroup), + nr_reclaimed); set_task_reclaim_state(current, NULL); return nr_reclaimed; diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a63380c6b0d2..f3b84347c2f3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -607,6 +607,20 @@ foreach my $entry (keys %deprecated_apis) { } $deprecated_apis_search = "(?:${deprecated_apis_search})"; +our %deprecated_string_apis = ( + "strcpy" => "stracpy or strscpy", + "strlcpy" => "stracpy or strscpy", + "strncpy" => "stracpy or strscpy - for non-NUL-terminated uses, strncpy dest should be __nonstring", +); + +#Create a search pattern for all these strings apis to speed up a loop below +our $deprecated_string_apis_search = ""; +foreach my $entry (keys %deprecated_string_apis) { + $deprecated_string_apis_search .= '|' if ($deprecated_string_apis_search ne ""); + $deprecated_string_apis_search .= $entry; +} +$deprecated_string_apis_search = "(?:${deprecated_string_apis_search})"; + our $mode_perms_world_writable = qr{ S_IWUGO | S_IWOTH | @@ -6480,6 +6494,16 @@ sub process { "Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr); } +# check for string deprecated apis + if ($line =~ /\b($deprecated_string_apis_search)\b\s*\(/) { + my $deprecated_string_api = $1; + my $new_api = $deprecated_string_apis{$deprecated_string_api}; + my $msg_level = \&WARN; + $msg_level = \&CHK if ($file); + &{$msg_level}("DEPRECATED_API", + "Deprecated use of '$deprecated_string_api', prefer '$new_api' instead\n" . $herecurr); + } + # check for various structs that are normally const (ops, kgdb, device_tree) # and avoid what seem like struct definitions 'struct foo {' if ($line !~ /\bconst\b/ && |