diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-03-23 18:54:17 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-03-23 18:54:17 +1100 |
commit | d8cf9e19701283afc4445ead85aae665bf2d9d84 (patch) | |
tree | b38d2b18f6cb663bb596fe21d63680e497523a76 | |
parent | 47037d8b6a510b312fcc8fef5c916122315b0604 (diff) | |
parent | 36821732f9fba66f267b0b46df1abd3abc1522bb (diff) | |
download | linux-next-d8cf9e19701283afc4445ead85aae665bf2d9d84.tar.gz |
Merge branch 'akpm/master'
145 files changed, 1141 insertions, 421 deletions
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop index 8c6a0b8e1131..9b917c7453de 100644 --- a/Documentation/ABI/testing/sysfs-platform-dell-laptop +++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop @@ -2,7 +2,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_enabled Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>, - Pali Rohár <pali.rohar@gmail.com> + Pali Rohár <pali@kernel.org> Description: This file allows to control the automatic keyboard illumination mode on some systems that have an ambient @@ -13,7 +13,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_setting Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>, - Pali Rohár <pali.rohar@gmail.com> + Pali Rohár <pali@kernel.org> Description: This file allows to specifiy the on/off threshold value, as reported by the ambient light sensor. @@ -22,7 +22,7 @@ What: /sys/class/leds/dell::kbd_backlight/start_triggers Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>, - Pali Rohár <pali.rohar@gmail.com> + Pali Rohár <pali@kernel.org> Description: This file allows to control the input triggers that turn on the keyboard backlight illumination that is @@ -45,7 +45,7 @@ What: /sys/class/leds/dell::kbd_backlight/stop_timeout Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>, - Pali Rohár <pali.rohar@gmail.com> + Pali Rohár <pali@kernel.org> Description: This file allows to specify the interval after which the keyboard illumination is disabled because of inactivity. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 335696d3360d..6dd406758f4a 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -446,6 +446,27 @@ Notes: successful IPC object allocation. If an IPC object allocation syscall fails, it is undefined if the value remains unmodified or is reset to -1. +modprobe: +========= + +The path to the usermode helper for autoloading kernel modules, by +default "/sbin/modprobe". This binary is executed when the kernel +requests a module. For example, if userspace passes an unknown +filesystem type "foo" to mount(), then the kernel will automatically +request the module "fs-foo.ko" by executing this usermode helper. +This usermode helper should insert the needed module into the kernel. + +This sysctl only affects module autoloading. It has no effect on the +ability to explicitly insert modules. + +If this sysctl is set to the empty string, then module autoloading is +completely disabled. The kernel will not try to execute a usermode +helper at all, nor will it call the kernel_module_request LSM hook. + +If CONFIG_STATIC_USERMODEHELPER=y is set in the kernel configuration, +then the configured static usermode helper overrides this sysctl, +except that the empty string is still accepted to completely disable +module autoloading as described above. nmi_watchdog ============ diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt new file mode 100644 index 000000000000..c527d05c0459 --- /dev/null +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -0,0 +1,34 @@ +# +# Feature name: debug-vm-pgtable +# Kconfig: ARCH_HAS_DEBUG_VM_PGTABLE +# description: arch supports pgtable tests for semantics compliance +# + ----------------------- + | arch |status| + ----------------------- + | alpha: | TODO | + | arc: | ok | + | arm: | TODO | + | arm64: | ok | + | c6x: | TODO | + | csky: | TODO | + | h8300: | TODO | + | hexagon: | TODO | + | ia64: | TODO | + | m68k: | TODO | + | microblaze: | TODO | + | mips: | TODO | + | nds32: | TODO | + | nios2: | TODO | + | openrisc: | TODO | + | parisc: | TODO | + | powerpc: | ok | + | riscv: | TODO | + | s390: | ok | + | sh: | TODO | + | sparc: | TODO | + | um: | TODO | + | unicore32: | TODO | + | x86: | ok | + | xtensa: | TODO | + ----------------------- diff --git a/MAINTAINERS b/MAINTAINERS index edb89b90c587..07888a277f6f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -726,7 +726,7 @@ L: linux-alpha@vger.kernel.org F: arch/alpha/ ALPS PS/2 TOUCHPAD DRIVER -R: Pali Rohár <pali.rohar@gmail.com> +R: Pali Rohár <pali@kernel.org> F: drivers/input/mouse/alps.* ALTERA I2C CONTROLLER DRIVER @@ -4773,23 +4773,23 @@ F: drivers/net/fddi/defza.* DELL LAPTOP DRIVER M: Matthew Garrett <mjg59@srcf.ucam.org> -M: Pali Rohár <pali.rohar@gmail.com> +M: Pali Rohár <pali@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/dell-laptop.c DELL LAPTOP FREEFALL DRIVER -M: Pali Rohár <pali.rohar@gmail.com> +M: Pali Rohár <pali@kernel.org> S: Maintained F: drivers/platform/x86/dell-smo8800.c DELL LAPTOP RBTN DRIVER -M: Pali Rohár <pali.rohar@gmail.com> +M: Pali Rohár <pali@kernel.org> S: Maintained F: drivers/platform/x86/dell-rbtn.* DELL LAPTOP SMM DRIVER -M: Pali Rohár <pali.rohar@gmail.com> +M: Pali Rohár <pali@kernel.org> S: Maintained F: drivers/hwmon/dell-smm-hwmon.c F: include/uapi/linux/i8k.h @@ -4801,7 +4801,7 @@ S: Maintained F: drivers/platform/x86/dell_rbu.c DELL SMBIOS DRIVER -M: Pali Rohár <pali.rohar@gmail.com> +M: Pali Rohár <pali@kernel.org> M: Mario Limonciello <mario.limonciello@dell.com> L: platform-driver-x86@vger.kernel.org S: Maintained @@ -4834,7 +4834,7 @@ F: drivers/platform/x86/dell-wmi-descriptor.c DELL WMI NOTIFICATIONS DRIVER M: Matthew Garrett <mjg59@srcf.ucam.org> -M: Pali Rohár <pali.rohar@gmail.com> +M: Pali Rohár <pali@kernel.org> S: Maintained F: drivers/platform/x86/dell-wmi.c @@ -11973,7 +11973,7 @@ F: drivers/media/i2c/et8ek8 F: drivers/media/i2c/ad5820.c NOKIA N900 POWER SUPPLY DRIVERS -R: Pali Rohár <pali.rohar@gmail.com> +R: Pali Rohár <pali@kernel.org> F: include/linux/power/bq2415x_charger.h F: include/linux/power/bq27xxx_battery.h F: drivers/power/supply/bq2415x_charger.c diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index f3fb2848470a..e241bd88880f 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -90,9 +90,6 @@ typedef struct page *pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #endif /* CONFIG_DISCONTIGMEM */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 299791ce14b6..0267aa8a4f86 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -268,7 +268,6 @@ extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; } extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } @@ -276,7 +275,6 @@ extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); ret extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ff306246d0f8..471ef22216c4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -6,6 +6,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 0a32e8cfd074..b0dfed0f12be 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -102,7 +102,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define WANT_PAGE_VIRTUAL 1 diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index c2b75cba26df..11b058a72a5b 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -161,9 +161,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include <asm-generic/getorder.h> diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 0d3ea35c97fe..9e084a464a97 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -211,8 +211,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_addr_end(addr,end) (end) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) -#define pte_special(pte) (0) -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * We don't have huge page support for short descriptors, for the moment diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index eabcb48a7840..556468240ba5 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -252,19 +252,8 @@ static inline void __sync_icache_dcache(pte_t pteval) extern void __sync_icache_dcache(pte_t pteval); #endif -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - unsigned long ext = 0; - - if (addr < TASK_SIZE && pte_valid_user(pteval)) { - if (!pte_special(pteval)) - __sync_icache_dcache(pteval); - ext |= PTE_EXT_NG; - } - - set_pte_ext(ptep, pteval, ext); -} +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c index d00e3c72e37d..f70d561f37f7 100644 --- a/arch/arm/mach-omap2/omap-secure.c +++ b/arch/arm/mach-omap2/omap-secure.c @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar <santosh.shilimkar@ti.com> * Copyright (C) 2012 Ivaylo Dimitrov <freemangordon@abv.bg> - * Copyright (C) 2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2013 Pali Rohár <pali@kernel.org> */ #include <linux/arm-smccc.h> diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index ba8c486c0454..4aaa95706d39 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar <santosh.shilimkar@ti.com> * Copyright (C) 2012 Ivaylo Dimitrov <freemangordon@abv.bg> - * Copyright (C) 2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2013 Pali Rohár <pali@kernel.org> */ #ifndef OMAP_ARCH_OMAP_SECURE_H #define OMAP_ARCH_OMAP_SECURE_H diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S index d4832845a4e8..7376f528034d 100644 --- a/arch/arm/mach-omap2/omap-smc.S +++ b/arch/arm/mach-omap2/omap-smc.S @@ -6,7 +6,7 @@ * Written by Santosh Shilimkar <santosh.shilimkar@ti.com> * * Copyright (C) 2012 Ivaylo Dimitrov <freemangordon@abv.bg> - * Copyright (C) 2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2013 Pali Rohár <pali@kernel.org> */ #include <linux/linkage.h> diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b598e6978b29..2dd5c41cbb8d 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -189,7 +189,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) mask = VM_WRITE; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5d0d0f86e790..16e9b041d7cf 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1672,3 +1672,17 @@ void __init early_mm_init(const struct machine_desc *mdesc) build_mem_type_table(); early_paging_init(mdesc); } + +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_valid_user(pteval)) { + if (!pte_special(pteval)) + __sync_icache_dcache(pteval); + ext |= PTE_EXT_NG; + } + + set_pte_ext(ptep, pteval, ext); +} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0736a2b17ec9..1ee1d8fab218 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select ACPI_PPTT if ACPI select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 75d6cd23a679..c01b52add377 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -36,9 +36,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include <asm-generic/getorder.h> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1027851d469a..c9cedc0432d2 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; if (kprobe_page_fault(regs, esr)) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6a0e75f48e7b..4888b99fe1f4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1373,7 +1373,7 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret, flags = 0; @@ -1381,12 +1381,13 @@ int arch_add_memory(int nid, u64 start, u64 size, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); + size, params->pgprot, __pgd_pgtable_alloc, + flags); memblock_clear_nomap(start, size); ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - restrictions); + params); if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); diff --git a/arch/c6x/include/asm/page.h b/arch/c6x/include/asm/page.h index 70db1e7632bc..40079899084d 100644 --- a/arch/c6x/include/asm/page.h +++ b/arch/c6x/include/asm/page.h @@ -2,10 +2,7 @@ #ifndef _ASM_C6X_PAGE_H #define _ASM_C6X_PAGE_H -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include <asm-generic/page.h> diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 9738eacefdc7..9b98bf31d57c 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -85,9 +85,6 @@ extern unsigned long va_pa_offset; PHYS_OFFSET_OFFSET) #define virt_to_page(x) (mem_map + MAP_NR(x)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #define pfn_to_kaddr(x) __va(PFN_PHYS(x)) #include <asm-generic/memory_model.h> diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 9b7764cb7645..9ab4a445ad99 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -110,9 +110,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern void load_pgd(unsigned long pg_dir); extern pte_t invalid_pte_table[PTRS_PER_PTE]; -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline void set_pte(pte_t *p, pte_t pte) { *p = pte; diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h index 8da5124ad344..53e037544239 100644 --- a/arch/h8300/include/asm/page.h +++ b/arch/h8300/include/asm/page.h @@ -6,8 +6,6 @@ #include <linux/types.h> #define MAP_NR(addr) (((uintptr_t)(addr)-PAGE_OFFSET) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #ifndef __ASSEMBLY__ extern unsigned long rom_length; diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index ee31f36f48f3..7cbf719c578e 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -93,8 +93,7 @@ struct page; #define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr))) /* Default vm area behavior is non-executable. */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 2fec20ad939e..d383e8bea5b2 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -158,8 +158,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* located in head.S */ /* Seems to be zero even in architectures where the zero page is firewalled? */ #define FIRST_USER_ADDRESS 0UL -#define pte_special(pte) 0 -#define pte_mkspecial(pte) (pte) /* HUGETLB not working currently */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index 5798bd2b462c..b69a5499d75b 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -218,10 +218,7 @@ get_order (unsigned long size) #define PAGE_OFFSET RGN_BASE(RGN_KERNEL) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \ - (((current->personality & READ_IMPLIES_EXEC) != 0) \ - ? VM_EXEC : 0)) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define GATE_ADDR RGN_BASE(RGN_GATE) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d602e7c622db..0e7b645b76c6 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -298,7 +298,6 @@ extern unsigned long VMALLOC_END; #define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) -#define pte_special(pte) 0 /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the @@ -311,7 +310,6 @@ extern unsigned long VMALLOC_END; #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) -#define pte_mkspecial(pte) (pte) /* * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b01d68a2d5d9..d637b4ea3147 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -670,13 +670,16 @@ mem_init (void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index b9f45aeded25..0031cd387b75 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -235,11 +235,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & CF_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~CF_PAGE_WRITABLE; @@ -312,11 +307,6 @@ static inline pte_t pte_mkcache(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - #define swapper_pg_dir kernel_pg_dir extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 4b91a470ad58..48f19f0ab1e7 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -174,7 +174,6 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp) static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -192,7 +191,6 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index da546487e177..2614a1206f2f 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -65,9 +65,6 @@ extern unsigned long _ramend; #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) #define __pfn_to_phys(pfn) PFN_PHYS(pfn) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/getorder.h> #endif /* _M68K_PAGE_H */ diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index bc4155264810..0caa18a08437 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -155,7 +155,6 @@ static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; } static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; } @@ -168,7 +167,6 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index ae7215c94706..b13463d39b38 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -194,8 +194,6 @@ extern int page_is_ram(unsigned long pfn); #ifdef CONFIG_MMU -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif /* CONFIG_MMU */ #endif /* __KERNEL__ */ diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 45b30878fc17..6b056f6545d8 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -77,10 +77,6 @@ extern pte_t *va_to_pte(unsigned long address); * Undefined behaviour if not.. */ -static inline int pte_special(pte_t pte) { return 0; } - -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* Start and end of the vmalloc area. */ /* Make sure to map the vmalloc area above the pinned kernel memory area of 32Mb. */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 0ba4ce6e2bf3..e2f503fc7a84 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -253,10 +253,7 @@ extern bool __virt_addr_valid(const volatile void *kaddr); #define virt_addr_valid(kaddr) \ __virt_addr_valid((const volatile void *) (kaddr)) -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index aef5378f909c..f1801e7a4b15 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -270,6 +270,36 @@ cache_sync_done: extern pgd_t swapper_pg_dir[]; /* + * Platform specific pte_special() and pte_mkspecial() definitions + * are required only when ARCH_HAS_PTE_SPECIAL is enabled. + */ +#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +static inline int pte_special(pte_t pte) +{ + return pte.pte_low & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte.pte_low |= _PAGE_SPECIAL; + return pte; +} +#else +static inline int pte_special(pte_t pte) +{ + return pte_val(pte) & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} +#endif +#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + +/* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ @@ -277,7 +307,6 @@ extern pgd_t swapper_pg_dir[]; static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte.pte_low & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -338,17 +367,10 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } - -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte.pte_low |= _PAGE_SPECIAL; - return pte; -} #else static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -392,12 +414,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte_val(pte) |= _PAGE_SPECIAL; - return pte; -} - #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index 86b32014c5f9..add33a7f02c8 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -59,9 +59,6 @@ typedef struct page *pgtable_t; #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #endif /* __KERNEL__ */ #endif diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6abc58ac406d..476cc4dd1709 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -286,15 +286,6 @@ PTE_BIT_FUNC(mkclean, &=~_PAGE_D); PTE_BIT_FUNC(mkdirty, |=_PAGE_D); PTE_BIT_FUNC(mkold, &=~_PAGE_YOUNG); PTE_BIT_FUNC(mkyoung, |=_PAGE_YOUNG); -static inline int pte_special(pte_t pte) -{ - return 0; -} - -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} /* * Mark the prot value as uncacheable and unbufferable. diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 0cf0c08c7da2..f331e533edc2 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -79,7 +79,7 @@ void do_page_fault(unsigned long entry, unsigned long addr, struct vm_area_struct *vma; int si_code; vm_fault_t fault; - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; unsigned int flags = FAULT_FLAG_DEFAULT; error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE); diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index 79fcac61f6ef..6a989819a7c1 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -98,8 +98,7 @@ static inline bool pfn_valid(unsigned long pfn) # define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr))) # define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr))) -# define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +# define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include <asm-generic/memory_model.h> diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 99985d8b7166..f98b7f4519ba 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -113,7 +113,6 @@ static inline int pte_dirty(pte_t pte) \ { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) \ { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } #define pgprot_noncached pgprot_noncached @@ -168,8 +167,6 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index 01069db59454..aab6e64d6db4 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -86,11 +86,6 @@ typedef struct page *pgtable_t; #endif /* __ASSEMBLY__ */ - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 248d22d8faa7..7f3fb9ceb083 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -236,8 +236,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 796ae29e9b9a..6b3f6740a6a6 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -180,9 +180,6 @@ extern int npmem_ranges; #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> #include <asm/pdc.h> diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index f0a365950536..9832c73a7021 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -377,7 +377,6 @@ static inline void pud_clear(pud_t *pud) { static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -385,7 +384,6 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Huge pte definitions. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a1a1c15a5023..b27518347a46 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -116,6 +116,7 @@ config PPC # select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2781ebf6add4..6fc4520092c7 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -251,7 +251,8 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d97db3ad9aae..46799f3c3d1d 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -289,7 +289,8 @@ static inline unsigned long radix__get_tree_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); +int radix__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 080a0bf8e54b..3ee8df0f66e0 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -240,13 +240,8 @@ static inline bool pfn_valid(unsigned long pfn) * and needs to be executable. This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS32 VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #ifdef __powerpc64__ #include <asm/page_64.h> diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 5962797f784a..79a9b7c6a132 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -94,11 +94,8 @@ extern u64 ppc64_pft_size; * stack by default, so in the absence of a PT_GNU_STACK program header * we turn execute permission off. */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS32 VM_DATA_FLAGS_EXEC +#define VM_STACK_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #define VM_STACK_DEFAULT_FLAGS \ (is_32bit_task() ? \ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 3192d454a733..c89b32443cff 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,7 +13,8 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, int nid); +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7e5714a69a58..8ed2411c3f39 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -809,7 +809,8 @@ int resize_hpt_for_hotplug(unsigned long new_mem_size) return 0; } -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { int rc; @@ -819,7 +820,7 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid } rc = htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, + pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); if (rc < 0) { diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 2bf7e1b4fd82..e0bb69c616e4 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -171,12 +171,13 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { if (radix_enabled()) - return radix__create_section_mapping(start, end, nid); + return radix__create_section_mapping(start, end, nid, prot); - return hash__create_section_mapping(start, end, nid); + return hash__create_section_mapping(start, end, nid, prot); } int __meminit remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 07527f1ed108..1199fc2bfaec 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -315,7 +315,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 2a9a0cd79490..8f9edf07063a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -254,7 +254,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, - int nid) + int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; bool prev_exec, exec = false; @@ -290,7 +290,7 @@ static int __meminit create_physical_mapping(unsigned long start, prot = PAGE_KERNEL_X; exec = true; } else { - prot = PAGE_KERNEL; + prot = _prot; exec = false; } @@ -334,7 +334,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, - -1)); + -1, PAGE_KERNEL)); } /* Find out how many PID bits are supported */ @@ -713,8 +713,10 @@ static int __meminit stop_machine_change_mapping(void *data) spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1); + create_physical_mapping(__pa(params->aligned_start), + __pa(params->start), -1, PAGE_KERNEL); + create_physical_mapping(__pa(params->end), __pa(params->aligned_end), + -1, PAGE_KERNEL); spin_lock(&init_mm.page_table_lock); return 0; } @@ -871,14 +873,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit radix__create_section_mapping(unsigned long start, + unsigned long end, int nid, + pgprot_t prot) { if (end >= RADIX_VMALLOC_START) { pr_warn("Outside the supported range\n"); return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid); + return create_physical_mapping(__pa(start), __pa(end), nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9b4f5fb719e0..041ed7cfd341 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -90,7 +90,8 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) +int __weak create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { return -ENODEV; } @@ -122,7 +123,7 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -131,14 +132,15 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid); + rc = create_section_mapping(start, start + size, nid, + params->pgprot); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void __ref arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 8ca1930caa44..2d50f76efe48 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -137,8 +137,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 082cfa26ebb4..539f1fdc1f56 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -59,6 +59,7 @@ config KASAN_SHADOW_OFFSET config S390 def_bool y select ARCH_BINFMT_ELF_STATE + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index f2d4c1bd3429..cc98f9b78fd4 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -181,8 +181,7 @@ int arch_make_page_accessible(struct page *page); #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 28c9ebf9a9fb..e070553f6807 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -581,7 +581,7 @@ void do_dat_exception(struct pt_regs *regs) int access; vm_fault_t fault; - access = VM_READ | VM_EXEC | VM_WRITE; + access = VM_ACCESS_FLAGS; fault = do_exception(regs, access); if (unlikely(fault)) do_fault_error(regs, access, fault); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ac44bd76db4b..87b2d024e75a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -268,20 +268,23 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(restrictions->altmap)) + if (WARN_ON_ONCE(params->altmap)) + return -EINVAL; + + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) return -EINVAL; rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, restrictions); + rc = __add_pages(nid, start_pfn, size_pages, params); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 5eef8be3e59f..ea8d68f58e39 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -182,9 +182,6 @@ typedef struct page *pgtable_t; #endif #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index d1b1ff2be17a..b9de2d4fa57e 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -406,14 +406,17 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot) + return -EINVAL; + /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index b76d59edec8c..478260002836 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -133,9 +133,6 @@ extern unsigned long pfn_base; #define pfn_valid(pfn) (((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr)) #define virt_addr_valid(kaddr) ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT) < max_mapnr) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e80f2d5bf62f..254dffd85fb1 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -158,9 +158,6 @@ extern unsigned long PAGE_OFFSET; #endif /* !(__ASSEMBLY__) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/getorder.h> #endif /* _SPARC64_PAGE_H */ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 6d6f44c0cad9..0de659ae0ba4 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -223,11 +223,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & SRMMU_REF; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~SRMMU_WRITE); @@ -258,8 +253,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return __pte(pte_val(pte) | SRMMU_REF); } -#define pte_mkspecial(pte) (pte) - #define pfn_pte(pfn, prot) mk_pte(pfn_to_page(pfn), prot) static inline unsigned long pte_pfn(pte_t pte) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 65494c3a420e..da527b27cf7d 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -907,11 +907,11 @@ static inline unsigned long pud_pfn(pud_t pud) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) /* Find an entry in the third-level page table.. */ -#define pte_index(dir, address) \ - ((pte_t *) __pmd_page(*(dir)) + \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) -#define pte_offset_kernel pte_index -#define pte_offset_map pte_index +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) __pmd_page(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) do { } while (0) /* We cannot include <linux/mm_types.h> at this point yet: */ diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 2daa58df2190..b5ddf5d98bd5 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -167,11 +167,6 @@ static inline int pte_newprot(pte_t pte) return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); } -static inline int pte_special(pte_t pte) -{ - return 0; -} - /* * ================================= * Flags setting section. @@ -247,11 +242,6 @@ static inline pte_t pte_mknewpage(pte_t pte) return(pte); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return(pte); -} - static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); diff --git a/arch/unicore32/include/asm/page.h b/arch/unicore32/include/asm/page.h index 8a89335673f9..96d6bdf180bd 100644 --- a/arch/unicore32/include/asm/page.h +++ b/arch/unicore32/include/asm/page.h @@ -69,9 +69,6 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/getorder.h> #endif diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index c8f7ba12f309..3b8731b3a937 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -177,7 +177,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_YOUNG) #define pte_exec(pte) (pte_val(pte) & PTE_EXEC) -#define pte_special(pte) (0) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -189,8 +188,6 @@ PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= PTE_YOUNG); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* * Mark the prot value as uncacheable. */ diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index a9bd08fbe588..3022104aa613 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -149,7 +149,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if (!(fsr ^ 0x12)) /* write? */ mask = VM_WRITE; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fa8fd092bbcd..73df1cd59538 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_CLOCKSOURCE_INIT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..a506a411474d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) @@ -73,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dfcaf5540065..998260c972a6 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -866,7 +866,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); @@ -1084,6 +1087,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index df1373415f11..8d03ffd43794 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -53,6 +53,12 @@ static inline void sync_initial_page_table(void) { } struct mm_struct; +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} + void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 64c3dce374e5..034358da4837 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 4e5f50236048..16133819415c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -744,7 +744,8 @@ int __init gart_iommu_init(void) start_pfn = PFN_DOWN(aper_base); if (!pfn_range_is_mapped(start_pfn, end_pfn)) - init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT, + PAGE_KERNEL); pr_info("PCI-DMA: using GART IOMMU.\n"); iommu_size = check_iommu_size(info.aper_base, aper_size); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e7bb483557c9..1bba16c5742b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -467,7 +467,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) * the physical memory. To access them they are temporarily mapped. */ unsigned long __ref init_memory_mapping(unsigned long start, - unsigned long end) + unsigned long end, pgprot_t prot) { struct map_range mr[NR_RANGE_MR]; unsigned long ret = 0; @@ -481,7 +481,8 @@ unsigned long __ref init_memory_mapping(unsigned long start, for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, - mr[i].page_size_mask); + mr[i].page_size_mask, + prot); add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); @@ -521,7 +522,7 @@ static unsigned long __init init_range_memory_mapping( */ can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= min(end, (u64)pgt_buf_top<<PAGE_SHIFT); - init_memory_mapping(start, end); + init_memory_mapping(start, end, PAGE_KERNEL); mapped_ram_size += end - start; can_use_brk_pgt = true; } @@ -661,7 +662,7 @@ void __init init_mem_mapping(void) #endif /* the ISA range is always mapped regardless of memory holes */ - init_memory_mapping(0, ISA_END_ADDRESS); + init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL); /* Init the trampoline, possibly with KASLR memory offset */ init_trampoline(); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index de73992b8432..4222a010057a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -257,7 +257,8 @@ static inline int __is_kernel_text(unsigned long addr) unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, + pgprot_t prot) { int use_pse = page_size_mask == (1<<PG_LEVEL_2M); unsigned long last_map_addr = end; @@ -819,12 +820,24 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; - return __add_pages(nid, start_pfn, nr_pages, restrictions); + /* + * The page tables were already mapped at boot so if the caller + * requests a different mapping type then we must change all the + * pages with __set_memory_prot(). + */ + if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { + ret = __set_memory_prot(start, nr_pages, params->pgprot); + if (ret) + return ret; + } + + return __add_pages(nid, start_pfn, nr_pages, params); } void arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a14711d3a93..3b289c2f75cd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -585,7 +585,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, */ static unsigned long __meminit phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t _prot, bool init) { unsigned long pages = 0, paddr_next; unsigned long paddr_last = paddr_end; @@ -595,7 +595,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { pud_t *pud; pmd_t *pmd; - pgprot_t prot = PAGE_KERNEL; + pgprot_t prot = _prot; vaddr = (unsigned long)__va(paddr); pud = pud_page + pud_index(vaddr); @@ -644,9 +644,12 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<<PG_LEVEL_1G)) { pages++; spin_lock(&init_mm.page_table_lock); + + prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE); + set_pte_init((pte_t *)pud, pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE), + prot), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -669,7 +672,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t prot, bool init) { unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; @@ -679,7 +682,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, - page_size_mask, init); + page_size_mask, prot, init); for (; vaddr < vaddr_end; vaddr = vaddr_next) { p4d_t *p4d = p4d_page + p4d_index(vaddr); @@ -702,13 +705,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); continue; } pud = alloc_low_page(); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); p4d_populate_init(&init_mm, p4d, pud, init); @@ -722,7 +725,7 @@ static unsigned long __meminit __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, unsigned long page_size_mask, - bool init) + pgprot_t prot, bool init) { bool pgd_changed = false; unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; @@ -743,13 +746,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start, paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask, - init); + prot, init); continue; } p4d = alloc_low_page(); paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); if (pgtable_l5_enabled()) @@ -778,10 +781,10 @@ __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long __meminit kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, true); + page_size_mask, prot, true); } /* @@ -796,7 +799,8 @@ kernel_physical_mapping_change(unsigned long paddr_start, unsigned long page_size_mask) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, false); + page_size_mask, PAGE_KERNEL, + false); } #ifndef CONFIG_NUMA @@ -843,11 +847,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -858,14 +862,14 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size); + init_memory_mapping(start, start + size, params->pgprot); - return add_pages(nid, start_pfn, nr_pages, restrictions); + return add_pages(nid, start_pfn, nr_pages, params); } #define PAGE_INUSE 0xFD diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index eeae142062ed..3f37b5c80bb3 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -12,7 +12,8 @@ void early_ioremap_page_table_range_init(void); unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask); + unsigned long page_size_mask, + pgprot_t prot); unsigned long kernel_physical_mapping_change(unsigned long start, unsigned long end, unsigned long page_size_mask); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index ea0b6df950ee..5b219d6a2bdb 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1792,6 +1792,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * _set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. + */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index c6f84c0b5d7a..8873ed1438a9 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -63,7 +63,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey) return false; diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 607f58147311..c60255da5a6c 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -352,7 +352,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, if (type == EFI_MEMORY_MAPPED_IO) return ioremap(phys_addr, size); - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size, + PAGE_KERNEL); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; efi_ioremap(top, size - (top - phys_addr), type, attribute); diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h index 7c297e9e2413..df7a3896f5dd 100644 --- a/arch/x86/um/asm/vm-flags.h +++ b/arch/x86/um/asm/vm-flags.h @@ -9,17 +9,11 @@ #ifdef CONFIG_X86_32 -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #else -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ - VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC) #endif #endif diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index f4771c29c7e9..37ce25ef92d6 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -203,8 +203,5 @@ static inline unsigned long ___pa(unsigned long va) #endif /* __ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include <asm-generic/memory_model.h> #endif /* _XTENSA_PAGE_H */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 27ac17c9da09..8be0c0568c50 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -266,7 +266,6 @@ static inline void paging_init(void) { } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~(_PAGE_WRITABLE | _PAGE_HW_WRITE); return pte; } @@ -280,8 +279,6 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITABLE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) - { return pte; } #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CA_MASK)) diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index a431c5cbe2be..e0d77fa048fb 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -4,7 +4,7 @@ * Copyright (C) 2009 Nokia Corporation * Author: Juha Yrjola <juha.yrjola@solidboot.com> * - * Copyright (C) 2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2013 Pali Rohár <pali@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -178,5 +178,5 @@ module_platform_driver(omap3_rom_rng_driver); MODULE_ALIAS("platform:omap3-rom-rng"); MODULE_AUTHOR("Juha Yrjola"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_LICENSE("GPL"); diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 1a9b37c102ba..4e295d121be6 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1472,7 +1472,6 @@ static int tegra_dma_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - dev_err(&pdev->dev, "No irq resource for chan %d\n", i); goto err_pm_disable; } diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index d4c83009d625..ab719d372b0d 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -7,7 +7,7 @@ * Hwmon integration: * Copyright (C) 2011 Jean Delvare <jdelvare@suse.de> * Copyright (C) 2013, 2014 Guenter Roeck <linux@roeck-us.net> - * Copyright (C) 2014, 2015 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2014, 2015 Pali Rohár <pali@kernel.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -86,7 +86,7 @@ static unsigned int auto_fan; #define I8K_HWMON_HAVE_FAN3 (1 << 12) MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_DESCRIPTION("Dell laptop SMM BIOS hwmon driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("i8k"); diff --git a/drivers/media/platform/sti/delta/delta-ipc.c b/drivers/media/platform/sti/delta/delta-ipc.c index 186d88f02ecd..371429d81ea1 100644 --- a/drivers/media/platform/sti/delta/delta-ipc.c +++ b/drivers/media/platform/sti/delta/delta-ipc.c @@ -175,8 +175,8 @@ int delta_ipc_open(struct delta_ctx *pctx, const char *name, msg.ipc_buf_size = ipc_buf_size; msg.ipc_buf_paddr = ctx->ipc_buf->paddr; - memcpy(msg.name, name, sizeof(msg.name)); - msg.name[sizeof(msg.name) - 1] = 0; + memset(msg.name, 0, sizeof(msg.name)); + strcpy(msg.name, name); msg.param_size = param->size; memcpy(ctx->ipc_buf->vaddr, param->data, msg.param_size); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 74e988f839e8..f8d3e3bd1bb5 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat <mjg@redhat.com> * Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com> - * Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com> + * Copyright (c) 2014 Pali Rohár <pali@kernel.org> * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. @@ -2295,6 +2295,6 @@ module_exit(dell_exit); MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>"); MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_DESCRIPTION("Dell laptop driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c index a6b856cd86bd..a89fad47ff13 100644 --- a/drivers/platform/x86/dell-rbtn.c +++ b/drivers/platform/x86/dell-rbtn.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár <pali.rohar@gmail.com> + Copyright (C) 2014-2015 Pali Rohár <pali@kernel.org> */ @@ -495,5 +495,5 @@ MODULE_PARM_DESC(auto_remove_rfkill, "Automatically remove rfkill devices when " "(default true)"); MODULE_DEVICE_TABLE(acpi, rbtn_ids); MODULE_DESCRIPTION("Dell Airplane Mode Switch driver"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.h b/drivers/platform/x86/dell-rbtn.h index 0fdc81644458..5e030f926c58 100644 --- a/drivers/platform/x86/dell-rbtn.h +++ b/drivers/platform/x86/dell-rbtn.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár <pali.rohar@gmail.com> + Copyright (C) 2014-2015 Pali Rohár <pali@kernel.org> */ diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index fe59b0ebff31..2e2cd565926a 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat <mjg@redhat.com> * Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com> - * Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com> + * Copyright (c) 2014 Pali Rohár <pali@kernel.org> * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. @@ -645,7 +645,7 @@ module_exit(dell_smbios_exit); MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>"); MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>"); MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-smbios-smm.c b/drivers/platform/x86/dell-smbios-smm.c index d6854d1c4119..97c52a839a3e 100644 --- a/drivers/platform/x86/dell-smbios-smm.c +++ b/drivers/platform/x86/dell-smbios-smm.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat <mjg@redhat.com> * Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com> - * Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com> + * Copyright (c) 2014 Pali Rohár <pali@kernel.org> * Copyright (c) 2017 Dell Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h index a7ff9803f41a..75fa8ea0476d 100644 --- a/drivers/platform/x86/dell-smbios.h +++ b/drivers/platform/x86/dell-smbios.h @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat <mjg@redhat.com> * Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com> - * Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com> + * Copyright (c) 2014 Pali Rohár <pali@kernel.org> * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c index bfcc1d1b9b96..b96ea6142290 100644 --- a/drivers/platform/x86/dell-smo8800.c +++ b/drivers/platform/x86/dell-smo8800.c @@ -3,7 +3,7 @@ * dell-smo8800.c - Dell Latitude ACPI SMO88XX freefall sensor driver * * Copyright (C) 2012 Sonal Santan <sonal.santan@gmail.com> - * Copyright (C) 2014 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2014 Pali Rohár <pali@kernel.org> * * This is loosely based on lis3lv02d driver. */ diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 6669db2555fb..86e8dd6a8b33 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -3,7 +3,7 @@ * Dell WMI hotkeys * * Copyright (C) 2008 Red Hat <mjg@redhat.com> - * Copyright (C) 2014-2015 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2014-2015 Pali Rohár <pali@kernel.org> * * Portions based on wistron_btns.c: * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz> @@ -29,7 +29,7 @@ #include "dell-wmi-descriptor.h" MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_DESCRIPTION("Dell laptop WMI hotkeys driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 532f6e4fcafb..a1f00ae1c180 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2011-2013 Pali Rohár <pali@kernel.org> * * Datasheets: * http://www.ti.com/product/bq24150 @@ -1788,6 +1788,6 @@ static struct i2c_driver bq2415x_driver = { }; module_i2c_driver(bq2415x_driver); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_DESCRIPTION("bq2415x charger driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 664e50103eaa..942c92127b6d 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Rodolfo Giometti <giometti@linux.it> * Copyright (C) 2008 Eurotech S.p.A. <info@eurotech.it> * Copyright (C) 2010-2011 Lars-Peter Clausen <lars@metafoo.de> - * Copyright (C) 2011 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2011 Pali Rohár <pali@kernel.org> * Copyright (C) 2017 Liam Breck <kernel@networkimprov.net> * * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc. diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c index 4812ac1ff2df..b6efc454e4f0 100644 --- a/drivers/power/supply/isp1704_charger.c +++ b/drivers/power/supply/isp1704_charger.c @@ -3,7 +3,7 @@ * ISP1704 USB Charger Detection driver * * Copyright (C) 2010 Nokia Corporation - * Copyright (C) 2012 - 2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2012 - 2013 Pali Rohár <pali@kernel.org> */ #include <linux/kernel.h> diff --git a/drivers/power/supply/rx51_battery.c b/drivers/power/supply/rx51_battery.c index 8548b639ff2f..6e488ecf4dcb 100644 --- a/drivers/power/supply/rx51_battery.c +++ b/drivers/power/supply/rx51_battery.c @@ -2,7 +2,7 @@ /* * Nokia RX-51 battery driver * - * Copyright (C) 2012 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2012 Pali Rohár <pali@kernel.org> */ #include <linux/module.h> @@ -278,6 +278,6 @@ static struct platform_driver rx51_battery_driver = { module_platform_driver(rx51_battery_driver); MODULE_ALIAS("platform:rx51-battery"); -MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>"); +MODULE_AUTHOR("Pali Rohár <pali@kernel.org>"); MODULE_DESCRIPTION("Nokia RX-51 battery driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/staging/gasket/gasket_core.c b/drivers/staging/gasket/gasket_core.c index cd181a64f737..8e0575fcb4c8 100644 --- a/drivers/staging/gasket/gasket_core.c +++ b/drivers/staging/gasket/gasket_core.c @@ -689,7 +689,7 @@ static bool gasket_mmap_has_permissions(struct gasket_dev *gasket_dev, /* Make sure that no wrong flags are set. */ requested_permissions = - (vma->vm_flags & (VM_WRITE | VM_READ | VM_EXEC)); + (vma->vm_flags & VM_ACCESS_FLAGS); if (requested_permissions & ~(bar_permissions)) { dev_dbg(gasket_dev->dev, "Attempting to map a region with requested permissions 0x%x, but region has permissions 0x%x.\n", diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index c073aa7001c4..0641a72a8d66 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1345,7 +1345,7 @@ static int sci_dma_rx_submit(struct sci_port *s, bool port_lock_held) { struct dma_chan *chan = s->chan_rx; struct uart_port *port = &s->port; - unsigned long flags; + unsigned long uninitialized_var(flags); int i; for (i = 0; i < 2; i++) { diff --git a/fs/filesystems.c b/fs/filesystems.c index 77bf5f95362d..90b8d879fbaf 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -272,7 +272,9 @@ struct file_system_type *get_fs_type(const char *name) fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); - WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + if (!fs) + pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", + len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { diff --git a/fs/seq_file.c b/fs/seq_file.c index 79781ebd2145..70f5fdf99bf6 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -232,9 +232,12 @@ Fill: loff_t pos = m->index; p = m->op->next(m, p, &m->index); - if (pos == m->index) - /* Buggy ->next function */ + if (pos == m->index) { + pr_info_ratelimited("buggy seq_file .next function %ps " + "did not updated position index\n", + m->op->next); m->index++; + } if (!p || IS_ERR(p)) { err = PTR_ERR(p); break; diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index 3fd85464abd5..736ebc5dc441 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h @@ -5,7 +5,7 @@ * http://www.ecma.ch * * Copyright (c) 2001-2002 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár <pali.rohar@gmail.com> + * Copyright (c) 2017-2019 Pali Rohár <pali@kernel.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h index 35e61b2cacfe..d5fbfab3ddb6 100644 --- a/fs/udf/osta_udf.h +++ b/fs/udf/osta_udf.h @@ -5,7 +5,7 @@ * http://www.osta.org * * Copyright (c) 2001-2004 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár <pali.rohar@gmail.com> + * Copyright (c) 2017-2019 Pali Rohár <pali@kernel.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1b4150ff64be..b97e6a94ea12 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -533,7 +533,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_zone_size[zone_idx][lru]; + return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(void); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ef55115320fb..93d9ada74ddd 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -58,13 +58,14 @@ enum { }; /* - * Restrictions for the memory hotplug: - * flags: MHP_ flags - * altmap: alternative allocator for memmap array + * Extended parameters for memory hotplug: + * altmap: alternative allocator for memmap array (optional) + * pgprot: page protection flags to apply to newly created page tables + * (required) */ -struct mhp_restrictions { - unsigned long flags; +struct mhp_params { struct vmem_altmap *altmap; + pgprot_t pgprot; }; /* @@ -114,7 +115,7 @@ extern int restore_online_page_callback(online_page_callback_t callback); extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions); + struct mhp_params *params); extern u64 max_mem_size; extern int memhp_online_type_from_str(const char *str); @@ -135,17 +136,17 @@ extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) + unsigned long nr_pages, struct mhp_params *params) { - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA diff --git a/include/linux/mm.h b/include/linux/mm.h index e80427d24f94..bfd79d21cfd5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -346,6 +346,20 @@ extern unsigned int kobjsize(const void *objp); /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +/* Common data flag combinations */ +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ + VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#endif + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif @@ -358,6 +372,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +/* VMA basic access permission flags */ +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) + + /* * Special vmas that are non-mergable, non-mlock()able. */ @@ -610,7 +628,7 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) static inline bool vma_is_accessible(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + return vma->vm_flags & VM_ACCESS_FLAGS; } static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) @@ -1034,6 +1052,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); static inline enum zone_type page_zonenum(const struct page *page) { + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -1911,6 +1930,18 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif +#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL +static inline int pte_special(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} +#endif + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { @@ -2692,6 +2723,8 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc5..5339aa14b749 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -64,4 +64,9 @@ void dump_mm(const struct mm_struct *mm); #define VM_BUG_ON_PGFLAGS(cond, page) BUILD_BUG_ON_INVALID(cond) #endif +#ifdef CONFIG_DEBUG_VM_PGTABLE +void debug_vm_pgtable(void); +#else +static inline void debug_vm_pgtable(void) { } +#endif #endif diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index 7a91b357e3ac..4ca08321e251 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár <pali.rohar@gmail.com> + * Copyright (C) 2011-2013 Pali Rohár <pali@kernel.org> */ #ifndef BQ2415X_CHARGER_H diff --git a/include/linux/sched.h b/include/linux/sched.h index 7fefafdc5f7c..983389c3c26d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -794,9 +794,12 @@ struct task_struct { unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP - /* to be used once the psi infrastructure lands upstream. */ unsigned use_memdelay:1; #endif +#ifdef CONFIG_PSI + /* Stalled due to lack of memory */ + unsigned in_memstall:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1503,7 +1506,6 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ diff --git a/init/main.c b/init/main.c index 6d45af80389e..345a9ab4450f 100644 --- a/init/main.c +++ b/init/main.c @@ -54,6 +54,7 @@ #include <linux/delayacct.h> #include <linux/unistd.h> #include <linux/utsname.h> +#include <linux/mmdebug.h> #include <linux/rmap.h> #include <linux/mempolicy.h> #include <linux/key.h> @@ -1360,6 +1361,7 @@ static int __ref kernel_init(void *unused) kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); + debug_vm_pgtable(); ftrace_free_init_mem(); free_initmem(); mark_readonly(); diff --git a/ipc/util.c b/ipc/util.c index 97638eb2d7cb..7acccfded7cb 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -764,13 +764,13 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } + *new_pos = pos + 1; if (total >= ids->in_use) return NULL; for (; pos < ipc_mni; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { - *new_pos = pos + 1; rcu_read_lock(); ipc_lock_object(ipc); return ipc; diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 5e891c3c2d93..82babf5aa077 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -108,9 +108,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) { struct gcov_iterator *iter = data; + (*pos)++; if (gcov_iter_next(iter)) return NULL; - (*pos)++; return iter; } diff --git a/kernel/kmod.c b/kernel/kmod.c index bc6addd9152b..a2de58de6ab6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ out: * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...) WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 028520702717..0a16aac538f2 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -818,17 +818,17 @@ void psi_memstall_enter(unsigned long *flags) if (static_branch_likely(&psi_disabled)) return; - *flags = current->flags & PF_MEMSTALL; + *flags = current->in_memstall; if (*flags) return; /* - * PF_MEMSTALL setting & accounting needs to be atomic wrt + * in_memstall setting & accounting needs to be atomic wrt * changes to the task's scheduling state, otherwise we can * race with CPU migration. */ rq = this_rq_lock_irq(&rf); - current->flags |= PF_MEMSTALL; + current->in_memstall = 1; psi_task_change(current, 0, TSK_MEMSTALL); rq_unlock_irq(rq, &rf); @@ -851,13 +851,13 @@ void psi_memstall_leave(unsigned long *flags) if (*flags) return; /* - * PF_MEMSTALL clearing & accounting needs to be atomic wrt + * in_memstall clearing & accounting needs to be atomic wrt * changes to the task's scheduling state, otherwise we could * race with CPU migration. */ rq = this_rq_lock_irq(&rf); - current->flags &= ~PF_MEMSTALL; + current->in_memstall = 0; psi_task_change(current, TSK_MEMSTALL, 0); rq_unlock_irq(rq, &rf); @@ -921,7 +921,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) else if (task->in_iowait) task_flags = TSK_IOWAIT; - if (task->flags & PF_MEMSTALL) + if (task->in_memstall) task_flags |= TSK_MEMSTALL; if (task_flags) diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index ba683fe81a6e..199e304a86d5 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -70,7 +70,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) return; if (!wakeup || p->sched_psi_wake_requeue) { - if (p->flags & PF_MEMSTALL) + if (p->in_memstall) set |= TSK_MEMSTALL; if (p->sched_psi_wake_requeue) p->sched_psi_wake_requeue = 0; @@ -90,7 +90,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep) return; if (!sleep) { - if (p->flags & PF_MEMSTALL) + if (p->in_memstall) clear |= TSK_MEMSTALL; } else { if (p->in_iowait) @@ -109,14 +109,14 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) * deregister its sleep-persistent psi states from the old * queue, and let psi_enqueue() know it has to requeue. */ - if (unlikely(p->in_iowait || (p->flags & PF_MEMSTALL))) { + if (unlikely(p->in_iowait || p->in_memstall)) { struct rq_flags rf; struct rq *rq; int clear = 0; if (p->in_iowait) clear |= TSK_IOWAIT; - if (p->flags & PF_MEMSTALL) + if (p->in_memstall) clear |= TSK_MEMSTALL; rq = __task_rq_lock(p, &rf); @@ -131,7 +131,7 @@ static inline void psi_task_tick(struct rq *rq) if (static_branch_likely(&psi_disabled)) return; - if (unlikely(rq->curr->flags & PF_MEMSTALL)) + if (unlikely(rq->curr->in_memstall)) psi_memstall_tick(rq->curr, cpu_of(rq)); } #else /* CONFIG_PSI */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 77937c56c7cf..56a42ea2e884 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -653,6 +653,12 @@ config SCHED_STACK_END_CHECK data corruption or a sporadic crash at a later stage once the region is examined. The runtime overhead introduced is minimal. +config ARCH_HAS_DEBUG_VM_PGTABLE + bool + help + An architecture should select this when it can successfully + build and run DEBUG_VM_PGTABLE. + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -688,6 +694,26 @@ config DEBUG_VM_PGFLAGS If unsure, say N. +config DEBUG_VM_PGTABLE + bool "Debug arch page table for semantics compliance" + depends on MMU + depends on !IA64 && !ARM + depends on ARCH_HAS_DEBUG_VM_PGTABLE || EXPERT + default n if !ARCH_HAS_DEBUG_VM_PGTABLE + default y if DEBUG_VM + help + This option provides a debug method which can be used to test + architecture page table helper functions on various platforms in + verifying if they comply with expected generic MM semantics. This + will help architecture code in making sure that any changes or + new additions of these helpers still conform to expected + semantics of the generic MM. Platforms will have to opt in for + this through ARCH_HAS_DEBUG_VM_PGTABLE. Although it can also be + enabled through EXPERT without requiring code change. This test + is disabled on IA64 and ARM platforms where it fails to build. + + If unsure, say N. + config ARCH_HAS_DEBUG_VIRTUAL bool diff --git a/mm/Makefile b/mm/Makefile index 7881b8ede627..fa91e963c2f9 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o +obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c new file mode 100644 index 000000000000..98990a515268 --- /dev/null +++ b/mm/debug_vm_pgtable.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This kernel test validates architecture page table helpers and + * accessors and helps in verifying their continued compliance with + * expected generic MM semantics. + * + * Copyright (C) 2019 ARM Ltd. + * + * Author: Anshuman Khandual <anshuman.khandual@arm.com> + */ +#define pr_fmt(fmt) "debug_vm_pgtable: %s: " fmt, __func__ + +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h> +#include <linux/kernel.h> +#include <linux/kconfig.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/mm_types.h> +#include <linux/module.h> +#include <linux/pfn_t.h> +#include <linux/printk.h> +#include <linux/random.h> +#include <linux/spinlock.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/start_kernel.h> +#include <linux/sched/mm.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> + +/* + * Basic operations + * + * mkold(entry) = An old and not a young entry + * mkyoung(entry) = A young and not an old entry + * mkdirty(entry) = A dirty and not a clean entry + * mkclean(entry) = A clean and not a dirty entry + * mkwrite(entry) = A write and not a write protected entry + * wrprotect(entry) = A write protected and not a write entry + * pxx_bad(entry) = A mapped and non-table entry + * pxx_same(entry1, entry2) = Both entries hold the exact same value + */ +#define VMFLAGS (VM_READ|VM_WRITE|VM_EXEC) + +/* + * On s390 platform, the lower 4 bits are used to identify given page table + * entry type. But these bits might affect the ability to clear entries with + * pxx_clear() because of how dynamic page table folding works on s390. So + * while loading up the entries do not change the lower 4 bits. It does not + * have affect any other platform. + */ +#define S390_MASK_BITS 4 +#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS) +#define RANDOM_NZVALUE GENMASK(7, 0) + +static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot) +{ + pte_t pte = pfn_pte(pfn, prot); + + WARN_ON(!pte_same(pte, pte)); + WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte)))); + WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte)))); + WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte)))); + WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte)))); + WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte)))); + WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte)))); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) +{ + pmd_t pmd = pfn_pmd(pfn, prot); + + WARN_ON(!pmd_same(pmd, pmd)); + WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd)))); + WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd)))); + WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd)))); + WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd)))); + WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd)))); + WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd)))); + /* + * A huge page does not point to next level page table + * entry. Hence this must qualify as pmd_bad(). + */ + WARN_ON(!pmd_bad(pmd_mkhuge(pmd))); +} + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) +{ + pud_t pud = pfn_pud(pfn, prot); + + WARN_ON(!pud_same(pud, pud)); + WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud)))); + WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud)))); + WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud)))); + WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud)))); + + if (mm_pmd_folded(mm)) + return; + + /* + * A huge page does not point to next level page table + * entry. Hence this must qualify as pud_bad(). + */ + WARN_ON(!pud_bad(pud_mkhuge(pud))); +} +#else +static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { } +#endif +#else +static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { } +#endif + +static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot) +{ + p4d_t p4d; + + memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t)); + WARN_ON(!p4d_same(p4d, p4d)); +} + +static void __init pgd_basic_tests(unsigned long pfn, pgprot_t prot) +{ + pgd_t pgd; + + memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t)); + WARN_ON(!pgd_same(pgd, pgd)); +} + +#ifndef __PAGETABLE_PUD_FOLDED +static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) +{ + pud_t pud = READ_ONCE(*pudp); + + if (mm_pmd_folded(mm)) + return; + + pud = __pud(pud_val(pud) | RANDOM_ORVALUE); + WRITE_ONCE(*pudp, pud); + pud_clear(pudp); + pud = READ_ONCE(*pudp); + WARN_ON(!pud_none(pud)); +} + +static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, + pmd_t *pmdp) +{ + pud_t pud; + + if (mm_pmd_folded(mm)) + return; + /* + * This entry points to next level page table page. + * Hence this must not qualify as pud_bad(). + */ + pmd_clear(pmdp); + pud_clear(pudp); + pud_populate(mm, pudp, pmdp); + pud = READ_ONCE(*pudp); + WARN_ON(pud_bad(pud)); +} +#else +static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) { } +static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, + pmd_t *pmdp) +{ +} +#endif + +#ifndef __PAGETABLE_P4D_FOLDED +static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) +{ + p4d_t p4d = READ_ONCE(*p4dp); + + if (mm_pud_folded(mm)) + return; + + p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); + WRITE_ONCE(*p4dp, p4d); + p4d_clear(p4dp); + p4d = READ_ONCE(*p4dp); + WARN_ON(!p4d_none(p4d)); +} + +static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, + pud_t *pudp) +{ + p4d_t p4d; + + if (mm_pud_folded(mm)) + return; + + /* + * This entry points to next level page table page. + * Hence this must not qualify as p4d_bad(). + */ + pud_clear(pudp); + p4d_clear(p4dp); + p4d_populate(mm, p4dp, pudp); + p4d = READ_ONCE(*p4dp); + WARN_ON(p4d_bad(p4d)); +} + +static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) +{ + pgd_t pgd = READ_ONCE(*pgdp); + + if (mm_p4d_folded(mm)) + return; + + pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); + WRITE_ONCE(*pgdp, pgd); + pgd_clear(pgdp); + pgd = READ_ONCE(*pgdp); + WARN_ON(!pgd_none(pgd)); +} + +static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, + p4d_t *p4dp) +{ + pgd_t pgd; + + if (mm_p4d_folded(mm)) + return; + + /* + * This entry points to next level page table page. + * Hence this must not qualify as pgd_bad(). + */ + p4d_clear(p4dp); + pgd_clear(pgdp); + pgd_populate(mm, pgdp, p4dp); + pgd = READ_ONCE(*pgdp); + WARN_ON(pgd_bad(pgd)); +} +#else +static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) { } +static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) { } +static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, + pud_t *pudp) +{ +} +static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, + p4d_t *p4dp) +{ +} +#endif + +static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep, + unsigned long vaddr) +{ + pte_t pte = READ_ONCE(*ptep); + + pte = __pte(pte_val(pte) | RANDOM_ORVALUE); + set_pte_at(mm, vaddr, ptep, pte); + barrier(); + pte_clear(mm, vaddr, ptep); + pte = READ_ONCE(*ptep); + WARN_ON(!pte_none(pte)); +} + +static void __init pmd_clear_tests(struct mm_struct *mm, pmd_t *pmdp) +{ + pmd_t pmd = READ_ONCE(*pmdp); + + pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); + WRITE_ONCE(*pmdp, pmd); + pmd_clear(pmdp); + pmd = READ_ONCE(*pmdp); + WARN_ON(!pmd_none(pmd)); +} + +static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + pmd_t pmd; + + /* + * This entry points to next level page table page. + * Hence this must not qualify as pmd_bad(). + */ + pmd_clear(pmdp); + pmd_populate(mm, pmdp, pgtable); + pmd = READ_ONCE(*pmdp); + WARN_ON(pmd_bad(pmd)); +} + +static unsigned long __init get_random_vaddr(void) +{ + unsigned long random_vaddr, random_pages, total_user_pages; + + total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE; + + random_pages = get_random_long() % total_user_pages; + random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE; + + return random_vaddr; +} + +void __init debug_vm_pgtable(void) +{ + struct mm_struct *mm; + pgd_t *pgdp; + p4d_t *p4dp, *saved_p4dp; + pud_t *pudp, *saved_pudp; + pmd_t *pmdp, *saved_pmdp, pmd; + pte_t *ptep; + pgtable_t saved_ptep; + pgprot_t prot; + phys_addr_t paddr; + unsigned long vaddr, pte_aligned, pmd_aligned; + unsigned long pud_aligned, p4d_aligned, pgd_aligned; + spinlock_t *uninitialized_var(ptl); + + pr_info("Validating architecture page table helpers\n"); + prot = vm_get_page_prot(VMFLAGS); + vaddr = get_random_vaddr(); + mm = mm_alloc(); + if (!mm) { + pr_err("mm_struct allocation failed\n"); + return; + } + + /* + * PFN for mapping at PTE level is determined from a standard kernel + * text symbol. But pfns for higher page table levels are derived by + * masking lower bits of this real pfn. These derived pfns might not + * exist on the platform but that does not really matter as pfn_pxx() + * helpers will still create appropriate entries for the test. This + * helps avoid large memory block allocations to be used for mapping + * at higher page table levels. + */ + paddr = __pa_symbol(&start_kernel); + + pte_aligned = (paddr & PAGE_MASK) >> PAGE_SHIFT; + pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT; + pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT; + p4d_aligned = (paddr & P4D_MASK) >> PAGE_SHIFT; + pgd_aligned = (paddr & PGDIR_MASK) >> PAGE_SHIFT; + WARN_ON(!pfn_valid(pte_aligned)); + + pgdp = pgd_offset(mm, vaddr); + p4dp = p4d_alloc(mm, pgdp, vaddr); + pudp = pud_alloc(mm, p4dp, vaddr); + pmdp = pmd_alloc(mm, pudp, vaddr); + ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl); + + /* + * Save all the page table page addresses as the page table + * entries will be used for testing with random or garbage + * values. These saved addresses will be used for freeing + * page table pages. + */ + pmd = READ_ONCE(*pmdp); + saved_p4dp = p4d_offset(pgdp, 0UL); + saved_pudp = pud_offset(p4dp, 0UL); + saved_pmdp = pmd_offset(pudp, 0UL); + saved_ptep = pmd_pgtable(pmd); + + pte_basic_tests(pte_aligned, prot); + pmd_basic_tests(pmd_aligned, prot); + pud_basic_tests(pud_aligned, prot); + p4d_basic_tests(p4d_aligned, prot); + pgd_basic_tests(pgd_aligned, prot); + + pte_clear_tests(mm, ptep, vaddr); + pmd_clear_tests(mm, pmdp); + pud_clear_tests(mm, pudp); + p4d_clear_tests(mm, p4dp); + pgd_clear_tests(mm, pgdp); + + pte_unmap_unlock(ptep, ptl); + + pmd_populate_tests(mm, pmdp, saved_ptep); + pud_populate_tests(mm, pudp, saved_pmdp); + p4d_populate_tests(mm, p4dp, saved_pudp); + pgd_populate_tests(mm, pgdp, saved_p4dp); + + p4d_free(mm, saved_p4dp); + pud_free(mm, saved_pudp); + pmd_free(mm, saved_pmdp); + pte_free(mm, saved_ptep); + + mm_dec_nr_puds(mm); + mm_dec_nr_pmds(mm); + mm_dec_nr_ptes(mm); + mmdrop(mm); +} diff --git a/mm/filemap.c b/mm/filemap.c index 477fc9b0b9d6..80f7e1ae744c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2376,6 +2376,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; pgoff_t offset = vmf->pgoff; + unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ) @@ -2391,14 +2392,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) } /* Avoid banging the cache line if not needed */ - if (ra->mmap_miss < MMAP_LOTSAMISS * 10) - ra->mmap_miss++; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss); /* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ - if (ra->mmap_miss > MMAP_LOTSAMISS) + if (mmap_miss > MMAP_LOTSAMISS) return fpin; /* @@ -2424,13 +2426,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; + unsigned int mmap_miss; pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; - if (ra->mmap_miss > 0) - ra->mmap_miss--; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (PageReadahead(page)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_async_readahead(mapping, ra, file, @@ -2597,6 +2601,7 @@ void filemap_map_pages(struct vm_fault *vmf, unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *page; + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2633,8 +2638,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->index >= max_idx) goto unlock; - if (file->f_ra.mmap_miss > 0) - file->f_ra.mmap_miss--; + if (mmap_miss > 0) + mmap_miss--; vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; if (vmf->pte) @@ -2654,6 +2659,7 @@ next: break; } rcu_read_unlock(); + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); } EXPORT_SYMBOL(filemap_map_pages); diff --git a/mm/frontswap.c b/mm/frontswap.c index 60bb20e8a951..b8c14f298332 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -61,16 +61,16 @@ static u64 frontswap_failed_stores; static u64 frontswap_invalidates; static inline void inc_frontswap_loads(void) { - frontswap_loads++; + data_race(frontswap_loads++); } static inline void inc_frontswap_succ_stores(void) { - frontswap_succ_stores++; + data_race(frontswap_succ_stores++); } static inline void inc_frontswap_failed_stores(void) { - frontswap_failed_stores++; + data_race(frontswap_failed_stores++); } static inline void inc_frontswap_invalidates(void) { - frontswap_invalidates++; + data_race(frontswap_invalidates++); } #else static inline void inc_frontswap_loads(void) { } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e362dc3d2028..5e252d91eb14 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object) u32 old_csum = object->checksum; kasan_disable_current(); + kcsan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); + kcsan_enable_current(); return object->checksum != old_csum; } diff --git a/mm/list_lru.c b/mm/list_lru.c index 4d5294c39bba..87b540f73323 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru, rcu_read_lock(); l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); - count = l->nr_items; + count = READ_ONCE(l->nr_items); rcu_read_unlock(); return count; diff --git a/mm/memory.c b/mm/memory.c index 438fbe614579..15c8cedac5bb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1419,8 +1419,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(zap_vma_ptes); -pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) +static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; @@ -1439,9 +1438,40 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); + return pmd; +} + +pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) +{ + pmd_t *pmd = walk_to_pmd(mm, addr); + + if (!pmd) + return NULL; return pte_alloc_map_lock(mm, pmd, addr, ptl); } +static int validate_page_before_insert(struct page *page) +{ + if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + return -EINVAL; + flush_dcache_page(page); + return 0; +} + +static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte, + unsigned long addr, struct page *page, pgprot_t prot) +{ + if (!pte_none(*pte)) + return -EBUSY; + /* Ok, finally just insert the thing.. */ + get_page(page); + inc_mm_counter_fast(mm, mm_counter_file(page)); + page_add_file_rmap(page, false); + set_pte_at(mm, addr, pte, mk_pte(page, prot)); + return 0; +} + /* * This is the old fallback for page remapping. * @@ -1457,31 +1487,135 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, pte_t *pte; spinlock_t *ptl; - retval = -EINVAL; - if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + retval = validate_page_before_insert(page); + if (retval) goto out; retval = -ENOMEM; - flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; - retval = -EBUSY; - if (!pte_none(*pte)) - goto out_unlock; - - /* Ok, finally just insert the thing.. */ - get_page(page); - inc_mm_counter_fast(mm, mm_counter_file(page)); - page_add_file_rmap(page, false); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); - - retval = 0; -out_unlock: + retval = insert_page_into_pte_locked(mm, pte, addr, page, prot); pte_unmap_unlock(pte, ptl); out: return retval; } +#ifdef pte_index +static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, struct page *page, pgprot_t prot) +{ + int err; + + if (!page_count(page)) + return -EINVAL; + err = validate_page_before_insert(page); + return err ? err : insert_page_into_pte_locked( + mm, pte_offset_map(pmd, addr), addr, page, prot); +} + +/* insert_pages() amortizes the cost of spinlock operations + * when inserting pages in a loop. Arch *must* define pte_index. + */ +static int insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num, pgprot_t prot) +{ + pmd_t *pmd = NULL; + spinlock_t *pte_lock = NULL; + struct mm_struct *const mm = vma->vm_mm; + unsigned long curr_page_idx = 0; + unsigned long remaining_pages_total = *num; + unsigned long pages_to_write_in_pmd; + int ret; +more: + ret = -EFAULT; + pmd = walk_to_pmd(mm, addr); + if (!pmd) + goto out; + + pages_to_write_in_pmd = min_t(unsigned long, + remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); + + /* Allocate the PTE if necessary; takes PMD lock once only. */ + ret = -ENOMEM; + if (pte_alloc(mm, pmd)) + goto out; + pte_lock = pte_lockptr(mm, pmd); + + while (pages_to_write_in_pmd) { + int pte_idx = 0; + const int batch_size = min_t(int, pages_to_write_in_pmd, 8); + + spin_lock(pte_lock); + for (; pte_idx < batch_size; ++pte_idx) { + int err = insert_page_in_batch_locked(mm, pmd, + addr, pages[curr_page_idx], prot); + if (unlikely(err)) { + spin_unlock(pte_lock); + ret = err; + remaining_pages_total -= pte_idx; + goto out; + } + addr += PAGE_SIZE; + ++curr_page_idx; + } + spin_unlock(pte_lock); + pages_to_write_in_pmd -= batch_size; + remaining_pages_total -= batch_size; + } + if (remaining_pages_total) + goto more; + ret = 0; +out: + *num = remaining_pages_total; + return ret; +} +#endif /* ifdef pte_index */ + +/** + * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock. + * @vma: user vma to map to + * @addr: target start user address of these pages + * @pages: source kernel pages + * @num: in: number of pages to map. out: number of pages that were *not* + * mapped. (0 means all pages were successfully mapped). + * + * Preferred over vm_insert_page() when inserting multiple pages. + * + * In case of error, we may have mapped a subset of the provided + * pages. It is the caller's responsibility to account for this case. + * + * The same restrictions apply as in vm_insert_page(). + */ +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num) +{ +#ifdef pte_index + const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; + + if (addr < vma->vm_start || end_addr >= vma->vm_end) + return -EFAULT; + if (!(vma->vm_flags & VM_MIXEDMAP)) { + BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); + BUG_ON(vma->vm_flags & VM_PFNMAP); + vma->vm_flags |= VM_MIXEDMAP; + } + /* Defer page refcount checking till we're about to map that page. */ + return insert_pages(vma, addr, pages, num, vma->vm_page_prot); +#else + unsigned long idx = 0, pgcount = *num; + int err; + + for (; idx < pgcount; ++idx) { + err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]); + if (err) + break; + } + *num = pgcount - idx; + return err; +#endif /* ifdef pte_index */ +} +EXPORT_SYMBOL(vm_insert_pages); + /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to @@ -2991,8 +3125,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!page) { struct swap_info_struct *si = swp_swap_info(entry); - if (si->flags & SWP_SYNCHRONOUS_IO && - __swap_count(entry) == 1) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && + __swap_count(entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 635e8e286598..fc0aad0bc1f5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -304,12 +304,15 @@ static int check_hotplug_memory_addressable(unsigned long pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { const unsigned long end_pfn = pfn + nr_pages; unsigned long cur_nr_pages; int err; - struct vmem_altmap *altmap = restrictions->altmap; + struct vmem_altmap *altmap = params->altmap; + + if (WARN_ON_ONCE(!params->pgprot.pgprot)) + return -EINVAL; err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) @@ -1002,7 +1005,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_restrictions restrictions = {}; + struct mhp_params params = { .pgprot = PAGE_KERNEL }; u64 start, size; bool new_node = false; int ret; @@ -1030,7 +1033,7 @@ int __ref add_memory_resource(int nid, struct resource *res) new_node = ret; /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, &restrictions); + ret = arch_add_memory(nid, start, size, ¶ms); if (ret < 0) goto error; diff --git a/mm/mempool.c b/mm/mempool.c index 85efab3da720..79bff63ecf27 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool) * ensures that there will be frees which return elements to the * pool waking up the waiters. */ - if (unlikely(pool->curr_nr < pool->min_nr)) { + if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); diff --git a/mm/memremap.c b/mm/memremap.c index 0f465a4af7fa..8afcc54c8928 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -204,13 +204,13 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) { struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; - struct mhp_restrictions restrictions = { + struct mhp_params params = { /* * We do not want any optional features only our own memmap */ .altmap = pgmap_altmap(pgmap), + .pgprot = PAGE_KERNEL, }; - pgprot_t pgprot = PAGE_KERNEL; int error, is_ram; bool need_devmap_managed = true; @@ -237,7 +237,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } break; case MEMORY_DEVICE_DEVDAX: + need_devmap_managed = false; + break; case MEMORY_DEVICE_PCI_P2PDMA: + params.pgprot = pgprot_noncached(params.pgprot); need_devmap_managed = false; break; default: @@ -302,8 +305,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0, - resource_size(res)); + error = track_pfn_remap(NULL, ¶ms.pgprot, PHYS_PFN(res->start), + 0, resource_size(res)); if (error) goto err_pfn_remap; @@ -322,7 +325,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { error = add_pages(nid, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), &restrictions); + PHYS_PFN(resource_size(res)), ¶ms); } else { error = kasan_add_zero_shadow(__va(res->start), resource_size(res)); if (error) { @@ -331,7 +334,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } error = arch_add_memory(nid, res->start, resource_size(res), - &restrictions); + ¶ms); } if (!error) { @@ -339,7 +342,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; move_pfn_range_to_zone(zone, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), restrictions.altmap); + PHYS_PFN(resource_size(res)), params.altmap); } mem_hotplug_done(); diff --git a/mm/mmap.c b/mm/mmap.c index 47e8dd50489a..a274a21624fa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1224,7 +1224,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct * return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && - !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) && + !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } diff --git a/mm/mprotect.c b/mm/mprotect.c index 1d823b050329..494192ca954b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -419,7 +419,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, */ if (arch_has_pfn_modify_check() && (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && - (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { + (newflags & VM_ACCESS_FLAGS) == 0) { pgprot_t new_pgprot = vm_get_page_prot(newflags); error = walk_page_range(current->mm, start, end, @@ -598,7 +598,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, newflags |= (vma->vm_flags & ~mask_off_old_flags); /* newflags >> 4 shift VM_MAY% in place of VM_% */ - if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { + if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) { error = -EACCES; goto out; } diff --git a/mm/page_counter.c b/mm/page_counter.c index c56db2d5e159..e3a205275a0b 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } } @@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter, propagate_protected_usage(counter, new); /* * This is racy, but we can live with some - * inaccuracy in the failcnt. + * inaccuracy in the failcnt which is only used + * to report stats. */ - c->failcnt++; + data_race(c->failcnt++); *fail = c; goto failed; } @@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter, * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } return true; diff --git a/mm/page_io.c b/mm/page_io.c index 76965be1d40e..26935db0676c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -86,7 +86,7 @@ static void swap_slot_free_notify(struct page *page) return; sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) + if (data_race(!(sis->flags & SWP_BLKDEV))) return; /* @@ -286,7 +286,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct swap_info_struct *sis = page_swap_info(page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -377,7 +377,7 @@ int swap_readpage(struct page *page, bool synchronous) goto out; } - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -439,7 +439,7 @@ int swap_set_page_dirty(struct page *page) { struct swap_info_struct *sis = page_swap_info(page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct address_space *mapping = sis->swap_file->f_mapping; VM_BUG_ON_PAGE(!PageSwapCache(page), page); diff --git a/mm/rmap.c b/mm/rmap.c index 9983da09d6fd..b83864763f78 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ void flush_tlb_batched_pending(struct mm_struct *mm) { - if (mm->tlb_flush_batched) { + if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); /* diff --git a/mm/swap.c b/mm/swap.c index bf9a79fed62d..a37bd7b202ac 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -599,7 +599,8 @@ void lru_add_drain_cpu(int cpu) __pagevec_lru_add(pvec); pvec = &per_cpu(lru_rotate_pvecs, cpu); - if (pagevec_count(pvec)) { + /* Disabling interrupts below acts as a compiler barrier. */ + if (data_race(pagevec_count(pvec))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -744,7 +745,7 @@ void lru_add_drain_all(void) struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || - pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || + data_race(pagevec_count(&per_cpu(lru_rotate_pvecs, cpu))) || pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || diff --git a/mm/swap_state.c b/mm/swap_state.c index ebed37bbf7a3..c8e0fc5a5a3a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -58,8 +58,8 @@ static bool enable_vma_readahead __read_mostly = true; #define GET_SWAP_RA_VAL(vma) \ (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) -#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) -#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0) +#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++) +#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr)) static struct { unsigned long add_total; diff --git a/mm/swapfile.c b/mm/swapfile.c index 5871a2aa86a5..6659ab563448 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -678,7 +678,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) - si->highest_bit -= nr_entries; + WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); si->inuse_pages += nr_entries; if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; @@ -710,7 +710,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, if (end > si->highest_bit) { bool was_full = !si->highest_bit; - si->highest_bit = end; + WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } @@ -843,7 +843,7 @@ checks: else goto done; } - si->swap_map[offset] = usage; + WRITE_ONCE(si->swap_map[offset], usage); inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); @@ -889,12 +889,13 @@ done: scan: spin_unlock(&si->lock); - while (++offset <= si->highest_bit) { - if (!si->swap_map[offset]) { + while (++offset <= READ_ONCE(si->highest_bit)) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -905,11 +906,12 @@ scan: } offset = si->lowest_bit; while (offset < scan_base) { - if (!si->swap_map[offset]) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -1111,7 +1113,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry) p = swp_swap_info(entry); if (!p) goto bad_nofile; - if (!(p->flags & SWP_USED)) + if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) @@ -1137,7 +1139,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry) p = __swap_info_get(entry); if (!p) goto out; - if (!p->swap_map[swp_offset(entry)]) + if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; @@ -1206,7 +1208,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + if (usage) + WRITE_ONCE(p->swap_map[offset], usage); + else + WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } @@ -1258,7 +1263,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry) goto bad_nofile; rcu_read_lock(); - if (!(si->flags & SWP_VALID)) + if (data_race(!(si->flags & SWP_VALID))) goto unlock_out; offset = swp_offset(entry); if (offset >= si->max) @@ -3436,7 +3441,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) } else err = -ENOENT; /* unused swap entry */ - p->swap_map[offset] = count | has_cache; + WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); diff --git a/mm/util.c b/mm/util.c index 988d11e6c17c..cc89e2404e19 100644 --- a/mm/util.c +++ b/mm/util.c @@ -798,8 +798,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { long allowed; - VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < - -(s64)vm_committed_as_batch * num_online_cpus(), + /* + * A transient decrease in the value is unlikely, so no need + * READ_ONCE() for vm_committed_as.count. + */ + VM_WARN_ONCE(data_race(percpu_counter_read(&vm_committed_as) < + -(s64)vm_committed_as_batch * num_online_cpus()), "memory commitment underflow"); vm_acct_memory(pages); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c57850fab4b..e086e2b57a3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1734,14 +1734,48 @@ int tcp_mmap(struct file *file, struct socket *sock, } EXPORT_SYMBOL(tcp_mmap); +static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, + struct page **pages, + unsigned long pages_to_map, + unsigned long *insert_addr, + u32 *length_with_pending, + u32 *seq, + struct tcp_zerocopy_receive *zc) +{ + unsigned long pages_remaining = pages_to_map; + int bytes_mapped; + int ret; + + ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining); + bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining); + /* Even if vm_insert_pages fails, it may have partially succeeded in + * mapping (some but not all of the pages). + */ + *seq += bytes_mapped; + *insert_addr += bytes_mapped; + if (ret) { + /* But if vm_insert_pages did fail, we have to unroll some state + * we speculatively touched before. + */ + const int bytes_not_mapped = PAGE_SIZE * pages_remaining; + *length_with_pending -= bytes_not_mapped; + zc->recv_skip_hint += bytes_not_mapped; + } + return ret; +} + static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; u32 length = 0, seq, offset, zap_len; + #define PAGE_BATCH_SIZE 8 + struct page *pages[PAGE_BATCH_SIZE]; const skb_frag_t *frags = NULL; struct vm_area_struct *vma; struct sk_buff *skb = NULL; + unsigned long pg_idx = 0; + unsigned long curr_addr; struct tcp_sock *tp; int inq; int ret; @@ -1754,6 +1788,8 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); + tp = tcp_sk(sk); + down_read(¤t->mm->mmap_sem); ret = -EINVAL; @@ -1762,7 +1798,6 @@ static int tcp_zerocopy_receive(struct sock *sk, goto out; zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); - tp = tcp_sk(sk); seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); @@ -1774,8 +1809,20 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint = zc->length; } ret = 0; + curr_addr = address; while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { + /* If we're here, finish the current batch. */ + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, + pg_idx, + &curr_addr, + &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } if (skb) { if (zc->recv_skip_hint > 0) break; @@ -1784,7 +1831,6 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } - zc->recv_skip_hint = skb->len - offset; offset -= skb_headlen(skb); if ((int)offset < 0 || skb_has_frag_list(skb)) @@ -1808,14 +1854,24 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint -= remaining; break; } - ret = vm_insert_page(vma, address + length, - skb_frag_page(frags)); - if (ret) - break; + pages[pg_idx] = skb_frag_page(frags); + pg_idx++; length += PAGE_SIZE; - seq += PAGE_SIZE; zc->recv_skip_hint -= PAGE_SIZE; frags++; + if (pg_idx == PAGE_BATCH_SIZE) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } + } + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, &seq, + zc); } out: up_read(¤t->mm->mmap_sem); diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c index d29a86cda87f..d77d7861787c 100644 --- a/tools/laptop/freefall/freefall.c +++ b/tools/laptop/freefall/freefall.c @@ -4,7 +4,7 @@ * Copyright 2008 Eric Piel * Copyright 2009 Pavel Machek <pavel@ucw.cz> * Copyright 2012 Sonal Santan - * Copyright 2014 Pali Rohár <pali.rohar@gmail.com> + * Copyright 2014 Pali Rohár <pali@kernel.org> */ #include <stdio.h> diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 8b944cf042f6..3702dbcc90a7 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -61,6 +61,8 @@ ALL_TESTS="$ALL_TESTS 0006:10:1" ALL_TESTS="$ALL_TESTS 0007:5:1" ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" +ALL_TESTS="$ALL_TESTS 0010:1:1" +ALL_TESTS="$ALL_TESTS 0011:1:1" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -149,6 +151,7 @@ function load_req_mod() test_finish() { + echo "$MODPROBE" > /proc/sys/kernel/modprobe echo "Test completed" } @@ -443,6 +446,30 @@ kmod_test_0009() config_expect_result ${FUNCNAME[0]} SUCCESS } +kmod_test_0010() +{ + kmod_defaults_driver + config_num_threads 1 + echo "/KMOD_TEST_NONEXISTENT" > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + +kmod_test_0011() +{ + kmod_defaults_driver + config_num_threads 1 + # This causes the kernel to not even try executing modprobe. The error + # code is still -ENOENT like when modprobe doesn't exist, so we can't + # easily test for the exact difference. But this still is a useful test + # since there was a bug where request_module() returned 0 in this case. + echo > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + list_tests() { echo "Test ID list:" @@ -460,6 +487,8 @@ list_tests() echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()" echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()" echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()" + echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path" + echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading" } usage() @@ -505,18 +534,23 @@ function test_num() fi } -function get_test_count() +function get_test_data() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + local field_num=$(echo $1 | sed 's/^0*//') + echo $ALL_TESTS | awk '{print $'$field_num'}' +} + +function get_test_count() +{ + TEST_DATA=$(get_test_data $1) LAST_TWO=${TEST_DATA#*:*} echo ${LAST_TWO%:*} } function get_test_enabled() { - test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + TEST_DATA=$(get_test_data $1) echo ${TEST_DATA#*:*:} } @@ -611,6 +645,7 @@ test_reqs allow_user_defaults load_req_mod +MODPROBE=$(</proc/sys/kernel/modprobe) trap "test_finish" EXIT parse_args $@ |