Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 196
1 file changed, 105 insertions(+), 91 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 08522a831c7a..ac2c9f12a7b2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -27,6 +27,7 @@
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/kasan.h>
+#include <linux/kmsan.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/pagevec.h>
@@ -482,6 +483,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 {
 	static unsigned long prev_end_pfn, nr_initialised;
 
+	if (early_page_ext_enabled())
+		return false;
 	/*
 	 * prev_end_pfn static that contains the end of previous zone
 	 * No need to protect because called very early in boot before smp_init.
@@ -542,7 +545,7 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
 #ifdef CONFIG_SPARSEMEM
 	pfn &= (PAGES_PER_SECTION-1);
 #else
-	pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
+	pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn);
 #endif /* CONFIG_SPARSEMEM */
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 }
@@ -870,7 +873,8 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
 	INIT_LIST_HEAD(&page->buddy_list);
 	set_page_private(page, order);
 	/* Guard pages are not available for any usage */
-	__mod_zone_freepage_state(zone, -(1 << order), migratetype);
+	if (!is_migrate_isolate(migratetype))
+		__mod_zone_freepage_state(zone, -(1 << order), migratetype);
 
 	return true;
 }
@@ -900,7 +904,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
  * order of appearance. So we need to first gather the full picture of what was
  * enabled, and then make decisions.
  */
-void init_mem_debugging_and_hardening(void)
+void __init init_mem_debugging_and_hardening(void)
 {
 	bool page_poisoning_requested = false;
 
@@ -935,6 +939,10 @@ void init_mem_debugging_and_hardening(void)
 	else
 		static_branch_disable(&init_on_free);
 
+	if (IS_ENABLED(CONFIG_KMSAN) &&
+	    (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
+		pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	if (!debug_pagealloc_enabled())
 		return;
@@ -1105,7 +1113,7 @@ static inline void __free_one_page(struct page *page,
 		int migratetype, fpi_t fpi_flags)
 {
 	struct capture_control *capc = task_capc(zone);
-	unsigned long buddy_pfn;
+	unsigned long buddy_pfn = 0;
 	unsigned long combined_pfn;
 	struct page *buddy;
 	bool to_tail;
@@ -1283,20 +1291,20 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
 	return bad_reason;
 }
 
-static void check_free_page_bad(struct page *page)
+static void free_page_is_bad_report(struct page *page)
 {
 	bad_page(page,
 		 page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
 }
 
-static inline int check_free_page(struct page *page)
+static inline bool free_page_is_bad(struct page *page)
 {
 	if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
-		return 0;
+		return false;
 
 	/* Something has gone sideways, find it */
-	check_free_page_bad(page);
-	return 1;
+	free_page_is_bad_report(page);
+	return true;
 }
 
 static int free_tail_pages_check(struct page *head_page, struct page *page)
@@ -1398,6 +1406,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	VM_BUG_ON_PAGE(PageTail(page), page);
 
 	trace_mm_page_free(page, order);
+	kmsan_free_page(page, order);
 
 	if (unlikely(PageHWPoison(page)) && !order) {
 		/*
@@ -1428,7 +1437,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 		for (i = 1; i < (1 << order); i++) {
 			if (compound)
 				bad += free_tail_pages_check(page, page + i);
-			if (unlikely(check_free_page(page + i))) {
+			if (unlikely(free_page_is_bad(page + i))) {
 				bad++;
 				continue;
 			}
@@ -1439,8 +1448,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
 		page->mapping = NULL;
 	if (memcg_kmem_enabled() && PageMemcgKmem(page))
 		__memcg_kmem_uncharge_page(page, order);
-	if (check_free)
-		bad += check_free_page(page);
+	if (check_free && free_page_is_bad(page))
+		bad++;
 	if (bad)
 		return false;
 
@@ -1499,10 +1508,11 @@ static bool free_pcp_prepare(struct page *page, unsigned int order)
 	return free_pages_prepare(page, order, true, FPI_NONE);
 }
 
+/* return true if this page has an inappropriate state */
 static bool bulkfree_pcp_prepare(struct page *page)
 {
 	if (debug_pagealloc_enabled_static())
-		return check_free_page(page);
+		return free_page_is_bad(page);
 	else
 		return false;
 }
@@ -1523,7 +1533,7 @@ static bool free_pcp_prepare(struct page *page, unsigned int order)
 
 static bool bulkfree_pcp_prepare(struct page *page)
 {
-	return check_free_page(page);
+	return free_page_is_bad(page);
 }
 #endif /* CONFIG_DEBUG_VM */
 
@@ -1575,7 +1585,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 
 		order = pindex_to_order(pindex);
 		nr_pages = 1 << order;
-		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
 		do {
 			int mt;
 
@@ -1804,6 +1813,10 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
 {
 	if (early_page_uninitialised(pfn))
 		return;
+	if (!kmsan_memblock_free_pages(page, order)) {
+		/* KMSAN will take care of these pages. */
+		return;
+	}
 	__free_pages_core(page, order);
 }
 
@@ -1855,7 +1868,7 @@ void set_zone_contiguous(struct zone *zone)
 	unsigned long block_start_pfn = zone->zone_start_pfn;
 	unsigned long block_end_pfn;
 
-	block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+	block_end_pfn = pageblock_end_pfn(block_start_pfn);
 	for (; block_start_pfn < zone_end_pfn(zone);
 			block_start_pfn = block_end_pfn,
 			 block_end_pfn += pageblock_nr_pages) {
@@ -1890,15 +1903,14 @@ static void __init deferred_free_range(unsigned long pfn,
 	page = pfn_to_page(pfn);
 
 	/* Free a large naturally-aligned chunk if possible */
-	if (nr_pages == pageblock_nr_pages &&
-	    (pfn & (pageblock_nr_pages - 1)) == 0) {
+	if (nr_pages == pageblock_nr_pages && pageblock_aligned(pfn)) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		__free_pages_core(page, pageblock_order);
 		return;
 	}
 
 	for (i = 0; i < nr_pages; i++, page++, pfn++) {
-		if ((pfn & (pageblock_nr_pages - 1)) == 0)
+		if (pageblock_aligned(pfn))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		__free_pages_core(page, 0);
 	}
@@ -1917,16 +1929,12 @@ static inline void __init pgdat_init_report_one_done(void)
 /*
  * Returns true if page needs to be initialized or freed to buddy allocator.
  *
- * First we check if pfn is valid on architectures where it is possible to have
- * holes within pageblock_nr_pages. On systems where it is not possible, this
- * function is optimized out.
- *
- * Then, we check if a current large page is valid by only checking the validity
+ * We check if a current large page is valid by only checking the validity
  * of the head pfn.
  */
 static inline bool __init deferred_pfn_valid(unsigned long pfn)
 {
-	if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+	if (pageblock_aligned(pfn) && !pfn_valid(pfn))
 		return false;
 	return true;
 }
@@ -1938,14 +1946,13 @@ static inline bool __init deferred_pfn_valid(unsigned long pfn)
 static void __init deferred_free_pages(unsigned long pfn,
 				       unsigned long end_pfn)
 {
-	unsigned long nr_pgmask = pageblock_nr_pages - 1;
 	unsigned long nr_free = 0;
 
 	for (; pfn < end_pfn; pfn++) {
 		if (!deferred_pfn_valid(pfn)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 0;
-		} else if (!(pfn & nr_pgmask)) {
+		} else if (pageblock_aligned(pfn)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 1;
 		} else {
@@ -1965,7 +1972,6 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 						 unsigned long pfn,
 						 unsigned long end_pfn)
 {
-	unsigned long nr_pgmask = pageblock_nr_pages - 1;
 	int nid = zone_to_nid(zone);
 	unsigned long nr_pages = 0;
 	int zid = zone_idx(zone);
@@ -1975,7 +1981,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 		if (!deferred_pfn_valid(pfn)) {
 			page = NULL;
 			continue;
-		} else if (!page || !(pfn & nr_pgmask)) {
+		} else if (!page || pageblock_aligned(pfn)) {
 			page = pfn_to_page(pfn);
 		} else {
 			page++;
@@ -2651,8 +2657,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
 		*num_movable = 0;
 
 	pfn = page_to_pfn(page);
-	start_pfn = pfn & ~(pageblock_nr_pages - 1);
-	end_pfn = start_pfn + pageblock_nr_pages - 1;
+	start_pfn = pageblock_start_pfn(pfn);
+	end_pfn = pageblock_end_pfn(pfn) - 1;
 
 	/* Do not cross zone boundaries */
 	if (!zone_spans_pfn(zone, start_pfn))
@@ -3010,7 +3016,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
 	 * i.e. orders < pageblock_order. If there are no local zones free,
 	 * the zonelists will be reiterated without ALLOC_NOFRAGMENT.
 	 */
-	if (alloc_flags & ALLOC_NOFRAGMENT)
+	if (order < pageblock_order && alloc_flags & ALLOC_NOFRAGMENT)
 		min_order = pageblock_order;
 
 	/*
@@ -3598,16 +3604,11 @@ EXPORT_SYMBOL_GPL(split_page);
 
 int __isolate_free_page(struct page *page, unsigned int order)
 {
-	unsigned long watermark;
-	struct zone *zone;
-	int mt;
-
-	BUG_ON(!PageBuddy(page));
-
-	zone = page_zone(page);
-	mt = get_pageblock_migratetype(page);
+	struct zone *zone = page_zone(page);
+	int mt = get_pageblock_migratetype(page);
 
 	if (!is_migrate_isolate(mt)) {
+		unsigned long watermark;
 		/*
 		 * Obey watermarks as if the page was being allocated. We can
 		 * emulate a high-order watermark check with a raised order-0
@@ -3621,8 +3622,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
 
 		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
-
-	/* Remove page from free list */
 	del_page_from_free_list(page, zone, order);
 
 	/*
@@ -3643,7 +3642,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
 		}
 	}
-
 	return 1UL << order;
 }
@@ -3670,8 +3668,6 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt)
 
 /*
  * Update NUMA hit/miss statistics
- *
- * Must be called with interrupts disabled.
  */
 static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
 				   long nr_account)
@@ -3777,8 +3773,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 /* Lock and remove page from the per-cpu list */
 static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
-			gfp_t gfp_flags, int migratetype,
-			unsigned int alloc_flags)
+			int migratetype, unsigned int alloc_flags)
 {
 	struct per_cpu_pages *pcp;
 	struct list_head *list;
@@ -3815,8 +3810,17 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 }
 
 /*
- * Allocate a page from the given zone. Use pcplists for order-0 allocations.
+ * Allocate a page from the given zone.
+ * Use pcplists for THP or "cheap" high-order allocations.
+ */
+
+/*
+ * Do not instrument rmqueue() with KMSAN. This function may call
+ * __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
+ * If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
+ * may call rmqueue() again, which will result in a deadlock.
  */
+__no_sanitize_memory
 static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
@@ -3839,7 +3843,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 		if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
 				migratetype != MIGRATE_MOVABLE) {
 			page = rmqueue_pcplist(preferred_zone, zone, order,
-					gfp_flags, migratetype, alloc_flags);
+					migratetype, alloc_flags);
 			if (likely(page))
 				goto out;
 		}
@@ -4329,7 +4333,7 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
 	if (!in_task() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
-	show_mem(filter, nodemask);
+	__show_mem(filter, nodemask, gfp_zone(gfp_mask));
 }
 
 void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
@@ -5147,7 +5151,8 @@ retry:
 
 	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
 	if (reserve_flags)
-		alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
+		alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags) |
+					  (alloc_flags & ALLOC_KSWAPD);
 
 	/*
 	 * Reset the nodemask and zonelist iterators if memory policies can be
@@ -5272,7 +5277,7 @@ nopage:
 		 * so that we can identify them and convert them to something
 		 * else.
 		 */
-		WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
+		WARN_ON_ONCE_GFP(costly_order, gfp_mask);
 
 		/*
 		 * Help non-failing allocations by giving them access to memory
@@ -5569,6 +5574,7 @@ out:
 	}
 
 	trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
+	kmsan_alloc_page(page, order, alloc_gfp);
 
 	return page;
 }
@@ -6057,6 +6063,15 @@ static void show_migration_types(unsigned char type)
 	printk(KERN_CONT "(%s) ", tmp);
 }
 
+static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
+{
+	int zone_idx;
+	for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
+		if (zone_managed_pages(pgdat->node_zones + zone_idx))
+			return true;
+	return false;
+}
+
 /*
  * Show free area list (used inside shift_scroll-lock stuff)
  * We also calculate the percentage fragmentation. We do this by counting the
@@ -6066,7 +6081,7 @@ static void show_migration_types(unsigned char type)
  * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
  *   cpuset.
  */
-void show_free_areas(unsigned int filter, nodemask_t *nodemask)
+void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 {
 	unsigned long free_pcp = 0;
 	int cpu, nid;
@@ -6074,6 +6089,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 	pg_data_t *pgdat;
 
 	for_each_populated_zone(zone) {
+		if (zone_idx(zone) > max_zone_idx)
+			continue;
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
 
@@ -6113,6 +6130,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 	for_each_online_pgdat(pgdat) {
 		if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
 			continue;
+		if (!node_has_managed_zones(pgdat, max_zone_idx))
+			continue;
 
 		printk("Node %d"
 			" active_anon:%lukB"
@@ -6171,6 +6190,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 	for_each_populated_zone(zone) {
 		int i;
 
+		if (zone_idx(zone) > max_zone_idx)
+			continue;
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
 
@@ -6232,6 +6253,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		unsigned long nr[MAX_ORDER], flags, total = 0;
 		unsigned char types[MAX_ORDER];
 
+		if (zone_idx(zone) > max_zone_idx)
+			continue;
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
 		show_node(zone);
@@ -6557,7 +6580,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
 #define BOOT_PAGESET_BATCH	1
 static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
 static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
-DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
 
 static void __build_all_zonelists(void *data)
 {
@@ -6753,7 +6776,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
 		 * such that unmovable allocations won't be scattered all
 		 * over the place during system boot.
 		 */
-		if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+		if (pageblock_aligned(pfn)) {
 			set_pageblock_migratetype(page, migratetype);
 			cond_resched();
 		}
@@ -6796,7 +6819,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 	 * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
	 * because this is done early in section_activate()
 	 */
-	if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+	if (pageblock_aligned(pfn)) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		cond_resched();
 	}
@@ -6859,7 +6882,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
 	unsigned long start = jiffies;
 	int nid = pgdat->node_id;
 
-	if (WARN_ON_ONCE(!pgmap || zone_idx(zone) != ZONE_DEVICE))
+	if (WARN_ON_ONCE(!pgmap || zone_idx != ZONE_DEVICE))
 		return;
 
 	/*
@@ -6928,9 +6951,8 @@ static void __init init_unavailable_range(unsigned long spfn,
 	u64 pgcnt = 0;
 
 	for (pfn = spfn; pfn < epfn; pfn++) {
-		if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
-			pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
-				+ pageblock_nr_pages - 1;
+		if (!pfn_valid(pageblock_start_pfn(pfn))) {
+			pfn = pageblock_end_pfn(pfn) - 1;
 			continue;
 		}
 		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
@@ -7035,7 +7057,7 @@ static int zone_batchsize(struct zone *zone)
 	 * size is striking a balance between allocation latency
 	 * and zone lock contention.
 	 */
-	batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE);
+	batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE);
 	batch /= 4;		/* We effectively *= 4 below */
 	if (batch < 1)
 		batch = 1;
@@ -7220,6 +7242,17 @@ void __meminit setup_zone_pageset(struct zone *zone)
 }
 
 /*
+ * The zone indicated has a new number of managed_pages; batch sizes and percpu
+ * page high values need to be recalculated.
+ */
+static void zone_pcp_update(struct zone *zone, int cpu_online)
+{
+	mutex_lock(&pcp_batch_high_lock);
+	zone_set_pageset_high_and_batch(zone, cpu_online);
+	mutex_unlock(&pcp_batch_high_lock);
+}
+
+/*
 * Allocate per cpu pagesets and initialize them.
 * Before this call only boot pagesets were available.
 */
@@ -7663,6 +7696,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	int i;
 
 	pgdat_resize_init(pgdat);
+	pgdat_kswapd_lock_init(pgdat);
 
 	pgdat_init_split_queue(pgdat);
 	pgdat_init_kcompactd(pgdat);
@@ -7957,17 +7991,6 @@ unsigned long __init node_map_pfn_alignment(void)
 	return ~accl_mask + 1;
 }
 
-/**
- * find_min_pfn_with_active_regions - Find the minimum PFN registered
- *
- * Return: the minimum PFN based on information provided via
- * memblock_set_node().
- */
-unsigned long __init find_min_pfn_with_active_regions(void)
-{
-	return PHYS_PFN(memblock_start_of_DRAM());
-}
-
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
@@ -8260,7 +8283,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
 	memset(arch_zone_highest_possible_pfn, 0,
 				sizeof(arch_zone_highest_possible_pfn));
 
-	start_pfn = find_min_pfn_with_active_regions();
+	start_pfn = PHYS_PFN(memblock_start_of_DRAM());
 	descending = arch_has_descending_max_zone_pfns();
 
 	for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -8509,8 +8532,8 @@ void __init mem_init_print_info(void)
 #endif
 		")\n",
 		K(nr_free_pages()), K(physpages),
-		codesize >> 10, datasize >> 10, rosize >> 10,
-		(init_data_size + init_code_size) >> 10, bss_size >> 10,
+		codesize / SZ_1K, datasize / SZ_1K, rosize / SZ_1K,
+		(init_data_size + init_code_size) / SZ_1K, bss_size / SZ_1K,
 		K(physpages - totalram_pages() - totalcma_pages),
 		K(totalcma_pages)
 #ifdef CONFIG_HIGHMEM
@@ -9023,7 +9046,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 {
 	unsigned long long max = high_limit;
 	unsigned long log2qty, size;
-	void *table = NULL;
+	void *table;
 	gfp_t gfp_flags;
 	bool virt;
 	bool huge;
@@ -9035,8 +9058,8 @@ void *__init alloc_large_system_hash(const char *tablename,
 		numentries -= arch_reserved_kernel_pages();
 
 		/* It isn't necessary when PAGE_SIZE >= 1MB */
-		if (PAGE_SHIFT < 20)
-			numentries = round_up(numentries, (1<<20)/PAGE_SIZE);
+		if (PAGE_SIZE < SZ_1M)
+			numentries = round_up(numentries, SZ_1M / PAGE_SIZE);
 
 #if __BITS_PER_LONG > 32
 		if (!high_limit) {
@@ -9461,17 +9484,6 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 EXPORT_SYMBOL(free_contig_range);
 
 /*
- * The zone indicated has a new number of managed_pages; batch sizes and percpu
- * page high values need to be recalculated.
- */
-void zone_pcp_update(struct zone *zone, int cpu_online)
-{
-	mutex_lock(&pcp_batch_high_lock);
-	zone_set_pageset_high_and_batch(zone, cpu_online);
-	mutex_unlock(&pcp_batch_high_lock);
-}
-
-/*
 * Effectively disable pcplists for the zone by setting the high limit to 0
 * and draining all cpus. A concurrent page freeing on another CPU that's about
 * to put the page on pcplist will either finish before the drain and the page
@@ -9503,9 +9515,11 @@ void zone_pcp_reset(struct zone *zone)
 			drain_zonestat(zone, pzstats);
 		}
 		free_percpu(zone->per_cpu_pageset);
-		free_percpu(zone->per_cpu_zonestats);
 		zone->per_cpu_pageset = &boot_pageset;
-		zone->per_cpu_zonestats = &boot_zonestats;
+		if (zone->per_cpu_zonestats != &boot_zonestats) {
+			free_percpu(zone->per_cpu_zonestats);
+			zone->per_cpu_zonestats = &boot_zonestats;
+		}
 	}
 }
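Reviewer note (not part of the patch): many hunks above replace open-coded pageblock arithmetic with the pageblock_start_pfn()/pageblock_end_pfn()/pageblock_aligned() helpers. A minimal sketch of why those conversions are behaviour-preserving follows, assuming the helper definitions that landed in include/linux/pageblock-flags.h around this development cycle and that pageblock_nr_pages is a power of two; the exact upstream definitions may differ.

/* Illustrative sketch only; these definitions are not introduced by this diff. */
#define pageblock_start_pfn(pfn)  ALIGN_DOWN((pfn), pageblock_nr_pages)
#define pageblock_end_pfn(pfn)    ALIGN((pfn) + 1, pageblock_nr_pages)
#define pageblock_aligned(pfn)    IS_ALIGNED((pfn), pageblock_nr_pages)

/*
 * With pageblock_nr_pages a power of two, the replaced expressions map as:
 *   pfn & ~(pageblock_nr_pages - 1)         ->  pageblock_start_pfn(pfn)
 *   round_down(pfn, pageblock_nr_pages)     ->  pageblock_start_pfn(pfn)
 *   ALIGN(pfn + 1, pageblock_nr_pages)      ->  pageblock_end_pfn(pfn)
 *   (pfn & (pageblock_nr_pages - 1)) == 0   ->  pageblock_aligned(pfn)
 *   IS_ALIGNED(pfn, pageblock_nr_pages)     ->  pageblock_aligned(pfn)
 */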