summaryrefslogtreecommitdiff
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c143
1 files changed, 85 insertions, 58 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index da59aece4756..87d875e5e0a9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -66,12 +66,6 @@ static bool hugetlb_cma_page(struct page *page, unsigned int order)
#endif
static unsigned long hugetlb_cma_size __initdata;
-/*
- * Minimum page order among possible hugepage sizes, set to a proper value
- * at boot time.
- */
-static unsigned int minimum_order __read_mostly = UINT_MAX;
-
__initdata LIST_HEAD(huge_boot_pages);
/* for command line parsing */
@@ -2152,11 +2146,17 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;
struct page *page;
int rc = 0;
+ unsigned int order;
+ struct hstate *h;
if (!hugepages_supported())
return rc;
- for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
+ order = huge_page_order(&default_hstate);
+ for_each_hstate(h)
+ order = min(order, huge_page_order(h));
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) {
page = pfn_to_page(pfn);
rc = dissolve_free_huge_page(page);
if (rc)
@@ -2766,8 +2766,7 @@ retry:
* Fail with -EBUSY if not possible.
*/
spin_unlock_irq(&hugetlb_lock);
- if (!isolate_huge_page(old_page, list))
- ret = -EBUSY;
+ ret = isolate_hugetlb(old_page, list);
spin_lock_irq(&hugetlb_lock);
goto free_new;
} else if (!HPageFreed(old_page)) {
@@ -2843,7 +2842,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
if (hstate_is_gigantic(h))
return -ENOMEM;
- if (page_count(head) && isolate_huge_page(head, list))
+ if (page_count(head) && !isolate_hugetlb(head, list))
ret = 0;
else if (!page_count(head))
ret = alloc_and_dissolve_huge_page(h, head, list);
@@ -3149,9 +3148,6 @@ static void __init hugetlb_init_hstates(void)
struct hstate *h, *h2;
for_each_hstate(h) {
- if (minimum_order > huge_page_order(h))
- minimum_order = huge_page_order(h);
-
/* oversize hugepages were init'ed in early boot */
if (!hstate_is_gigantic(h))
hugetlb_hstate_alloc_pages(h);
@@ -3176,7 +3172,6 @@ static void __init hugetlb_init_hstates(void)
h->demote_order = h2->order;
}
}
- VM_BUG_ON(minimum_order == UINT_MAX);
}
static void __init report_hugepages(void)
@@ -4732,6 +4727,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
unsigned long npages = pages_per_huge_page(h);
struct address_space *mapping = src_vma->vm_file->f_mapping;
struct mmu_notifier_range range;
+ unsigned long last_addr_mask;
int ret = 0;
if (cow) {
@@ -4751,11 +4747,14 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
i_mmap_lock_read(mapping);
}
+ last_addr_mask = hugetlb_mask_last_page(h);
for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
spinlock_t *src_ptl, *dst_ptl;
src_pte = huge_pte_offset(src, addr, sz);
- if (!src_pte)
+ if (!src_pte) {
+ addr |= last_addr_mask;
continue;
+ }
dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
if (!dst_pte) {
ret = -ENOMEM;
@@ -4772,8 +4771,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
* after taking the lock below.
*/
dst_entry = huge_ptep_get(dst_pte);
- if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
+ if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) {
+ addr |= last_addr_mask;
continue;
+ }
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
@@ -4803,12 +4804,11 @@ again:
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
entry = huge_pte_mkuffd_wp(entry);
- set_huge_swap_pte_at(src, addr, src_pte,
- entry, sz);
+ set_huge_pte_at(src, addr, src_pte, entry);
}
if (!userfaultfd_wp(dst_vma) && uffd_wp)
entry = huge_pte_clear_uffd_wp(entry);
- set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
+ set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
/*
* We copy the pte marker only if the dst vma has
@@ -4934,7 +4934,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
unsigned long sz = huge_page_size(h);
struct mm_struct *mm = vma->vm_mm;
unsigned long old_end = old_addr + len;
- unsigned long old_addr_copy;
+ unsigned long last_addr_mask;
pte_t *src_pte, *dst_pte;
struct mmu_notifier_range range;
bool shared_pmd = false;
@@ -4949,23 +4949,23 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
flush_cache_range(vma, range.start, range.end);
mmu_notifier_invalidate_range_start(&range);
+ last_addr_mask = hugetlb_mask_last_page(h);
/* Prevent race with file truncation */
i_mmap_lock_write(mapping);
for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
src_pte = huge_pte_offset(mm, old_addr, sz);
- if (!src_pte)
+ if (!src_pte) {
+ old_addr |= last_addr_mask;
+ new_addr |= last_addr_mask;
continue;
+ }
if (huge_pte_none(huge_ptep_get(src_pte)))
continue;
- /* old_addr arg to huge_pmd_unshare() is a pointer and so the
- * arg may be modified. Pass a copy instead to preserve the
- * value in old_addr.
- */
- old_addr_copy = old_addr;
-
- if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) {
+ if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
shared_pmd = true;
+ old_addr |= last_addr_mask;
+ new_addr |= last_addr_mask;
continue;
}
@@ -4999,6 +4999,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
struct mmu_notifier_range range;
+ unsigned long last_addr_mask;
bool force_flush = false;
WARN_ON(!is_vm_hugetlb_page(vma));
@@ -5019,17 +5020,21 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
end);
adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
mmu_notifier_invalidate_range_start(&range);
+ last_addr_mask = hugetlb_mask_last_page(h);
address = start;
for (; address < end; address += sz) {
ptep = huge_pte_offset(mm, address, sz);
- if (!ptep)
+ if (!ptep) {
+ address |= last_addr_mask;
continue;
+ }
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, address, ptep)) {
spin_unlock(ptl);
tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
force_flush = true;
+ address |= last_addr_mask;
continue;
}
@@ -5709,7 +5714,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
entry = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_migration(entry))) {
- migration_entry_wait_huge(vma, mm, ptep);
+ migration_entry_wait_huge(vma, ptep);
return 0;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
@@ -6046,8 +6051,6 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
- (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
- dst_vma->vm_flags & VM_WRITE);
hugetlb_count_add(pages_per_huge_page(h), dst_mm);
/* No need to invalidate - it was non-present before */
@@ -6299,6 +6302,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long pages = 0, psize = huge_page_size(h);
bool shared_pmd = false;
struct mmu_notifier_range range;
+ unsigned long last_addr_mask;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
@@ -6315,14 +6319,17 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
flush_cache_range(vma, range.start, range.end);
mmu_notifier_invalidate_range_start(&range);
+ last_addr_mask = hugetlb_mask_last_page(h);
i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += psize) {
spinlock_t *ptl;
ptep = huge_pte_offset(mm, address, psize);
- if (!ptep)
+ if (!ptep) {
+ address |= last_addr_mask;
continue;
+ }
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, address, ptep)) {
/*
* When uffd-wp is enabled on the vma, unshare
* shouldn't happen at all. Warn about it if it
@@ -6332,6 +6339,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pages++;
spin_unlock(ptl);
shared_pmd = true;
+ address |= last_addr_mask;
continue;
}
pte = huge_ptep_get(ptep);
@@ -6357,8 +6365,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
newpte = pte_swp_mkuffd_wp(newpte);
else if (uffd_wp_resolve)
newpte = pte_swp_clear_uffd_wp(newpte);
- set_huge_swap_pte_at(mm, address, ptep,
- newpte, psize);
+ set_huge_pte_at(mm, address, ptep, newpte);
pages++;
}
spin_unlock(ptl);
@@ -6755,11 +6762,11 @@ out:
* 0 the underlying pte page is not shared, or it is the last user
*/
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long *addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgd_t *pgd = pgd_offset(mm, *addr);
- p4d_t *p4d = p4d_offset(pgd, *addr);
- pud_t *pud = pud_offset(p4d, *addr);
+ pgd_t *pgd = pgd_offset(mm, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
i_mmap_assert_write_locked(vma->vm_file->f_mapping);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
@@ -6769,14 +6776,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
pud_clear(pud);
put_page(virt_to_page(ptep));
mm_dec_nr_pmds(mm);
- /*
- * This update of passed address optimizes loops sequentially
- * processing addresses in increments of huge page size (PMD_SIZE
- * in this case). By clearing the pud, a PUD_SIZE area is unmapped.
- * Update address to the 'last page' in the cleared area so that
- * calling loop can move to first page past this area.
- */
- *addr |= PUD_SIZE - PMD_SIZE;
return 1;
}
@@ -6788,7 +6787,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
}
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long *addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
return 0;
}
@@ -6871,6 +6870,37 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return (pte_t *)pmd;
}
+/*
+ * Return a mask that can be used to update an address to the last huge
+ * page in a page table page mapping size. Used to skip non-present
+ * page table entries when linearly scanning address ranges. Architectures
+ * with unique huge page to page table relationships can define their own
+ * version of this routine.
+ */
+unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+ unsigned long hp_size = huge_page_size(h);
+
+ if (hp_size == PUD_SIZE)
+ return P4D_SIZE - PUD_SIZE;
+ else if (hp_size == PMD_SIZE)
+ return PUD_SIZE - PMD_SIZE;
+ else
+ return 0UL;
+}
+
+#else
+
+/* See description above. Architectures can provide their own version. */
+__weak unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+ if (huge_page_size(h) == PMD_SIZE)
+ return PUD_SIZE - PMD_SIZE;
+#endif
+ return 0UL;
+}
+
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
/*
@@ -6934,7 +6964,7 @@ retry:
} else {
if (is_hugetlb_entry_migration(pte)) {
spin_unlock(ptl);
- __migration_entry_wait(mm, (pte_t *)pmd, ptl);
+ __migration_entry_wait_huge((pte_t *)pmd, ptl);
goto retry;
}
/*
@@ -6966,15 +6996,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
}
-bool isolate_huge_page(struct page *page, struct list_head *list)
+int isolate_hugetlb(struct page *page, struct list_head *list)
{
- bool ret = true;
+ int ret = 0;
spin_lock_irq(&hugetlb_lock);
if (!PageHeadHuge(page) ||
!HPageMigratable(page) ||
!get_page_unless_zero(page)) {
- ret = false;
+ ret = -EBUSY;
goto unlock;
}
ClearHPageMigratable(page);
@@ -7094,14 +7124,11 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
mmu_notifier_invalidate_range_start(&range);
i_mmap_lock_write(vma->vm_file->f_mapping);
for (address = start; address < end; address += PUD_SIZE) {
- unsigned long tmp = address;
-
ptep = huge_pte_offset(mm, address, sz);
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- /* We don't want 'address' to be changed */
- huge_pmd_unshare(mm, vma, &tmp, ptep);
+ huge_pmd_unshare(mm, vma, address, ptep);
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);