-rw-r--r--    include/linux/hugetlb.h    10
-rw-r--r--    mm/hugetlb.c               28
-rw-r--r--    mm/memory.c                 7
3 files changed, 41 insertions, 4 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c5ed2f1bc015..a2227f73a85c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -41,6 +41,9 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range_final(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end,
+			struct page *ref_page);
 void __unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
@@ -99,6 +102,13 @@ static inline unsigned long hugetlb_total_pages(void)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)	({ BUG(); 0; })
 #define unmap_hugepage_range(vma, start, end, page)	BUG()
+static inline void __unmap_hugepage_range_final(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end,
+			struct page *ref_page)
+{
+	BUG();
+}
+
 static inline void hugetlb_report_meminfo(struct seq_file *m)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b1e1bad921c1..0f897b89bbc4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2382,6 +2382,25 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	}
 }
 
+void __unmap_hugepage_range_final(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end,
+			struct page *ref_page)
+{
+	__unmap_hugepage_range(vma, start, end, ref_page);
+
+	/*
+	 * Clear this flag so that x86's huge_pmd_share page_table_shareable
+	 * test will fail on a vma being torn down, and not grab a page table
+	 * on its way out. We're lucky that the flag has such an appropriate
+	 * name, and can in fact be safely cleared here. We could clear it
+	 * before the __unmap_hugepage_range above, but all that's necessary
+	 * is to clear it before releasing the i_mmap_mutex. This works
+	 * because in the context this is called, the VMA is about to be
+	 * destroyed and the i_mmap_mutex is held.
+	 */
+	vma->vm_flags &= ~VM_MAYSHARE;
+}
+
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
@@ -2939,9 +2958,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
-	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
-
+	/*
+	 * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+	 * may have cleared our pud entry and done put_page on the page table:
+	 * once we release i_mmap_mutex, another task can do the final put_page
+	 * and that page table be reused and filled with junk.
+	 */
 	flush_tlb_range(vma, start, end);
+	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
 }
 
 int hugetlb_reserve_pages(struct inode *inode,
diff --git a/mm/memory.c b/mm/memory.c
index 1b1ca176397e..70f5dafabb2a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1358,8 +1358,11 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 				 * Since no pte has actually been setup, it is
 				 * safe to do nothing in this case.
 				 */
-				if (vma->vm_file)
-					unmap_hugepage_range(vma, start, end, NULL);
+				if (vma->vm_file) {
+					mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+					__unmap_hugepage_range_final(vma, start, end, NULL);
+					mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+				}
 
 				start = end;
 			} else
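
The ordering both new comments insist on (finish using the shared page table, then release i_mmap_mutex) can be pictured with a small userspace sketch. This is only an analogy under invented names (shared_pagetable, put_pagetable, other_task), not kernel code: the pthread mutex stands in for i_mmap_mutex, the refcounted object for a shared hugetlb page table, and the printf for flush_tlb_range(); the point is that the "flush" must happen while the lock still pins the other reference.

/*
 * Hypothetical userspace analogy of the ordering fixed above; none of
 * these names exist in the kernel.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct shared_pagetable {
	int refcount;		/* last put frees the object */
	char entries[64];	/* stands in for the pud/pmd entries */
};

static pthread_mutex_t i_mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct shared_pagetable *pt;

static void put_pagetable(struct shared_pagetable *p)
{
	if (--p->refcount == 0) {
		memset(p->entries, 0x5a, sizeof(p->entries));	/* "junk" */
		free(p);
	}
}

/* Models the other task doing the final put once the mutex is released. */
static void *other_task(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&i_mmap_mutex);
	put_pagetable(pt);
	pt = NULL;
	pthread_mutex_unlock(&i_mmap_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pt = calloc(1, sizeof(*pt));
	pt->refcount = 2;			/* shared by two mappings */

	pthread_mutex_lock(&i_mmap_mutex);	/* "i_mmap_mutex" held */
	pthread_create(&t, NULL, other_task, NULL);

	put_pagetable(pt);	/* drop our reference, as huge_pmd_unshare would */

	/*
	 * Correct order, as in the patch: use ("flush") the page table
	 * while the mutex still blocks other_task() from dropping the
	 * last reference, and only then unlock.
	 */
	printf("flushed while locked: entry[0] = %d\n", pt->entries[0]);
	pthread_mutex_unlock(&i_mmap_mutex);

	pthread_join(t, NULL);
	return 0;
}

Swapping the printf and the unlock in main() reproduces the hazard the patch closes: other_task() could then drop the final reference and free (and refill) the object before it is read.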