
Commit 6dfeaff

xzpeter authored and torvalds committed
hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp
Huge pmd sharing for hugetlbfs is racy with userfaultfd-wp, because
userfaultfd-wp is always based on pgtable entries, so those entries cannot
be shared.  Walk the hugetlb range and unshare all such mappings, if any
exist, right before UFFDIO_REGISTER succeeds and returns to userspace.

This pairs with want_pmd_share() in the hugetlb code, so that huge pmd
sharing is completely disabled for any userfaultfd-wp registered range.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Adam Ruprecht <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Alexey Dobriyan <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Cannon Matthews <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Chinwen Chang <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: "Dr. David Alan Gilbert" <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: Lokesh Gidra <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: "Michal Koutný" <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Mina Almasry <[email protected]>
Cc: Nicholas Piggin <[email protected]>
Cc: Oliver Upton <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: Shawn Anastasio <[email protected]>
Cc: Steven Price <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 537cf30
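
For context (not part of the commit), here is a minimal userspace sketch of
the registration path this change hardens. Only the userfaultfd and memfd
calls are real APIs; the 2MB length assumes the default hugepage size, and a
kernel with hugetlbfs uffd-wp support (added by this series) is assumed.

/*
 * Minimal sketch: register a hugetlbfs-backed shared mapping for
 * uffd-wp.  Sizes and the memfd name are illustrative assumptions.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2UL << 20;		/* assumed 2MB default hugepage */
	struct uffdio_api api = {
		.api = UFFD_API,
		.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP,
	};
	struct uffdio_register reg;
	long uffd;
	int memfd;
	void *addr;

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
		return 1;

	/* A VM_MAYSHARE mapping: the case where huge pmds may be shared */
	memfd = memfd_create("uffd-wp-huge", MFD_HUGETLB);
	if (memfd < 0 || ftruncate(memfd, len))
		return 1;
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
	if (addr == MAP_FAILED)
		return 1;

	reg.range.start = (unsigned long)addr;
	reg.range.len = len;
	reg.mode = UFFDIO_REGISTER_MODE_WP;

	/*
	 * With this commit, any already-shared huge pmds covering the
	 * range are unshared before this ioctl returns.
	 */
	if (ioctl(uffd, UFFDIO_REGISTER, &reg))
		return 1;

	printf("uffd-wp registered on %zu bytes at %p\n", len, addr);
	return 0;
}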

3 files changed: +58 −0 lines

fs/userfaultfd.c

Lines changed: 4 additions & 0 deletions

@@ -15,6 +15,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
 #include <linux/mm.h>
+#include <linux/mmu_notifier.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
@@ -1449,6 +1450,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		vma->vm_flags = new_flags;
 		vma->vm_userfaultfd_ctx.ctx = ctx;
 
+		if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
+			hugetlb_unshare_all_pmds(vma);
+
 	skip:
 		prev = vma;
 		start = vma->vm_end;
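
The new hook above pairs with want_pmd_share() on the fault path. As a rough
sketch, reconstructed from the companion patches in this series rather than
from this diff, the predicate reduces to a VM_UFFD_WP flag test that the
fault path consults before attempting to share a pmd:

/*
 * Reconstructed sketch of the pairing (not part of this diff); the
 * real definitions live in include/linux/userfaultfd_k.h and
 * mm/hugetlb.c in this series.
 */
static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
{
	/* Any uffd-wp registered VMA must not share huge pmds. */
	return vma->vm_flags & VM_UFFD_WP;
}

static bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_USERFAULTFD
	/* uffd-wp tracking is per-pgtable-entry: never share */
	if (uffd_disable_huge_pmd_share(vma))
		return false;
#endif
	return vma_shareable(vma, addr);
}

Together, the two sides close the race: want_pmd_share() stops new sharing
for registered ranges, while hugetlb_unshare_all_pmds() tears down any
sharing established before registration.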

include/linux/hugetlb.h

Lines changed: 3 additions & 0 deletions

@@ -188,6 +188,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 bool is_hugetlb_entry_migration(pte_t pte);
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -369,6 +370,8 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 	return 0;
 }
 
+static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
  * hugepages at page global directory. If arch support

mm/hugetlb.c

Lines changed: 51 additions & 0 deletions

@@ -5691,6 +5691,57 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 	}
 }
 
+/*
+ * This function will unconditionally remove all the shared pmd pgtable entries
+ * within the specific vma for a hugetlbfs memory range.
+ */
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_notifier_range range;
+	unsigned long address, start, end;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return;
+
+	start = ALIGN(vma->vm_start, PUD_SIZE);
+	end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+
+	if (start >= end)
+		return;
+
+	/*
+	 * No need to call adjust_range_if_pmd_sharing_possible(), because
+	 * we have already done the PUD_SIZE alignment.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+				start, end);
+	mmu_notifier_invalidate_range_start(&range);
+	i_mmap_lock_write(vma->vm_file->f_mapping);
+	for (address = start; address < end; address += PUD_SIZE) {
+		unsigned long tmp = address;
+
+		ptep = huge_pte_offset(mm, address, sz);
+		if (!ptep)
+			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
+		/* We don't want 'address' to be changed */
+		huge_pmd_unshare(mm, vma, &tmp, ptep);
+		spin_unlock(ptl);
+	}
+	flush_hugetlb_tlb_range(vma, start, end);
+	i_mmap_unlock_write(vma->vm_file->f_mapping);
+	/*
+	 * No need to call mmu_notifier_invalidate_range(), see
+	 * Documentation/vm/mmu_notifier.rst.
+	 */
+	mmu_notifier_invalidate_range_end(&range);
+}
+
 #ifdef CONFIG_CMA
 static bool cma_reserve_called __initdata;
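
Two details of the function above deserve a note. huge_pmd_unshare() may
rewind the address through its pointer argument, which is why the loop hands
it a throwaway tmp copy. And the PUD_SIZE clamping keeps the walk inside PUD
ranges fully covered by the VMA, since pmd sharing operates at PUD
granularity. A standalone sketch of that clamping arithmetic, with an
assumed x86_64 PUD_SIZE of 1GB and hypothetical VMA bounds:

/*
 * Standalone sketch of the start/end clamping; the PUD_SIZE value and
 * VMA bounds are assumptions, not taken from the patch itself.
 */
#include <stdio.h>

#define PUD_SIZE		(1UL << 30)	/* assumed: 1GB on x86_64 */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

int main(void)
{
	/* Hypothetical VMA spanning 512MB..2.5GB */
	unsigned long vm_start = PUD_SIZE / 2;
	unsigned long vm_end = 5 * (PUD_SIZE / 2);

	unsigned long start = ALIGN(vm_start, PUD_SIZE);	/* 1GB */
	unsigned long end = ALIGN_DOWN(vm_end, PUD_SIZE);	/* 2GB */

	if (start >= end)	/* VMA covers no whole PUD range */
		printf("nothing to unshare\n");
	else
		printf("unshare [%#lx, %#lx)\n", start, end);
	return 0;
}

When the VMA does not span at least one fully aligned PUD range, start >= end
and the function returns early; the same alignment is why the patch's comment
says adjust_range_if_pmd_sharing_possible() is unnecessary here.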
