
Commit d7056d3

mm/hugetlb: unshare page tables during VMA split, not before
jira VULN-71585
cve CVE-2025-38084
commit-author Jann Horn <[email protected]>
commit 081056d
upstream-diff Stable 5.15 backport 366298f2b04d2bf1f2f2b7078405bdf9df9bd5d0
    was used for the actual (clean) cherry-pick

Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.

Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.

An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:

1. from hugetlb_split(), holding:
    - mmap lock (exclusively)
    - VMA lock
    - file rmap lock (exclusively)

2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
   call us with only the mmap lock held (in shared mode), but currently
   only runs while holding mmap lock (exclusively) and VMA lock

Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14c ("hugetlb: unshare some PMDs when splitting VMAs"); that commit
claimed to fix an issue introduced in 5.13, but it should actually also go
all the way back.

[[email protected]: v2]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Fixes: 39dde65 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <[email protected]>
Cc: Liam Howlett <[email protected]>
Reviewed-by: Lorenzo Stoakes <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: <[email protected]> [b30c14c: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
(cherry picked from commit 081056d)
Signed-off-by: Marcin Wcisło <[email protected]>
1 parent 6b0f840 commit d7056d3
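
For context (not part of the commit): the VMA split that this patch hardens is reachable directly from userspace, for example by unmapping an interior piece of a hugetlb mapping. Below is a minimal illustrative sketch of such a trigger; the 2 MiB hugepage size, the MAP_HUGETLB flags, and an available hugepage pool (/proc/sys/vm/nr_hugepages) are assumptions for the example, not derived from the patch.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

#define HPAGE_SIZE	(2UL << 20)	/* assumed 2 MiB hugepages */

int main(void)
{
	/* Hugetlb mapping; PMD sharing applies to shared mappings. */
	char *p = mmap(NULL, 8 * HPAGE_SIZE, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] = 1;	/* fault in the first hugepage */

	/*
	 * Unmapping an interior hugepage splits the VMA at boundaries
	 * that are in general not PUD-aligned; with this patch, the PMD
	 * unsharing around the split point happens under the VMA and
	 * rmap write locks instead of before they are taken.
	 */
	if (munmap(p + HPAGE_SIZE, HPAGE_SIZE)) {
		perror("munmap");
		return 1;
	}
	return 0;
}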

3 files changed: +53 -14 lines changed

include/linux/hugetlb.h

Lines changed: 3 additions & 0 deletions
@@ -215,6 +215,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 
 bool is_hugetlb_entry_migration(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -420,6 +421,8 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 
 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
 
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
  * hugepages at page global directory. If arch support
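
The empty stub in the !CONFIG_HUGETLB_PAGE branch follows the usual kernel pattern: call sites (here, __vma_adjust() in mm/mmap.c below) invoke hugetlb_split() unconditionally, and the call compiles away when hugetlb support is disabled. A self-contained toy sketch of the same pattern, with invented names, looks like this:

#include <stdio.h>

#define CONFIG_FEATURE 0	/* flip to 1 to compile the feature in */

#if CONFIG_FEATURE
static void feature_split(unsigned long addr)
{
	printf("split at %#lx\n", addr);
}
#else
/* No-op stub: call sites need no #ifdef and the call folds away. */
static inline void feature_split(unsigned long addr) { (void)addr; }
#endif

int main(void)
{
	feature_split(0x200000);	/* unconditional call site */
	return 0;
}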

mm/hugetlb.c

Lines changed: 42 additions & 14 deletions
@@ -96,7 +96,7 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end);
+		unsigned long start, unsigned long end, bool take_locks);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -4630,26 +4630,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 {
 	if (addr & ~(huge_page_mask(hstate_vma(vma))))
 		return -EINVAL;
+	return 0;
+}
 
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
 	/*
 	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
 	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
 	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+	 * This function is called in the middle of a VMA split operation, with
+	 * MM, VMA and rmap all write-locked to prevent concurrent page table
+	 * walks (except hardware and gup_fast()).
 	 */
+	mmap_assert_write_locked(vma->vm_mm);
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
 	if (addr & ~PUD_MASK) {
-		/*
-		 * hugetlb_vm_op_split is called right before we attempt to
-		 * split the VMA. We will need to unshare PMDs in the old and
-		 * new VMAs, so let's unshare before we split.
-		 */
 		unsigned long floor = addr & PUD_MASK;
 		unsigned long ceil = floor + PUD_SIZE;
 
-		if (floor >= vma->vm_start && ceil <= vma->vm_end)
-			hugetlb_unshare_pmds(vma, floor, ceil);
+		if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+			/*
+			 * Locking:
+			 * Use take_locks=false here.
+			 * The file rmap lock is already held.
+			 * The hugetlb VMA lock can't be taken when we already
+			 * hold the file rmap lock, and we don't need it because
+			 * its purpose is to synchronize against concurrent page
+			 * table walks, which are not possible thanks to the
+			 * locks held by our caller.
+			 */
+			hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+		}
 	}
-
-	return 0;
 }
 
 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7055,9 +7069,16 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 	}
 }
 
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 				unsigned long start,
-				unsigned long end)
+				unsigned long end,
+				bool take_locks)
 {
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -7081,7 +7102,11 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
 				start, end);
 	mmu_notifier_invalidate_range_start(&range);
-	i_mmap_lock_write(vma->vm_file->f_mapping);
+	if (take_locks) {
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+	} else {
+		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+	}
 	for (address = start; address < end; address += PUD_SIZE) {
 		unsigned long tmp = address;
 
@@ -7094,7 +7119,9 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		spin_unlock(ptl);
 	}
 	flush_hugetlb_tlb_range(vma, start, end);
-	i_mmap_unlock_write(vma->vm_file->f_mapping);
+	if (take_locks) {
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
 	/*
 	 * No need to call mmu_notifier_invalidate_range(), see
 	 * Documentation/vm/mmu_notifier.rst.
@@ -7109,7 +7136,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
 {
 	hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
-			ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+			ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+			/* take_locks = */ true);
 }
 
 #ifdef CONFIG_CMA
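
To recap the two locking contexts the commit message describes, these are the two call sites of hugetlb_unshare_pmds() as they stand after this patch (condensed from the hunks above, not new code):

/* 1. From hugetlb_split(): the mmap lock and the file rmap lock are
 *    already held for writing by the VMA-split path, so the helper
 *    must not retake them.
 */
hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);

/* 2. From hugetlb_unshare_all_pmds(): no rmap lock is held on entry,
 *    so the helper takes and releases the file rmap lock itself.
 */
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
		     ALIGN_DOWN(vma->vm_end, PUD_SIZE),
		     /* take_locks = */ true);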

mm/mmap.c

Lines changed: 8 additions & 0 deletions
@@ -851,7 +851,15 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 again:
+	/*
+	 * Get rid of huge pages and shared page tables straddling the split
+	 * boundary.
+	 */
 	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
+	if (is_vm_hugetlb_page(orig_vma)) {
+		hugetlb_split(orig_vma, start);
+		hugetlb_split(orig_vma, end);
+	}
 
 	if (file) {
 		mapping = file->f_mapping;
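
The net effect on ordering can be summarized as follows (a rough sketch based on the commit message; exact call chains vary by kernel version):

/*
 * Before (racy):                     After (fixed):
 *   __split_vma()                      __split_vma()
 *     vm_ops->may_split()                vm_ops->may_split()  (alignment
 *       hugetlb_unshare_pmds()                                 check only)
 *     ... VMA/rmap locks taken           __vma_adjust()       (locks held)
 *         later ...                        vma_adjust_trans_huge()
 *     -> racing faults and rmap            hugetlb_split()
 *        walks can re-share PMDs             -> hugetlb_unshare_pmds()
 *        before the split completes
 */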
