@@ -314,8 +314,11 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
314314 if (!pmd_present (_pmd ))
315315 goto out ;
316316
317- if (pmd_trans_huge (_pmd ))
317+ if (pmd_trans_huge (_pmd )) {
318+ if (!pmd_write (_pmd ) && (reason & VM_UFFD_WP ))
319+ ret = true;
318320 goto out ;
321+ }
319322
320323 /*
321324 * the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
@@ -328,6 +331,8 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
328331 */
329332 if (pte_none (* pte ))
330333 ret = true;
334+ if (!pte_write (* pte ) && (reason & VM_UFFD_WP ))
335+ ret = true;
331336 pte_unmap (pte );
332337
333338out :
@@ -1287,10 +1292,13 @@ static __always_inline int validate_range(struct mm_struct *mm,
12871292 return 0 ;
12881293}
12891294
/*
 * vma_can_userfault - decide whether a vma may be used with userfaultfd
 * for a given set of registration flags.
 *
 * This hunk swaps the one-argument form (the '-' lines) for a
 * two-argument form (the '+' lines) that additionally receives the
 * vm_flags under consideration.
 *
 * Old form: true for anonymous, hugetlb and shmem vmas.
 * New form: true for anonymous vmas whatever vm_flags holds; true for
 * hugetlb and shmem vmas only when VM_UFFD_WP is clear in vm_flags;
 * false otherwise.
 *
 * Callers visible in this patch pass either the flags being requested
 * at registration time or the vma's own vm_flags (unregister path).
 */
1290- static inline bool vma_can_userfault (struct vm_area_struct * vma )
1295+ static inline bool vma_can_userfault (struct vm_area_struct * vma ,
1296+ unsigned long vm_flags )
12911297{
1292- return vma_is_anonymous (vma ) || is_vm_hugetlb_page (vma ) ||
1293- vma_is_shmem (vma );
1298+ /* FIXME: add WP support to hugetlbfs and shmem */
1299+ return vma_is_anonymous (vma ) ||
1300+ ((is_vm_hugetlb_page (vma ) || vma_is_shmem (vma )) &&
1301+ !(vm_flags & VM_UFFD_WP ));
12941302}
12951303
12961304static int userfaultfd_register (struct userfaultfd_ctx * ctx ,
@@ -1322,15 +1330,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
13221330 vm_flags = 0 ;
13231331 if (uffdio_register .mode & UFFDIO_REGISTER_MODE_MISSING )
13241332 vm_flags |= VM_UFFD_MISSING ;
1325- if (uffdio_register .mode & UFFDIO_REGISTER_MODE_WP ) {
1333+ if (uffdio_register .mode & UFFDIO_REGISTER_MODE_WP )
13261334 vm_flags |= VM_UFFD_WP ;
1327- /*
1328- * FIXME: remove the below error constraint by
1329- * implementing the wprotect tracking mode.
1330- */
1331- ret = - EINVAL ;
1332- goto out ;
1333- }
13341335
13351336 ret = validate_range (mm , & uffdio_register .range .start ,
13361337 uffdio_register .range .len );
@@ -1380,7 +1381,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
13801381
13811382 /* check not compatible vmas */
13821383 ret = - EINVAL ;
1383- if (!vma_can_userfault (cur ))
1384+ if (!vma_can_userfault (cur , vm_flags ))
13841385 goto out_unlock ;
13851386
13861387 /*
@@ -1408,6 +1409,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
14081409 if (end & (vma_hpagesize - 1 ))
14091410 goto out_unlock ;
14101411 }
1412+ if ((vm_flags & VM_UFFD_WP ) && !(cur -> vm_flags & VM_MAYWRITE ))
1413+ goto out_unlock ;
14111414
14121415 /*
14131416 * Check that this vma isn't already owned by a
@@ -1437,7 +1440,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
14371440 do {
14381441 cond_resched ();
14391442
1440- BUG_ON (!vma_can_userfault (vma ));
1443+ BUG_ON (!vma_can_userfault (vma , vm_flags ));
14411444 BUG_ON (vma -> vm_userfaultfd_ctx .ctx &&
14421445 vma -> vm_userfaultfd_ctx .ctx != ctx );
14431446 WARN_ON (!(vma -> vm_flags & VM_MAYWRITE ));
@@ -1575,7 +1578,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
15751578 * provides for more strict behavior to notice
15761579 * unregistration errors.
15771580 */
1578- if (!vma_can_userfault (cur ))
1581+ if (!vma_can_userfault (cur , cur -> vm_flags ))
15791582 goto out_unlock ;
15801583
15811584 found = true;
@@ -1589,7 +1592,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
15891592 do {
15901593 cond_resched ();
15911594
1592- BUG_ON (!vma_can_userfault (vma ));
1595+ BUG_ON (!vma_can_userfault (vma , vma -> vm_flags ));
15931596
15941597 /*
15951598 * Nothing to do: this vma is already registered into this
@@ -1802,6 +1805,50 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
18021805 return ret ;
18031806}
18041807
/*
 * userfaultfd_writeprotect - handler for the UFFDIO_WRITEPROTECT ioctl
 * (dispatched from the ioctl switch added later in this patch).
 *
 * @ctx: userfaultfd context the ioctl was issued on.
 * @arg: user-space address of a struct uffdio_writeprotect; it is
 *       copied in with copy_from_user() and never written back.
 *
 * Returns:
 *   -EAGAIN  if ctx->mmap_changing is set on entry;
 *   -EFAULT  if the argument cannot be copied from user space;
 *   -EINVAL  if mode contains bits other than MODE_WP/MODE_DONTWAKE,
 *            or if MODE_WP and MODE_DONTWAKE are both set;
 *   the error from validate_range() or mwriteprotect_range() when
 *   either reports a non-zero result;
 *   0 otherwise.
 */
1808+ static int userfaultfd_writeprotect (struct userfaultfd_ctx * ctx ,
1809+ unsigned long arg )
1810+ {
1811+ int ret ;
1812+ struct uffdio_writeprotect uffdio_wp ;
1813+ struct uffdio_writeprotect __user * user_uffdio_wp ;
1814+ struct userfaultfd_wake_range range ;
1815+
/* Bail out early while the context is flagged as mmap_changing. */
1816+ if (READ_ONCE (ctx -> mmap_changing ))
1817+ return - EAGAIN ;
1818+
1819+ user_uffdio_wp = (struct uffdio_writeprotect __user * ) arg ;
1820+
1821+ if (copy_from_user (& uffdio_wp , user_uffdio_wp ,
1822+ sizeof (struct uffdio_writeprotect )))
1823+ return - EFAULT ;
1824+
/* The range is checked before the mode bits are looked at. */
1825+ ret = validate_range (ctx -> mm , & uffdio_wp .range .start ,
1826+ uffdio_wp .range .len );
1827+ if (ret )
1828+ return ret ;
1829+
/* Reject unknown mode bits, and the WP + DONTWAKE combination. */
1830+ if (uffdio_wp .mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
1831+ UFFDIO_WRITEPROTECT_MODE_WP ))
1832+ return - EINVAL ;
1833+ if ((uffdio_wp .mode & UFFDIO_WRITEPROTECT_MODE_WP ) &&
1834+ (uffdio_wp .mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE ))
1835+ return - EINVAL ;
1836+
/*
 * The masked MODE_WP bit is handed to mwriteprotect_range() as its
 * fourth argument; presumably non-zero means "protect" and zero means
 * "unprotect" -- confirm against mwriteprotect_range()'s definition.
 */
1837+ ret = mwriteprotect_range (ctx -> mm , uffdio_wp .range .start ,
1838+ uffdio_wp .range .len , uffdio_wp .mode &
1839+ UFFDIO_WRITEPROTECT_MODE_WP ,
1840+ & ctx -> mmap_changing );
1841+ if (ret )
1842+ return ret ;
1843+
/*
 * Wake waiters on the same range unless MODE_DONTWAKE was given.
 * NOTE(review): because WP + DONTWAKE is rejected above, this wake
 * also runs whenever MODE_WP is set -- confirm that waking on a
 * protect request is intended.
 */
1844+ if (!(uffdio_wp .mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE )) {
1845+ range .start = uffdio_wp .range .start ;
1846+ range .len = uffdio_wp .range .len ;
1847+ wake_userfault (ctx , & range );
1848+ }
/* ret is necessarily 0 here: every non-zero value returned above. */
1849+ return ret ;
1850+ }
1851+
18051852static inline unsigned int uffd_ctx_features (__u64 user_features )
18061853{
18071854 /*
@@ -1883,6 +1930,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
18831930 case UFFDIO_ZEROPAGE :
18841931 ret = userfaultfd_zeropage (ctx , arg );
18851932 break ;
1933+ case UFFDIO_WRITEPROTECT :
1934+ ret = userfaultfd_writeprotect (ctx , arg );
1935+ break ;
18861936 }
18871937 return ret ;
18881938}
0 commit comments