Skip to content

Commit d09e8ca

Browse files
soleenakpm00
authored and committed
mm: anonymous shared memory naming
Since commit 9a10064 ("mm: add a field to store names for private anonymous memory"), a name can be set for private anonymous memory, but not for shared anonymous memory. However, naming shared anonymous memory is just as useful for tracking purposes. Extend the functionality to be able to set names for shared anonymous memory. There are two ways to create anonymous shared memory, using memfd or directly via mmap(): 1. fd = memfd_create(...) mem = mmap(..., MAP_SHARED, fd, ...) 2. mem = mmap(..., MAP_SHARED | MAP_ANONYMOUS, -1, ...) In both cases the anonymous shared memory is created the same way, by mapping an unlinked file on tmpfs. The memfd way allows giving a name to the anonymous shared memory, but it is not useful when parts of the shared memory need to have distinct names. Example use case: The VMM maps VM memory as anonymous shared memory (not private because the VMM is sandboxed and drivers are running in their own processes). However, the VM tells the VMM how parts of the memory are actually used by the guest, how each of the segments should be backed (e.g. 4K pages, 2M pages), and some other information about the segments. The naming allows us to monitor the effective memory footprint for each of these segments from the host without looking inside the guest. 
Sample output: /* Create shared anonymous segment */ anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); /* Name the segment: "MY-NAME" */ rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, anon_shmem, SIZE, "MY-NAME"); cat /proc/<pid>/maps (and smaps): 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME] If the segment is not named, the output is: 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted) Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Pasha Tatashin <[email protected]> Acked-by: David Hildenbrand <[email protected]> Cc: Arnd Bergmann <[email protected]> Cc: Bagas Sanjaya <[email protected]> Cc: Colin Cross <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: Jonathan Corbet <[email protected]> Cc: "Kirill A . Shutemov" <[email protected]> Cc: Liam Howlett <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Mike Rapoport <[email protected]> Cc: Paul Gortmaker <[email protected]> Cc: Peter Xu <[email protected]> Cc: Sean Christopherson <[email protected]> Cc: Vincent Whitchurch <[email protected]> Cc: Vlastimil Babka <[email protected]> Cc: xu xin <[email protected]> Cc: Yang Shi <[email protected]> Cc: Yu Zhao <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent b7217a0 commit d09e8ca

File tree

6 files changed

+57
-30
lines changed

6 files changed

+57
-30
lines changed

Documentation/filesystems/proc.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -426,14 +426,16 @@ with the memory region, as the case would be with BSS (uninitialized data).
426426
The "pathname" shows the name associated file for this mapping. If the mapping
427427
is not associated with a file:
428428

429-
============= ====================================
429+
=================== ===========================================
430430
[heap] the heap of the program
431431
[stack] the stack of the main process
432432
[vdso] the "virtual dynamic shared object",
433433
the kernel system call handler
434-
[anon:<name>] an anonymous mapping that has been
434+
[anon:<name>] a private anonymous mapping that has been
435435
named by userspace
436-
============= ====================================
436+
[anon_shmem:<name>] an anonymous shared memory mapping that has
437+
been named by userspace
438+
=================== ===========================================
437439

438440
or if empty, the mapping is anonymous.
439441

fs/proc/task_mmu.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m,
274274
static void
275275
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
276276
{
277+
struct anon_vma_name *anon_name = NULL;
277278
struct mm_struct *mm = vma->vm_mm;
278279
struct file *file = vma->vm_file;
279280
vm_flags_t flags = vma->vm_flags;
@@ -293,14 +294,23 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
293294
start = vma->vm_start;
294295
end = vma->vm_end;
295296
show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
297+
if (mm)
298+
anon_name = anon_vma_name(vma);
296299

297300
/*
298301
* Print the dentry name for named mappings, and a
299302
* special [heap] marker for the heap:
300303
*/
301304
if (file) {
302305
seq_pad(m, ' ');
303-
seq_file_path(m, file, "\n");
306+
/*
307+
* If user named this anon shared memory via
308+
* prctl(PR_SET_VMA ..., use the provided name.
309+
*/
310+
if (anon_name)
311+
seq_printf(m, "[anon_shmem:%s]", anon_name->name);
312+
else
313+
seq_file_path(m, file, "\n");
304314
goto done;
305315
}
306316

@@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
312322

313323
name = arch_vma_name(vma);
314324
if (!name) {
315-
struct anon_vma_name *anon_name;
316-
317325
if (!mm) {
318326
name = "[vdso]";
319327
goto done;
@@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
330338
goto done;
331339
}
332340

333-
anon_name = anon_vma_name(vma);
334341
if (anon_name) {
335342
seq_pad(m, ' ');
336343
seq_printf(m, "[anon:%s]", anon_name->name);

include/linux/mm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
700700
* paths in userfault.
701701
*/
702702
bool vma_is_shmem(struct vm_area_struct *vma);
703+
bool vma_is_anon_shmem(struct vm_area_struct *vma);
703704
#else
704705
static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
706+
static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
705707
#endif
706708

707709
int vma_is_stack_for_current(struct vm_area_struct *vma);

include/linux/mm_types.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -549,21 +549,11 @@ struct vm_area_struct {
549549
* For areas with an address space and backing store,
550550
* linkage into the address_space->i_mmap interval tree.
551551
*
552-
* For private anonymous mappings, a pointer to a null terminated string
553-
* containing the name given to the vma, or NULL if unnamed.
554552
*/
555-
556-
union {
557-
struct {
558-
struct rb_node rb;
559-
unsigned long rb_subtree_last;
560-
} shared;
561-
/*
562-
* Serialized by mmap_sem. Never use directly because it is
563-
* valid only when vm_file is NULL. Use anon_vma_name instead.
564-
*/
565-
struct anon_vma_name *anon_name;
566-
};
553+
struct {
554+
struct rb_node rb;
555+
unsigned long rb_subtree_last;
556+
} shared;
567557

568558
/*
569559
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -584,6 +574,14 @@ struct vm_area_struct {
584574
struct file * vm_file; /* File we map to (can be NULL). */
585575
void * vm_private_data; /* was vm_pte (shared mem) */
586576

577+
#ifdef CONFIG_ANON_VMA_NAME
578+
/*
579+
* For private and shared anonymous mappings, a pointer to a null
580+
* terminated string containing the name given to the vma, or NULL if
581+
* unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
582+
*/
583+
struct anon_vma_name *anon_name;
584+
#endif
587585
#ifdef CONFIG_SWAP
588586
atomic_long_t swap_readahead_info;
589587
#endif

mm/madvise.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,6 @@ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
9595
{
9696
mmap_assert_locked(vma->vm_mm);
9797

98-
if (vma->vm_file)
99-
return NULL;
100-
10198
return vma->anon_name;
10299
}
103100

@@ -183,7 +180,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
183180
* vm_flags is protected by the mmap_lock held in write mode.
184181
*/
185182
vma->vm_flags = new_flags;
186-
if (!vma->vm_file) {
183+
if (!vma->vm_file || vma_is_anon_shmem(vma)) {
187184
error = replace_anon_vma_name(vma, anon_name);
188185
if (error)
189186
return error;
@@ -1273,7 +1270,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
12731270
int error;
12741271

12751272
/* Only anonymous mappings can be named */
1276-
if (vma->vm_file)
1273+
if (vma->vm_file && !vma_is_anon_shmem(vma))
12771274
return -EBADF;
12781275

12791276
error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,

mm/shmem.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,11 +237,17 @@ static const struct inode_operations shmem_inode_operations;
237237
static const struct inode_operations shmem_dir_inode_operations;
238238
static const struct inode_operations shmem_special_inode_operations;
239239
static const struct vm_operations_struct shmem_vm_ops;
240+
static const struct vm_operations_struct shmem_anon_vm_ops;
240241
static struct file_system_type shmem_fs_type;
241242

243+
bool vma_is_anon_shmem(struct vm_area_struct *vma)
244+
{
245+
return vma->vm_ops == &shmem_anon_vm_ops;
246+
}
247+
242248
bool vma_is_shmem(struct vm_area_struct *vma)
243249
{
244-
return vma->vm_ops == &shmem_vm_ops;
250+
return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
245251
}
246252

247253
static LIST_HEAD(shmem_swaplist);
@@ -2263,7 +2269,8 @@ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
22632269

22642270
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
22652271
{
2266-
struct shmem_inode_info *info = SHMEM_I(file_inode(file));
2272+
struct inode *inode = file_inode(file);
2273+
struct shmem_inode_info *info = SHMEM_I(inode);
22672274
int ret;
22682275

22692276
ret = seal_check_future_write(info->seals, vma);
@@ -2274,7 +2281,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
22742281
vma->vm_flags |= VM_MTE_ALLOWED;
22752282

22762283
file_accessed(file);
2277-
vma->vm_ops = &shmem_vm_ops;
2284+
/* This is anonymous shared memory if it is unlinked at the time of mmap */
2285+
if (inode->i_nlink)
2286+
vma->vm_ops = &shmem_vm_ops;
2287+
else
2288+
vma->vm_ops = &shmem_anon_vm_ops;
22782289
return 0;
22792290
}
22802291

@@ -3988,6 +3999,15 @@ static const struct vm_operations_struct shmem_vm_ops = {
39883999
#endif
39894000
};
39904001

4002+
static const struct vm_operations_struct shmem_anon_vm_ops = {
4003+
.fault = shmem_fault,
4004+
.map_pages = filemap_map_pages,
4005+
#ifdef CONFIG_NUMA
4006+
.set_policy = shmem_set_policy,
4007+
.get_policy = shmem_get_policy,
4008+
#endif
4009+
};
4010+
39914011
int shmem_init_fs_context(struct fs_context *fc)
39924012
{
39934013
struct shmem_options *ctx;
@@ -4163,6 +4183,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
41634183
EXPORT_SYMBOL_GPL(shmem_truncate_range);
41644184

41654185
#define shmem_vm_ops generic_file_vm_ops
4186+
#define shmem_anon_vm_ops generic_file_vm_ops
41664187
#define shmem_file_operations ramfs_file_operations
41674188
#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
41684189
#define shmem_acct_size(flags, size) 0
@@ -4268,7 +4289,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
42684289
if (vma->vm_file)
42694290
fput(vma->vm_file);
42704291
vma->vm_file = file;
4271-
vma->vm_ops = &shmem_vm_ops;
4292+
vma->vm_ops = &shmem_anon_vm_ops;
42724293

42734294
return 0;
42744295
}

0 commit comments

Comments
 (0)