Skip to content

Commit bfccacd

Browse files
committed
Merge branch 'allow-mmap-of-sys-kernel-btf-vmlinux'
Lorenz Bauer says:

====================
Allow mmap of /sys/kernel/btf/vmlinux

I'd like to cut down the memory usage of parsing vmlinux BTF in ebpf-go. With some upcoming changes the library is sitting at 5MiB for a parse. Most of that memory is simply copying the BTF blob into user space. By allowing vmlinux BTF to be mmapped read-only into user space I can cut memory usage by about 75%.

Signed-off-by: Lorenz Bauer <[email protected]>
---
Changes in v5:
- Fix error return of btf_parse_raw_mmap (Andrii)
- Link to v4: https://lore.kernel.org/r/[email protected]

Changes in v4:
- Go back to remap_pfn_range for aarch64 compat
- Dropped btf_new_no_copy (Andrii)
- Fixed nits in selftests (Andrii)
- Clearer error handling in the mmap handler (Andrii)
- Fixed build on s390
- Link to v3: https://lore.kernel.org/r/[email protected]

Changes in v3:
- Remove slightly confusing calculation of trailing (Alexei)
- Use vm_insert_page (Alexei)
- Simplified libbpf code
- Link to v2: https://lore.kernel.org/r/[email protected]

Changes in v2:
- Use btf__new in selftest
- Avoid vm_iomap_memory in btf_vmlinux_mmap
- Add VM_DONTDUMP
- Add support to libbpf
- Link to v1: https://lore.kernel.org/r/[email protected]
---
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Andrii Nakryiko <[email protected]>
2 parents 8259eb0 + 3c0421c commit bfccacd

File tree

4 files changed

+186
-19
lines changed

4 files changed

+186
-19
lines changed

include/asm-generic/vmlinux.lds.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
667667
*/
668668
#ifdef CONFIG_DEBUG_INFO_BTF
669669
#define BTF \
670+
. = ALIGN(PAGE_SIZE); \
670671
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
671672
BOUNDED_SECTION_BY(.BTF, _BTF) \
672673
} \
673-
. = ALIGN(4); \
674+
. = ALIGN(PAGE_SIZE); \
674675
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
675676
*(.BTF_ids) \
676677
}

kernel/bpf/sysfs_btf.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,46 @@
77
#include <linux/kobject.h>
88
#include <linux/init.h>
99
#include <linux/sysfs.h>
10+
#include <linux/mm.h>
11+
#include <linux/io.h>
12+
#include <linux/btf.h>
1013

1114
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
1215
extern char __start_BTF[];
1316
extern char __stop_BTF[];
1417

18+
static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
19+
const struct bin_attribute *attr,
20+
struct vm_area_struct *vma)
21+
{
22+
unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
23+
size_t vm_size = vma->vm_end - vma->vm_start;
24+
phys_addr_t addr = virt_to_phys(__start_BTF);
25+
unsigned long pfn = addr >> PAGE_SHIFT;
26+
27+
if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
28+
return -EINVAL;
29+
30+
if (vma->vm_pgoff)
31+
return -EINVAL;
32+
33+
if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
34+
return -EACCES;
35+
36+
if (pfn + pages < pfn)
37+
return -EINVAL;
38+
39+
if ((vm_size >> PAGE_SHIFT) > pages)
40+
return -EINVAL;
41+
42+
vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
43+
return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
44+
}
45+
1546
/* World-readable (0444) "vmlinux" binary attribute; supports read and mmap. */
static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
	.attr = {
		.name = "vmlinux",
		.mode = 0444,
	},
	.read_new = sysfs_bin_attr_simple_read,
	.mmap = btf_sysfs_vmlinux_mmap,
};
1951

2052
struct kobject *btf_kobj;

tools/lib/bpf/btf.c

Lines changed: 71 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <sys/utsname.h>
1313
#include <sys/param.h>
1414
#include <sys/stat.h>
15+
#include <sys/mman.h>
1516
#include <linux/kernel.h>
1617
#include <linux/err.h>
1718
#include <linux/btf.h>
@@ -120,6 +121,9 @@ struct btf {
120121
/* whether base_btf should be freed in btf_free for this instance */
121122
bool owns_base;
122123

124+
/* whether raw_data is a (read-only) mmap */
125+
bool raw_data_is_mmap;
126+
123127
/* BTF object FD, if loaded into kernel */
124128
int fd;
125129

@@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
951955
return (void *)btf->hdr != btf->raw_data;
952956
}
953957

958+
static void btf_free_raw_data(struct btf *btf)
959+
{
960+
if (btf->raw_data_is_mmap) {
961+
munmap(btf->raw_data, btf->raw_size);
962+
btf->raw_data_is_mmap = false;
963+
} else {
964+
free(btf->raw_data);
965+
}
966+
btf->raw_data = NULL;
967+
}
968+
954969
void btf__free(struct btf *btf)
955970
{
956971
if (IS_ERR_OR_NULL(btf))
@@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
970985
free(btf->types_data);
971986
strset__free(btf->strs_set);
972987
}
973-
free(btf->raw_data);
988+
btf_free_raw_data(btf);
974989
free(btf->raw_data_swapped);
975990
free(btf->type_offs);
976991
if (btf->owns_base)
@@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
10301045
return libbpf_ptr(btf_new_empty(base_btf));
10311046
}
10321047

1033-
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
1048+
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
10341049
{
10351050
struct btf *btf;
10361051
int err;
@@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
10501065
btf->start_str_off = base_btf->hdr->str_len;
10511066
}
10521067

1053-
btf->raw_data = malloc(size);
1054-
if (!btf->raw_data) {
1055-
err = -ENOMEM;
1056-
goto done;
1068+
if (is_mmap) {
1069+
btf->raw_data = (void *)data;
1070+
btf->raw_data_is_mmap = true;
1071+
} else {
1072+
btf->raw_data = malloc(size);
1073+
if (!btf->raw_data) {
1074+
err = -ENOMEM;
1075+
goto done;
1076+
}
1077+
memcpy(btf->raw_data, data, size);
10571078
}
1058-
memcpy(btf->raw_data, data, size);
1079+
10591080
btf->raw_size = size;
10601081

10611082
btf->hdr = btf->raw_data;
@@ -1083,12 +1104,12 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
10831104

10841105
struct btf *btf__new(const void *data, __u32 size)
10851106
{
1086-
return libbpf_ptr(btf_new(data, size, NULL));
1107+
return libbpf_ptr(btf_new(data, size, NULL, false));
10871108
}
10881109

10891110
struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
10901111
{
1091-
return libbpf_ptr(btf_new(data, size, base_btf));
1112+
return libbpf_ptr(btf_new(data, size, base_btf, false));
10921113
}
10931114

10941115
struct btf_elf_secs {
@@ -1209,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
12091230

12101231
if (secs.btf_base_data) {
12111232
dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
1212-
NULL);
1233+
NULL, false);
12131234
if (IS_ERR(dist_base_btf)) {
12141235
err = PTR_ERR(dist_base_btf);
12151236
dist_base_btf = NULL;
@@ -1218,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
12181239
}
12191240

12201241
btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
1221-
dist_base_btf ?: base_btf);
1242+
dist_base_btf ?: base_btf, false);
12221243
if (IS_ERR(btf)) {
12231244
err = PTR_ERR(btf);
12241245
goto done;
@@ -1335,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
13351356
}
13361357

13371358
/* finally parse BTF data */
1338-
btf = btf_new(data, sz, base_btf);
1359+
btf = btf_new(data, sz, base_btf, false);
13391360

13401361
err_out:
13411362
free(data);
@@ -1354,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
13541375
return libbpf_ptr(btf_parse_raw(path, base_btf));
13551376
}
13561377

1378+
static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
1379+
{
1380+
struct stat st;
1381+
void *data;
1382+
struct btf *btf;
1383+
int fd, err;
1384+
1385+
fd = open(path, O_RDONLY);
1386+
if (fd < 0)
1387+
return libbpf_err_ptr(-errno);
1388+
1389+
if (fstat(fd, &st) < 0) {
1390+
err = -errno;
1391+
close(fd);
1392+
return libbpf_err_ptr(err);
1393+
}
1394+
1395+
data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
1396+
err = -errno;
1397+
close(fd);
1398+
1399+
if (data == MAP_FAILED)
1400+
return libbpf_err_ptr(err);
1401+
1402+
btf = btf_new(data, st.st_size, base_btf, true);
1403+
if (IS_ERR(btf))
1404+
munmap(data, st.st_size);
1405+
1406+
return btf;
1407+
}
1408+
13571409
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
13581410
{
13591411
struct btf *btf;
@@ -1618,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
16181670
goto exit_free;
16191671
}
16201672

1621-
btf = btf_new(ptr, btf_info.btf_size, base_btf);
1673+
btf = btf_new(ptr, btf_info.btf_size, base_btf, false);
16221674

16231675
exit_free:
16241676
free(ptr);
@@ -1658,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
16581710

16591711
static void btf_invalidate_raw_data(struct btf *btf)
16601712
{
1661-
if (btf->raw_data) {
1662-
free(btf->raw_data);
1663-
btf->raw_data = NULL;
1664-
}
1713+
if (btf->raw_data)
1714+
btf_free_raw_data(btf);
16651715
if (btf->raw_data_swapped) {
16661716
free(btf->raw_data_swapped);
16671717
btf->raw_data_swapped = NULL;
@@ -5331,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)
53315381
pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
53325382
sysfs_btf_path);
53335383
} else {
5334-
btf = btf__parse(sysfs_btf_path, NULL);
5384+
btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
5385+
if (IS_ERR(btf))
5386+
btf = btf__parse(sysfs_btf_path, NULL);
5387+
53355388
if (!btf) {
53365389
err = -errno;
53375390
pr_warn("failed to read kernel BTF from '%s': %s\n",
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2+
/* Copyright (c) 2025 Isovalent */
3+
4+
#include <test_progs.h>
5+
#include <bpf/btf.h>
6+
#include <sys/stat.h>
7+
#include <sys/mman.h>
8+
#include <fcntl.h>
9+
#include <unistd.h>
10+
11+
static void test_btf_mmap_sysfs(const char *path, struct btf *base)
12+
{
13+
struct stat st;
14+
__u64 btf_size, end;
15+
void *raw_data = NULL;
16+
int fd = -1;
17+
long page_size;
18+
struct btf *btf = NULL;
19+
20+
page_size = sysconf(_SC_PAGESIZE);
21+
if (!ASSERT_GE(page_size, 0, "get_page_size"))
22+
goto cleanup;
23+
24+
if (!ASSERT_OK(stat(path, &st), "stat_btf"))
25+
goto cleanup;
26+
27+
btf_size = st.st_size;
28+
end = (btf_size + page_size - 1) / page_size * page_size;
29+
30+
fd = open(path, O_RDONLY);
31+
if (!ASSERT_GE(fd, 0, "open_btf"))
32+
goto cleanup;
33+
34+
raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
35+
if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
36+
goto cleanup;
37+
38+
raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
39+
if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
40+
goto cleanup;
41+
42+
raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
43+
if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
44+
goto cleanup;
45+
46+
raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
47+
if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
48+
goto cleanup;
49+
50+
if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
51+
"mprotect_writable"))
52+
goto cleanup;
53+
54+
if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
55+
"mprotect_executable"))
56+
goto cleanup;
57+
58+
/* Check padding is zeroed */
59+
for (int i = btf_size; i < end; i++) {
60+
if (((__u8 *)raw_data)[i] != 0) {
61+
PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
62+
goto cleanup;
63+
}
64+
}
65+
66+
btf = btf__new_split(raw_data, btf_size, base);
67+
if (!ASSERT_OK_PTR(btf, "parse_btf"))
68+
goto cleanup;
69+
70+
cleanup:
71+
btf__free(btf);
72+
if (raw_data && raw_data != MAP_FAILED)
73+
munmap(raw_data, btf_size);
74+
if (fd >= 0)
75+
close(fd);
76+
}
77+
78+
void test_btf_sysfs(void)
79+
{
80+
test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
81+
}

0 commit comments

Comments
 (0)