Commit 31a304c

lmb authored and kernel-patches-bot committed
Add bpf_iter support for sockmap / sockhash, based on the bpf_sk_storage and hashtable implementations.

sockmap and sockhash share the same iteration context: a pointer to an arbitrary key and a pointer to a socket. Both pointers may be NULL, so a BPF program has to perform a NULL check before accessing them. Technically it's not possible for sockhash iteration to yield a NULL socket, but we ignore this to be able to use a single iteration point.

Iteration will visit all keys that remain unmodified during the lifetime of the iterator. It may or may not visit newly added ones.

Switch from rcu_dereference_raw to plain rcu_dereference, so we gain another guard rail if CONFIG_PROVE_RCU is enabled.

Signed-off-by: Lorenz Bauer <[email protected]>
---
 net/core/sock_map.c | 280 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 278 insertions(+), 2 deletions(-)
1 parent 705843c commit 31a304c
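
For orientation, here is a minimal sketch (not part of this commit) of a BPF program that could consume the new iterator. It assumes the bpf_iter__sockmap context added below is available via vmlinux.h, and the "visited" counter is purely illustrative:

#include "vmlinux.h"		/* assumed to provide struct bpf_iter__sockmap */
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

long visited;			/* illustrative counter, not part of this patch */

SEC("iter/sockmap")
int count_elems(struct bpf_iter__sockmap *ctx)
{
	__u32 *key = ctx->key;
	struct sock *sk = ctx->sk;

	/* Both pointers may be NULL, as the commit message explains, so the
	 * verifier requires explicit NULL checks before either is used.
	 */
	if (!key || !sk)
		return 0;

	visited++;
	return 0;
}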


net/core/sock_map.c

Lines changed: 278 additions & 2 deletions
@@ -2,6 +2,7 @@
 /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
 
 #include <linux/bpf.h>
+#include <linux/btf_ids.h>
 #include <linux/filter.h>
 #include <linux/errno.h>
 #include <linux/file.h>
@@ -703,6 +704,109 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+struct sock_map_seq_info {
+	struct bpf_map *map;
+	struct sock *sk;
+	u32 index;
+};
+
+struct bpf_iter__sockmap {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct bpf_map *, map);
+	__bpf_md_ptr(void *, key);
+	__bpf_md_ptr(struct sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
+		     struct bpf_map *map, void *key,
+		     struct sock *sk)
+
+static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
+{
+	if (unlikely(info->index >= info->map->max_entries))
+		return NULL;
+
+	info->sk = __sock_map_lookup_elem(info->map, info->index);
+
+	/* can't return sk directly, since that might be NULL */
+	return info;
+}
+
+static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock_map_seq_info *info = seq->private;
+
+	if (*pos == 0)
+		++*pos;
+
+	/* pairs with sock_map_seq_stop */
+	rcu_read_lock();
+	return sock_map_seq_lookup_elem(info);
+}
+
+static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock_map_seq_info *info = seq->private;
+
+	++*pos;
+	++info->index;
+
+	return sock_map_seq_lookup_elem(info);
+}
+
+static int sock_map_seq_show(struct seq_file *seq, void *v)
+{
+	struct sock_map_seq_info *info = seq->private;
+	struct bpf_iter__sockmap ctx = {};
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, !v);
+	if (!prog)
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.map = info->map;
+	if (v) {
+		ctx.key = &info->index;
+		ctx.sk = info->sk;
+	}
+
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_map_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)sock_map_seq_show(seq, NULL);
+
+	/* pairs with sock_map_seq_start */
+	rcu_read_unlock();
+}
+
+static const struct seq_operations sock_map_seq_ops = {
+	.start	= sock_map_seq_start,
+	.next	= sock_map_seq_next,
+	.stop	= sock_map_seq_stop,
+	.show	= sock_map_seq_show,
+};
+
+static int sock_map_init_seq_private(void *priv_data,
+				     struct bpf_iter_aux_info *aux)
+{
+	struct sock_map_seq_info *info = priv_data;
+
+	info->map = aux->map;
+	return 0;
+}
+
+static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+	.seq_ops		= &sock_map_seq_ops,
+	.init_seq_private	= sock_map_init_seq_private,
+	.seq_priv_size		= sizeof(struct sock_map_seq_info),
+};
+
 static int sock_map_btf_id;
 const struct bpf_map_ops sock_map_ops = {
 	.map_meta_equal		= bpf_map_meta_equal,
@@ -717,6 +821,7 @@ const struct bpf_map_ops sock_map_ops = {
 	.map_check_btf		= map_check_no_btf,
 	.map_btf_name		= "bpf_stab",
 	.map_btf_id		= &sock_map_btf_id,
+	.iter_seq_info		= &sock_map_iter_seq_info,
 };
 
 struct bpf_shtab_elem {
@@ -953,7 +1058,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
 	if (!elem)
 		goto find_first_elem;
 
-	elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+	elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
				     struct bpf_shtab_elem, node);
 	if (elem_next) {
 		memcpy(key_next, elem_next->key, key_size);
@@ -965,7 +1070,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
 find_first_elem:
 	for (; i < htab->buckets_num; i++) {
 		head = &sock_hash_select_bucket(htab, i)->head;
-		elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+		elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
					     struct bpf_shtab_elem, node);
 		if (elem_next) {
 			memcpy(key_next, elem_next->key, key_size);
@@ -1199,6 +1304,117 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+struct sock_hash_seq_info {
+	struct bpf_map *map;
+	struct bpf_shtab *htab;
+	u32 bucket_id;
+};
+
+static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
+				     struct bpf_shtab_elem *prev_elem)
+{
+	const struct bpf_shtab *htab = info->htab;
+	struct bpf_shtab_bucket *bucket;
+	struct bpf_shtab_elem *elem;
+	struct hlist_node *node;
+
+	/* try to find next elem in the same bucket */
+	if (prev_elem) {
+		node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
+		elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+		if (elem)
+			return elem;
+
+		/* no more elements, continue in the next bucket */
+		info->bucket_id++;
+	}
+
+	for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
+		bucket = &htab->buckets[info->bucket_id];
+		node = rcu_dereference(hlist_first_rcu(&bucket->head));
+		elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+		if (elem)
+			return elem;
+	}
+
+	return NULL;
+}
+
+static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock_hash_seq_info *info = seq->private;
+
+	if (*pos == 0)
+		++*pos;
+
+	/* pairs with sock_hash_seq_stop */
+	rcu_read_lock();
+	return sock_hash_seq_find_next(info, NULL);
+}
+
+static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock_hash_seq_info *info = seq->private;
+
+	++*pos;
+	return sock_hash_seq_find_next(info, v);
+}
+
+static int sock_hash_seq_show(struct seq_file *seq, void *v)
+{
+	struct sock_hash_seq_info *info = seq->private;
+	struct bpf_iter__sockmap ctx = {};
+	struct bpf_shtab_elem *elem = v;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, !elem);
+	if (!prog)
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.map = info->map;
+	if (elem) {
+		ctx.key = elem->key;
+		ctx.sk = elem->sk;
+	}
+
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)sock_hash_seq_show(seq, NULL);
+
+	/* pairs with sock_hash_seq_start */
+	rcu_read_unlock();
+}
+
+static const struct seq_operations sock_hash_seq_ops = {
+	.start	= sock_hash_seq_start,
+	.next	= sock_hash_seq_next,
+	.stop	= sock_hash_seq_stop,
+	.show	= sock_hash_seq_show,
+};
+
+static int sock_hash_init_seq_private(void *priv_data,
+				      struct bpf_iter_aux_info *aux)
+{
+	struct sock_hash_seq_info *info = priv_data;
+
+	info->map = aux->map;
+	info->htab = container_of(aux->map, struct bpf_shtab, map);
+	return 0;
+}
+
+static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+	.seq_ops		= &sock_hash_seq_ops,
+	.init_seq_private	= sock_hash_init_seq_private,
+	.seq_priv_size		= sizeof(struct sock_hash_seq_info),
+};
+
 static int sock_hash_map_btf_id;
 const struct bpf_map_ops sock_hash_ops = {
 	.map_meta_equal		= bpf_map_meta_equal,
@@ -1213,6 +1429,7 @@ const struct bpf_map_ops sock_hash_ops = {
 	.map_check_btf		= map_check_no_btf,
 	.map_btf_name		= "bpf_shtab",
 	.map_btf_id		= &sock_hash_map_btf_id,
+	.iter_seq_info		= &sock_hash_iter_seq_info,
 };
 
 static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1323,3 +1540,62 @@ void sock_map_close(struct sock *sk, long timeout)
 	release_sock(sk);
 	saved_close(sk, timeout);
 }
+
+static int sock_map_iter_attach_target(struct bpf_prog *prog,
+				       union bpf_iter_link_info *linfo,
+				       struct bpf_iter_aux_info *aux)
+{
+	struct bpf_map *map;
+	int err = -EINVAL;
+
+	if (!linfo->map.map_fd)
+		return -EBADF;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+	    map->map_type != BPF_MAP_TYPE_SOCKHASH)
+		goto put_map;
+
+	if (prog->aux->max_rdonly_access > map->key_size) {
+		err = -EACCES;
+		goto put_map;
+	}
+
+	aux->map = map;
+	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
+}
+
+static struct bpf_iter_reg sock_map_iter_reg = {
+	.target			= "sockmap",
+	.attach_target		= sock_map_iter_attach_target,
+	.detach_target		= sock_map_iter_detach_target,
+	.show_fdinfo		= bpf_iter_map_show_fdinfo,
+	.fill_link_info		= bpf_iter_map_fill_link_info,
+	.ctx_arg_info_size	= 2,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__sockmap, key),
+		  PTR_TO_RDONLY_BUF_OR_NULL },
+		{ offsetof(struct bpf_iter__sockmap, sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static int __init bpf_sockmap_iter_init(void)
+{
+	sock_map_iter_reg.ctx_arg_info[1].btf_id =
+		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+	return bpf_iter_reg_target(&sock_map_iter_reg);
+}
+late_initcall(bpf_sockmap_iter_init);
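
Because sock_map_iter_attach_target requires a map fd in the iterator link info, userspace has to supply one when creating the link. A minimal sketch of how this could look with a contemporary libbpf, assuming a loaded skeleton whose iterator program is "iter_prog" and an existing sockmap fd "map_fd" (both hypothetical names):

	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo = {};
	struct bpf_link *link;
	char buf[64];
	int iter_fd;

	linfo.map.map_fd = map_fd;		/* BPF_MAP_TYPE_SOCKMAP or _SOCKHASH */
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	link = bpf_program__attach_iter(iter_prog, &opts);
	if (libbpf_get_error(link))
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));	/* one fd per traversal */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;					/* drives the seq_file iteration */

	close(iter_fd);
	bpf_link__destroy(link);

Per the attach_target code above, a map of any other type is rejected with -EINVAL, and a program whose read-only key access exceeds the map's key_size is rejected with -EACCES.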
