Skip to content

Commit c824034

Browse files
Amery Hung, Martin KaFai Lau
authored and committed
bpf: net_sched: Support implementation of Qdisc_ops in bpf
The recent advancement in bpf such as allocated objects, bpf list and bpf rbtree has provided powerful and flexible building blocks to realize sophisticated packet scheduling algorithms. As struct_ops now supports core operators in Qdisc_ops, start allowing qdisc to be implemented using bpf struct_ops with this patch. Users can implement Qdisc_ops.{enqueue, dequeue, init, reset, destroy} in bpf and register the qdisc dynamically into the kernel. Co-developed-by: Cong Wang <[email protected]> Signed-off-by: Cong Wang <[email protected]> Signed-off-by: Amery Hung <[email protected]> Signed-off-by: Martin KaFai Lau <[email protected]> Acked-by: Cong Wang <[email protected]> Acked-by: Toke Høiland-Jørgensen <[email protected]> Link: https://patch.msgid.link/[email protected]
1 parent a1b669e commit c824034

File tree

5 files changed

+235
-4
lines changed

5 files changed

+235
-4
lines changed

net/sched/Kconfig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,18 @@ config NET_SCH_ETS
403403

404404
If unsure, say N.
405405

406+
config NET_SCH_BPF
407+
bool "BPF-based Qdisc"
408+
depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF
409+
help
410+
This option allows BPF-based queueing disciplines. With BPF struct_ops,
411+
users can implement supported operators in Qdisc_ops using BPF programs.
412+
The queue holding skb can be built with BPF maps or graphs.
413+
414+
Say Y here if you want to use BPF-based Qdisc.
415+
416+
If unsure, say N.
417+
406418
menuconfig NET_SCH_DEFAULT
407419
bool "Allow override default queue discipline"
408420
help

net/sched/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ obj-$(CONFIG_NET_SCH_FQ_PIE) += sch_fq_pie.o
6262
obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
6363
obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
6464
obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
65+
obj-$(CONFIG_NET_SCH_BPF) += bpf_qdisc.o
6566

6667
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
6768
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o

net/sched/bpf_qdisc.c

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/types.h>
4+
#include <linux/bpf_verifier.h>
5+
#include <linux/bpf.h>
6+
#include <linux/btf.h>
7+
#include <linux/filter.h>
8+
#include <net/pkt_sched.h>
9+
#include <net/pkt_cls.h>
10+
11+
/* Forward declaration; the initialized definition is near the end of this file. */
static struct bpf_struct_ops bpf_Qdisc_ops;
12+
13+
/*
 * Single-member wrapper around an skb pointer. Its BTF id is recorded in
 * bpf_sk_buff_ptr_ids, which bpf_qdisc_is_valid_access() reports as the type
 * of context argument index 2 of .enqueue programs.
 */
struct bpf_sk_buff_ptr {
	struct sk_buff *skb;
};
16+
17+
/*
 * struct_ops .init callback (installed in bpf_Qdisc_ops). @btf is not used;
 * the function reports success unconditionally.
 */
static int bpf_qdisc_init(struct btf *btf)
{
	return 0;
}
21+
22+
/*
 * One-entry BTF id lists for the struct types this file compares against:
 * struct Qdisc and struct sk_buff (looked up in
 * bpf_qdisc_btf_struct_access()) and struct bpf_sk_buff_ptr (used in
 * bpf_qdisc_is_valid_access()).
 */
BTF_ID_LIST_SINGLE(bpf_qdisc_ids, struct, Qdisc)
BTF_ID_LIST_SINGLE(bpf_sk_buff_ids, struct, sk_buff)
BTF_ID_LIST_SINGLE(bpf_sk_buff_ptr_ids, struct, bpf_sk_buff_ptr)
25+
26+
static bool bpf_qdisc_is_valid_access(int off, int size,
27+
enum bpf_access_type type,
28+
const struct bpf_prog *prog,
29+
struct bpf_insn_access_aux *info)
30+
{
31+
struct btf *btf = prog->aux->attach_btf;
32+
u32 arg;
33+
34+
arg = btf_ctx_arg_idx(btf, prog->aux->attach_func_proto, off);
35+
if (prog->aux->attach_st_ops_member_off == offsetof(struct Qdisc_ops, enqueue)) {
36+
if (arg == 2 && type == BPF_READ) {
37+
info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED;
38+
info->btf = btf;
39+
info->btf_id = bpf_sk_buff_ptr_ids[0];
40+
return true;
41+
}
42+
}
43+
44+
return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
45+
}
46+
47+
static int bpf_qdisc_qdisc_access(struct bpf_verifier_log *log,
48+
const struct bpf_reg_state *reg,
49+
int off, size_t *end)
50+
{
51+
switch (off) {
52+
case offsetof(struct Qdisc, limit):
53+
*end = offsetofend(struct Qdisc, limit);
54+
break;
55+
case offsetof(struct Qdisc, q) + offsetof(struct qdisc_skb_head, qlen):
56+
*end = offsetof(struct Qdisc, q) + offsetofend(struct qdisc_skb_head, qlen);
57+
break;
58+
case offsetof(struct Qdisc, qstats) ... offsetofend(struct Qdisc, qstats) - 1:
59+
*end = offsetofend(struct Qdisc, qstats);
60+
break;
61+
default:
62+
return -EACCES;
63+
}
64+
65+
return 0;
66+
}
67+
68+
static int bpf_qdisc_sk_buff_access(struct bpf_verifier_log *log,
69+
const struct bpf_reg_state *reg,
70+
int off, size_t *end)
71+
{
72+
switch (off) {
73+
case offsetof(struct sk_buff, tstamp):
74+
*end = offsetofend(struct sk_buff, tstamp);
75+
break;
76+
case offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data[0]) ...
77+
offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb,
78+
data[QDISC_CB_PRIV_LEN - 1]):
79+
*end = offsetof(struct sk_buff, cb) +
80+
offsetofend(struct qdisc_skb_cb, data[QDISC_CB_PRIV_LEN - 1]);
81+
break;
82+
default:
83+
return -EACCES;
84+
}
85+
86+
return 0;
87+
}
88+
89+
static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log,
90+
const struct bpf_reg_state *reg,
91+
int off, int size)
92+
{
93+
const struct btf_type *t, *skbt, *qdisct;
94+
size_t end;
95+
int err;
96+
97+
skbt = btf_type_by_id(reg->btf, bpf_sk_buff_ids[0]);
98+
qdisct = btf_type_by_id(reg->btf, bpf_qdisc_ids[0]);
99+
t = btf_type_by_id(reg->btf, reg->btf_id);
100+
101+
if (t == skbt) {
102+
err = bpf_qdisc_sk_buff_access(log, reg, off, &end);
103+
} else if (t == qdisct) {
104+
err = bpf_qdisc_qdisc_access(log, reg, off, &end);
105+
} else {
106+
bpf_log(log, "only read is supported\n");
107+
return -EACCES;
108+
}
109+
110+
if (err) {
111+
bpf_log(log, "no write support to %s at off %d\n",
112+
btf_name_by_offset(reg->btf, t->name_off), off);
113+
return -EACCES;
114+
}
115+
116+
if (off + size > end) {
117+
bpf_log(log,
118+
"write access at off %d with size %d beyond the member of %s ended at %zu\n",
119+
off, size, btf_name_by_offset(reg->btf, t->name_off), end);
120+
return -EACCES;
121+
}
122+
123+
return 0;
124+
}
125+
126+
static const struct bpf_verifier_ops bpf_qdisc_verifier_ops = {
127+
.get_func_proto = bpf_base_func_proto,
128+
.is_valid_access = bpf_qdisc_is_valid_access,
129+
.btf_struct_access = bpf_qdisc_btf_struct_access,
130+
};
131+
132+
static int bpf_qdisc_init_member(const struct btf_type *t,
133+
const struct btf_member *member,
134+
void *kdata, const void *udata)
135+
{
136+
const struct Qdisc_ops *uqdisc_ops;
137+
struct Qdisc_ops *qdisc_ops;
138+
u32 moff;
139+
140+
uqdisc_ops = (const struct Qdisc_ops *)udata;
141+
qdisc_ops = (struct Qdisc_ops *)kdata;
142+
143+
moff = __btf_member_bit_offset(t, member) / 8;
144+
switch (moff) {
145+
case offsetof(struct Qdisc_ops, peek):
146+
qdisc_ops->peek = qdisc_peek_dequeued;
147+
return 0;
148+
case offsetof(struct Qdisc_ops, id):
149+
if (bpf_obj_name_cpy(qdisc_ops->id, uqdisc_ops->id,
150+
sizeof(qdisc_ops->id)) <= 0)
151+
return -EINVAL;
152+
return 1;
153+
}
154+
155+
return 0;
156+
}
157+
158+
/*
 * struct_ops .reg callback: passes @kdata to register_qdisc() and returns
 * its result. @link is unused.
 */
static int bpf_qdisc_reg(void *kdata, struct bpf_link *link)
{
	return register_qdisc(kdata);
}
162+
163+
/*
 * struct_ops .unreg callback: passes @kdata to unregister_qdisc(). @link is
 * unused.
 *
 * The call is a plain statement rather than "return unregister_qdisc(...)":
 * ISO C does not permit a return statement with an expression in a function
 * whose return type is void.
 */
static void bpf_qdisc_unreg(void *kdata, struct bpf_link *link)
{
	unregister_qdisc(kdata);
}
167+
168+
/*
 * Stub for Qdisc_ops.enqueue, referenced from __bpf_ops_qdisc_ops. It ignores
 * its arguments and returns 0.
 */
static int Qdisc_ops__enqueue(struct sk_buff *skb__ref, struct Qdisc *sch,
			      struct sk_buff **to_free)
{
	return 0;
}
173+
174+
/*
 * Stub for Qdisc_ops.dequeue, referenced from __bpf_ops_qdisc_ops. It ignores
 * @sch and returns NULL.
 */
static struct sk_buff *Qdisc_ops__dequeue(struct Qdisc *sch)
{
	return NULL;
}
178+
179+
/*
 * Stub for Qdisc_ops.init, referenced from __bpf_ops_qdisc_ops. It ignores
 * its arguments and returns 0.
 */
static int Qdisc_ops__init(struct Qdisc *sch, struct nlattr *arg,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
184+
185+
/* Stub for Qdisc_ops.reset, referenced from __bpf_ops_qdisc_ops; does nothing. */
static void Qdisc_ops__reset(struct Qdisc *sch)
{
}
188+
189+
/* Stub for Qdisc_ops.destroy, referenced from __bpf_ops_qdisc_ops; does nothing. */
static void Qdisc_ops__destroy(struct Qdisc *sch)
{
}
192+
193+
static struct Qdisc_ops __bpf_ops_qdisc_ops = {
194+
.enqueue = Qdisc_ops__enqueue,
195+
.dequeue = Qdisc_ops__dequeue,
196+
.init = Qdisc_ops__init,
197+
.reset = Qdisc_ops__reset,
198+
.destroy = Qdisc_ops__destroy,
199+
};
200+
201+
static struct bpf_struct_ops bpf_Qdisc_ops = {
202+
.verifier_ops = &bpf_qdisc_verifier_ops,
203+
.reg = bpf_qdisc_reg,
204+
.unreg = bpf_qdisc_unreg,
205+
.init_member = bpf_qdisc_init_member,
206+
.init = bpf_qdisc_init,
207+
.name = "Qdisc_ops",
208+
.cfi_stubs = &__bpf_ops_qdisc_ops,
209+
.owner = THIS_MODULE,
210+
};
211+
212+
/*
 * Initcall (late_initcall level) that registers the Qdisc_ops struct_ops
 * descriptor and returns the result of register_bpf_struct_ops().
 */
static int __init bpf_qdisc_kfunc_init(void)
{
	return register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops);
}
late_initcall(bpf_qdisc_kfunc_init);

net/sched/sch_api.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <linux/hrtimer.h>
2626
#include <linux/slab.h>
2727
#include <linux/hashtable.h>
28+
#include <linux/bpf.h>
2829

2930
#include <net/netdev_lock.h>
3031
#include <net/net_namespace.h>
@@ -359,7 +360,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
359360
read_lock(&qdisc_mod_lock);
360361
for (q = qdisc_base; q; q = q->next) {
361362
if (nla_strcmp(kind, q->id) == 0) {
362-
if (!try_module_get(q->owner))
363+
if (!bpf_try_module_get(q, q->owner))
363364
q = NULL;
364365
break;
365366
}
@@ -1370,7 +1371,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
13701371
netdev_put(dev, &sch->dev_tracker);
13711372
qdisc_free(sch);
13721373
err_out2:
1373-
module_put(ops->owner);
1374+
bpf_module_put(ops, ops->owner);
13741375
err_out:
13751376
*errp = err;
13761377
return NULL;
@@ -1782,7 +1783,7 @@ static void request_qdisc_module(struct nlattr *kind)
17821783

17831784
ops = qdisc_lookup_ops(kind);
17841785
if (ops) {
1785-
module_put(ops->owner);
1786+
bpf_module_put(ops, ops->owner);
17861787
return;
17871788
}
17881789

net/sched/sch_generic.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/if_vlan.h>
2525
#include <linux/skb_array.h>
2626
#include <linux/if_macvlan.h>
27+
#include <linux/bpf.h>
2728
#include <net/sch_generic.h>
2829
#include <net/pkt_sched.h>
2930
#include <net/dst.h>
@@ -1078,7 +1079,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
10781079
ops->destroy(qdisc);
10791080

10801081
lockdep_unregister_key(&qdisc->root_lock_key);
1081-
module_put(ops->owner);
1082+
bpf_module_put(ops, ops->owner);
10821083
netdev_put(dev, &qdisc->dev_tracker);
10831084

10841085
trace_qdisc_destroy(qdisc);

0 commit comments

Comments
 (0)