Skip to content

Commit 376dcfe

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf, sockmap: allow verdict only sk_skb progs'
John Fastabend says: ==================== This allows a sockmap sk_skb verdict programs to run without a parser. For some use cases, such as verdict program that support streaming data or a l3/l4 proxy that does not use data in packet, loading the nop parser 'return skb->len' is an extra unnecessary complexity. With this series we simply call the verdict program directly from data_ready instead of bouncing through the strparser logic. Patches 1,2 do the lifting on the sockmap side then patches 3,4 add the selftests. This applies on top of the series here, sockmap/sk_skb program memory acct fixes https://patchwork.ozlabs.org/project/netdev/list/?series=206975 it will apply without the above series cleanly, but will have an incorrect memory accounting causing a failure in ./test_sockmap. I could have left it so the series passed without above series, but it seemed odd to have it out there and then require yet another patch to fix it up here. Thanks. --- ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 20a6d91 + a24fb42 commit 376dcfe

File tree

4 files changed

+142
-29
lines changed

4 files changed

+142
-29
lines changed

include/linux/skmsg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
308308
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
309309
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
310310
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
311+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
312+
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
311313

312314
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
313315
struct sk_msg *msg);

net/core/skmsg.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
627627
rcu_assign_sk_user_data(sk, NULL);
628628
if (psock->progs.skb_parser)
629629
sk_psock_stop_strp(sk, psock);
630+
else if (psock->progs.skb_verdict)
631+
sk_psock_stop_verdict(sk, psock);
630632
write_unlock_bh(&sk->sk_callback_lock);
631633
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
632634

@@ -871,6 +873,57 @@ static void sk_psock_strp_data_ready(struct sock *sk)
871873
rcu_read_unlock();
872874
}
873875

876+
static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
877+
unsigned int offset, size_t orig_len)
878+
{
879+
struct sock *sk = (struct sock *)desc->arg.data;
880+
struct sk_psock *psock;
881+
struct bpf_prog *prog;
882+
int ret = __SK_DROP;
883+
int len = skb->len;
884+
885+
/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
886+
skb = skb_clone(skb, GFP_ATOMIC);
887+
if (!skb) {
888+
desc->error = -ENOMEM;
889+
return 0;
890+
}
891+
892+
rcu_read_lock();
893+
psock = sk_psock(sk);
894+
if (unlikely(!psock)) {
895+
len = 0;
896+
kfree_skb(skb);
897+
goto out;
898+
}
899+
skb_set_owner_r(skb, sk);
900+
prog = READ_ONCE(psock->progs.skb_verdict);
901+
if (likely(prog)) {
902+
tcp_skb_bpf_redirect_clear(skb);
903+
ret = sk_psock_bpf_run(psock, prog, skb);
904+
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
905+
}
906+
sk_psock_verdict_apply(psock, skb, ret);
907+
out:
908+
rcu_read_unlock();
909+
return len;
910+
}
911+
912+
static void sk_psock_verdict_data_ready(struct sock *sk)
913+
{
914+
struct socket *sock = sk->sk_socket;
915+
read_descriptor_t desc;
916+
917+
if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
918+
return;
919+
920+
desc.arg.data = sk;
921+
desc.error = 0;
922+
desc.count = 1;
923+
924+
sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
925+
}
926+
874927
static void sk_psock_write_space(struct sock *sk)
875928
{
876929
struct sk_psock *psock;
@@ -900,6 +953,19 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
900953
return strp_init(&psock->parser.strp, sk, &cb);
901954
}
902955

956+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
957+
{
958+
struct sk_psock_parser *parser = &psock->parser;
959+
960+
if (parser->enabled)
961+
return;
962+
963+
parser->saved_data_ready = sk->sk_data_ready;
964+
sk->sk_data_ready = sk_psock_verdict_data_ready;
965+
sk->sk_write_space = sk_psock_write_space;
966+
parser->enabled = true;
967+
}
968+
903969
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
904970
{
905971
struct sk_psock_parser *parser = &psock->parser;
@@ -925,3 +991,15 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
925991
strp_stop(&parser->strp);
926992
parser->enabled = false;
927993
}
994+
995+
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
996+
{
997+
struct sk_psock_parser *parser = &psock->parser;
998+
999+
if (!parser->enabled)
1000+
return;
1001+
1002+
sk->sk_data_ready = parser->saved_data_ready;
1003+
parser->saved_data_ready = NULL;
1004+
parser->enabled = false;
1005+
}

net/core/sock_map.c

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
148148
static void sock_map_del_link(struct sock *sk,
149149
struct sk_psock *psock, void *link_raw)
150150
{
151+
bool strp_stop = false, verdict_stop = false;
151152
struct sk_psock_link *link, *tmp;
152-
bool strp_stop = false;
153153

154154
spin_lock_bh(&psock->link_lock);
155155
list_for_each_entry_safe(link, tmp, &psock->link, list) {
@@ -159,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
159159
map);
160160
if (psock->parser.enabled && stab->progs.skb_parser)
161161
strp_stop = true;
162+
if (psock->parser.enabled && stab->progs.skb_verdict)
163+
verdict_stop = true;
162164
list_del(&link->list);
163165
sk_psock_free_link(link);
164166
}
165167
}
166168
spin_unlock_bh(&psock->link_lock);
167-
if (strp_stop) {
169+
if (strp_stop || verdict_stop) {
168170
write_lock_bh(&sk->sk_callback_lock);
169-
sk_psock_stop_strp(sk, psock);
171+
if (strp_stop)
172+
sk_psock_stop_strp(sk, psock);
173+
else
174+
sk_psock_stop_verdict(sk, psock);
170175
write_unlock_bh(&sk->sk_callback_lock);
171176
}
172177
}
@@ -230,16 +235,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
230235
{
231236
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
232237
struct sk_psock *psock;
233-
bool skb_progs;
234238
int ret;
235239

236240
skb_verdict = READ_ONCE(progs->skb_verdict);
237241
skb_parser = READ_ONCE(progs->skb_parser);
238-
skb_progs = skb_parser && skb_verdict;
239-
if (skb_progs) {
242+
if (skb_verdict) {
240243
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
241244
if (IS_ERR(skb_verdict))
242245
return PTR_ERR(skb_verdict);
246+
}
247+
if (skb_parser) {
243248
skb_parser = bpf_prog_inc_not_zero(skb_parser);
244249
if (IS_ERR(skb_parser)) {
245250
bpf_prog_put(skb_verdict);
@@ -264,7 +269,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
264269

265270
if (psock) {
266271
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
267-
(skb_progs && READ_ONCE(psock->progs.skb_parser))) {
272+
(skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
273+
(skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
268274
sk_psock_put(sk, psock);
269275
ret = -EBUSY;
270276
goto out_progs;
@@ -285,28 +291,31 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
285291
goto out_drop;
286292

287293
write_lock_bh(&sk->sk_callback_lock);
288-
if (skb_progs && !psock->parser.enabled) {
294+
if (skb_parser && skb_verdict && !psock->parser.enabled) {
289295
ret = sk_psock_init_strp(sk, psock);
290-
if (ret) {
291-
write_unlock_bh(&sk->sk_callback_lock);
292-
goto out_drop;
293-
}
296+
if (ret)
297+
goto out_unlock_drop;
294298
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
295299
psock_set_prog(&psock->progs.skb_parser, skb_parser);
296300
sk_psock_start_strp(sk, psock);
301+
} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
302+
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
303+
sk_psock_start_verdict(sk,psock);
297304
}
298305
write_unlock_bh(&sk->sk_callback_lock);
299306
return 0;
307+
out_unlock_drop:
308+
write_unlock_bh(&sk->sk_callback_lock);
300309
out_drop:
301310
sk_psock_put(sk, psock);
302311
out_progs:
303312
if (msg_parser)
304313
bpf_prog_put(msg_parser);
305314
out:
306-
if (skb_progs) {
315+
if (skb_verdict)
307316
bpf_prog_put(skb_verdict);
317+
if (skb_parser)
308318
bpf_prog_put(skb_parser);
309-
}
310319
return ret;
311320
}
312321

tools/testing/selftests/bpf/test_sockmap.c

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir;
8686
int ktls;
8787
int peek_flag;
8888
int skb_use_parser;
89+
int txmsg_omit_skb_parser;
8990

9091
static const struct option long_options[] = {
9192
{"help", no_argument, NULL, 'h' },
@@ -111,6 +112,7 @@ static const struct option long_options[] = {
111112
{"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
112113
{"ktls", no_argument, &ktls, 1 },
113114
{"peek", no_argument, &peek_flag, 1 },
115+
{"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
114116
{"whitelist", required_argument, NULL, 'n' },
115117
{"blacklist", required_argument, NULL, 'b' },
116118
{0, 0, NULL, 0 }
@@ -175,6 +177,7 @@ static void test_reset(void)
175177
txmsg_apply = txmsg_cork = 0;
176178
txmsg_ingress = txmsg_redir_skb = 0;
177179
txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
180+
txmsg_omit_skb_parser = 0;
178181
skb_use_parser = 0;
179182
}
180183

@@ -912,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
912915
goto run;
913916

914917
/* Attach programs to sockmap */
915-
err = bpf_prog_attach(prog_fd[0], map_fd[0],
916-
BPF_SK_SKB_STREAM_PARSER, 0);
917-
if (err) {
918-
fprintf(stderr,
919-
"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
920-
prog_fd[0], map_fd[0], err, strerror(errno));
921-
return err;
918+
if (!txmsg_omit_skb_parser) {
919+
err = bpf_prog_attach(prog_fd[0], map_fd[0],
920+
BPF_SK_SKB_STREAM_PARSER, 0);
921+
if (err) {
922+
fprintf(stderr,
923+
"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
924+
prog_fd[0], map_fd[0], err, strerror(errno));
925+
return err;
926+
}
922927
}
923928

924929
err = bpf_prog_attach(prog_fd[1], map_fd[0],
@@ -931,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
931936

932937
/* Attach programs to TLS sockmap */
933938
if (txmsg_ktls_skb) {
934-
err = bpf_prog_attach(prog_fd[0], map_fd[8],
935-
BPF_SK_SKB_STREAM_PARSER, 0);
936-
if (err) {
937-
fprintf(stderr,
938-
"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
939-
prog_fd[0], map_fd[8], err, strerror(errno));
940-
return err;
939+
if (!txmsg_omit_skb_parser) {
940+
err = bpf_prog_attach(prog_fd[0], map_fd[8],
941+
BPF_SK_SKB_STREAM_PARSER, 0);
942+
if (err) {
943+
fprintf(stderr,
944+
"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
945+
prog_fd[0], map_fd[8], err, strerror(errno));
946+
return err;
947+
}
941948
}
942949

943950
err = bpf_prog_attach(prog_fd[2], map_fd[8],
@@ -1465,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
14651472
txmsg_ktls_skb_drop = 0;
14661473
txmsg_ktls_skb_redir = 1;
14671474
test_exec(cgrp, opt);
1475+
txmsg_ktls_skb_redir = 0;
1476+
1477+
/* Tests that omit skb_parser */
1478+
txmsg_omit_skb_parser = 1;
1479+
ktls = 0;
1480+
txmsg_ktls_skb = 0;
1481+
test_exec(cgrp, opt);
1482+
1483+
txmsg_ktls_skb_drop = 1;
1484+
test_exec(cgrp, opt);
1485+
txmsg_ktls_skb_drop = 0;
1486+
1487+
txmsg_ktls_skb_redir = 1;
1488+
test_exec(cgrp, opt);
1489+
1490+
ktls = 1;
1491+
test_exec(cgrp, opt);
1492+
txmsg_omit_skb_parser = 0;
14681493

14691494
opt->data_test = data;
14701495
ktls = k;
14711496
}
14721497

1473-
14741498
/* Test cork with hung data. This tests poor usage patterns where
14751499
* cork can leave data on the ring if user program is buggy and
14761500
* doesn't flush them somehow. They do take some time however

0 commit comments

Comments
 (0)