Skip to content

Commit 2bab48c

Browse files
author
Alexei Starovoitov
committed
Merge branch 'improve-bpf-tcp-cc-init'
Neal Cardwell says: ==================== This patch series reorganizes TCP congestion control initialization so that if EBPF code called by tcp_init_transfer() sets the congestion control algorithm by calling setsockopt(TCP_CONGESTION) then the TCP stack initializes the congestion control module immediately, instead of having tcp_init_transfer() later initialize the congestion control module. This increases flexibility for the EBPF code that runs at connection establishment time, and simplifies the code. This has the following benefits: (1) This allows CC module customizations made by the EBPF called in tcp_init_transfer() to persist, and not be wiped out by a later call to tcp_init_congestion_control() in tcp_init_transfer(). (2) Does not flip the order of EBPF and CC init, to avoid causing bugs for existing code upstream that depends on the current order. (3) Does not cause 2 initializations for for CC in the case where the EBPF called in tcp_init_transfer() wants to set the CC to a new CC algorithm. (4) Allows follow-on simplifications to the code in net/core/filter.c and net/ipv4/tcp_cong.c, which currently both have some complexity to special-case CC initialization to avoid double CC initialization if EBPF sets the CC. changes in v2: o rebase onto bpf-next o add another follow-on simplification suggested by Martin KaFai Lau: "tcp: simplify tcp_set_congestion_control() load=false case" changes in v3: o no change in commits o resent patch series from @gmail.com, since mail from [email protected] stopped being accepted at [email protected] mid-way through processing the v2 patch series (between patches 2 and 3), confusing patchwork about which patches belonged to the v2 patch series ==================== Acked-by: Martin KaFai Lau <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 18841da + 5050bef commit 2bab48c

File tree

6 files changed

+19
-38
lines changed

6 files changed

+19
-38
lines changed

include/net/inet_connection_sock.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ struct inet_connection_sock {
9696
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
9797
struct hlist_node icsk_listen_portaddr_node;
9898
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
99-
__u8 icsk_ca_state:6,
99+
__u8 icsk_ca_state:5,
100+
icsk_ca_initialized:1,
100101
icsk_ca_setsockopt:1,
101102
icsk_ca_dst_locked:1;
102103
__u8 icsk_retransmits;

include/net/tcp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
11041104
void tcp_get_allowed_congestion_control(char *buf, size_t len);
11051105
int tcp_set_allowed_congestion_control(char *allowed);
11061106
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
1107-
bool reinit, bool cap_net_admin);
1107+
bool cap_net_admin);
11081108
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
11091109
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
11101110

net/core/filter.c

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4313,10 +4313,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
43134313
.arg1_type = ARG_PTR_TO_CTX,
43144314
};
43154315

4316-
#define SOCKOPT_CC_REINIT (1 << 0)
4317-
43184316
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
4319-
char *optval, int optlen, u32 flags)
4317+
char *optval, int optlen)
43204318
{
43214319
char devname[IFNAMSIZ];
43224320
int val, valbool;
@@ -4449,13 +4447,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
44494447
sk->sk_prot->setsockopt == tcp_setsockopt) {
44504448
if (optname == TCP_CONGESTION) {
44514449
char name[TCP_CA_NAME_MAX];
4452-
bool reinit = flags & SOCKOPT_CC_REINIT;
44534450

44544451
strncpy(name, optval, min_t(long, optlen,
44554452
TCP_CA_NAME_MAX-1));
44564453
name[TCP_CA_NAME_MAX-1] = 0;
4457-
ret = tcp_set_congestion_control(sk, name, false,
4458-
reinit, true);
4454+
ret = tcp_set_congestion_control(sk, name, false, true);
44594455
} else {
44604456
struct inet_connection_sock *icsk = inet_csk(sk);
44614457
struct tcp_sock *tp = tcp_sk(sk);
@@ -4615,9 +4611,7 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
46154611
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
46164612
int, level, int, optname, char *, optval, int, optlen)
46174613
{
4618-
u32 flags = 0;
4619-
return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
4620-
flags);
4614+
return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
46214615
}
46224616

46234617
static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
@@ -4651,11 +4645,7 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
46514645
BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
46524646
int, level, int, optname, char *, optval, int, optlen)
46534647
{
4654-
u32 flags = 0;
4655-
if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
4656-
flags |= SOCKOPT_CC_REINIT;
4657-
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
4658-
flags);
4648+
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
46594649
}
46604650

46614651
static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {

net/ipv4/tcp.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2698,6 +2698,7 @@ int tcp_disconnect(struct sock *sk, int flags)
26982698
if (icsk->icsk_ca_ops->release)
26992699
icsk->icsk_ca_ops->release(sk);
27002700
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
2701+
icsk->icsk_ca_initialized = 0;
27012702
tcp_set_ca_state(sk, TCP_CA_Open);
27022703
tp->is_sack_reneg = 0;
27032704
tcp_clear_retrans(tp);
@@ -3049,7 +3050,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
30493050
name[val] = 0;
30503051

30513052
lock_sock(sk);
3052-
err = tcp_set_congestion_control(sk, name, true, true,
3053+
err = tcp_set_congestion_control(sk, name, true,
30533054
ns_capable(sock_net(sk)->user_ns,
30543055
CAP_NET_ADMIN));
30553056
release_sock(sk);

net/ipv4/tcp_cong.c

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ void tcp_assign_congestion_control(struct sock *sk)
176176

177177
void tcp_init_congestion_control(struct sock *sk)
178178
{
179-
const struct inet_connection_sock *icsk = inet_csk(sk);
179+
struct inet_connection_sock *icsk = inet_csk(sk);
180180

181181
tcp_sk(sk)->prior_ssthresh = 0;
182182
if (icsk->icsk_ca_ops->init)
@@ -185,6 +185,7 @@ void tcp_init_congestion_control(struct sock *sk)
185185
INET_ECN_xmit(sk);
186186
else
187187
INET_ECN_dontxmit(sk);
188+
icsk->icsk_ca_initialized = 1;
188189
}
189190

190191
static void tcp_reinit_congestion_control(struct sock *sk,
@@ -340,7 +341,7 @@ int tcp_set_allowed_congestion_control(char *val)
340341
* already initialized.
341342
*/
342343
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
343-
bool reinit, bool cap_net_admin)
344+
bool cap_net_admin)
344345
{
345346
struct inet_connection_sock *icsk = inet_csk(sk);
346347
const struct tcp_congestion_ops *ca;
@@ -361,28 +362,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
361362
goto out;
362363
}
363364

364-
if (!ca) {
365+
if (!ca)
365366
err = -ENOENT;
366-
} else if (!load) {
367-
const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
368-
369-
if (bpf_try_module_get(ca, ca->owner)) {
370-
if (reinit) {
371-
tcp_reinit_congestion_control(sk, ca);
372-
} else {
373-
icsk->icsk_ca_ops = ca;
374-
bpf_module_put(old_ca, old_ca->owner);
375-
}
376-
} else {
377-
err = -EBUSY;
378-
}
379-
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
367+
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin))
380368
err = -EPERM;
381-
} else if (!bpf_try_module_get(ca, ca->owner)) {
369+
else if (!bpf_try_module_get(ca, ca->owner))
382370
err = -EBUSY;
383-
} else {
371+
else
384372
tcp_reinit_congestion_control(sk, ca);
385-
}
386373
out:
387374
rcu_read_unlock();
388375
return err;

net/ipv4/tcp_input.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5894,8 +5894,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
58945894
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
58955895
tp->snd_cwnd_stamp = tcp_jiffies32;
58965896

5897+
icsk->icsk_ca_initialized = 0;
58975898
bpf_skops_established(sk, bpf_op, skb);
5898-
tcp_init_congestion_control(sk);
5899+
if (!icsk->icsk_ca_initialized)
5900+
tcp_init_congestion_control(sk);
58995901
tcp_init_buffer_space(sk);
59005902
}
59015903

0 commit comments

Comments
 (0)