Skip to content

Commit 5c70eb5

Browse files
edumazet authored and kuba-moo committed
net: better track kernel sockets lifetime
While kernel sockets are dismantled during pernet_operations->exit(), their freeing can be delayed by any tx packets still held in qdisc or device queues, due to prior skb_set_owner_w() calls.

This then triggers the following warning from ref_tracker_dir_exit() [1].

To fix this, make sure that kernel sockets own a reference on net->passive.

Add sk_net_refcnt_upgrade() helper, used whenever a kernel socket is converted to a refcounted one.

[1]
[ 136.263918][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at
[ 136.263918][ T35] sk_alloc+0x2b3/0x370
[ 136.263918][ T35] inet6_create+0x6ce/0x10f0
[ 136.263918][ T35] __sock_create+0x4c0/0xa30
[ 136.263918][ T35] inet_ctl_sock_create+0xc2/0x250
[ 136.263918][ T35] igmp6_net_init+0x39/0x390
[ 136.263918][ T35] ops_init+0x31e/0x590
[ 136.263918][ T35] setup_net+0x287/0x9e0
[ 136.263918][ T35] copy_net_ns+0x33f/0x570
[ 136.263918][ T35] create_new_namespaces+0x425/0x7b0
[ 136.263918][ T35] unshare_nsproxy_namespaces+0x124/0x180
[ 136.263918][ T35] ksys_unshare+0x57d/0xa70
[ 136.263918][ T35] __x64_sys_unshare+0x38/0x40
[ 136.263918][ T35] do_syscall_64+0xf3/0x230
[ 136.263918][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 136.263918][ T35]
[ 136.343488][ T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at
[ 136.343488][ T35] sk_alloc+0x2b3/0x370
[ 136.343488][ T35] inet6_create+0x6ce/0x10f0
[ 136.343488][ T35] __sock_create+0x4c0/0xa30
[ 136.343488][ T35] inet_ctl_sock_create+0xc2/0x250
[ 136.343488][ T35] ndisc_net_init+0xa7/0x2b0
[ 136.343488][ T35] ops_init+0x31e/0x590
[ 136.343488][ T35] setup_net+0x287/0x9e0
[ 136.343488][ T35] copy_net_ns+0x33f/0x570
[ 136.343488][ T35] create_new_namespaces+0x425/0x7b0
[ 136.343488][ T35] unshare_nsproxy_namespaces+0x124/0x180
[ 136.343488][ T35] ksys_unshare+0x57d/0xa70
[ 136.343488][ T35] __x64_sys_unshare+0x38/0x40
[ 136.343488][ T35] do_syscall_64+0xf3/0x230
[ 136.343488][ T35] entry_SYSCALL_64_after_hwframe+0x77/0x7f

Fixes: 0cafd77 ("net: add a refcount tracker for kernel sockets")
Reported-by: [email protected]
Closes: https://lore.kernel.org/netdev/[email protected]/T/#u
Signed-off-by: Eric Dumazet <[email protected]>
Reviewed-by: Kuniyuki Iwashima <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent fde9836 commit 5c70eb5

File tree

8 files changed

+30
-39
lines changed

8 files changed

+30
-39
lines changed

include/net/sock.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1751,6 +1751,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk)
17511751
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
17521752
struct proto *prot, int kern);
17531753
void sk_free(struct sock *sk);
1754+
void sk_net_refcnt_upgrade(struct sock *sk);
17541755
void sk_destruct(struct sock *sk);
17551756
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
17561757
void sk_free_unlock_clone(struct sock *sk);

net/core/sock.c

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2246,6 +2246,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
22462246
get_net_track(net, &sk->ns_tracker, priority);
22472247
sock_inuse_add(net, 1);
22482248
} else {
2249+
net_passive_inc(net);
22492250
__netns_tracker_alloc(net, &sk->ns_tracker,
22502251
false, priority);
22512252
}
@@ -2270,6 +2271,7 @@ EXPORT_SYMBOL(sk_alloc);
22702271
static void __sk_destruct(struct rcu_head *head)
22712272
{
22722273
struct sock *sk = container_of(head, struct sock, sk_rcu);
2274+
struct net *net = sock_net(sk);
22732275
struct sk_filter *filter;
22742276

22752277
if (sk->sk_destruct)
@@ -2301,14 +2303,28 @@ static void __sk_destruct(struct rcu_head *head)
23012303
put_cred(sk->sk_peer_cred);
23022304
put_pid(sk->sk_peer_pid);
23032305

2304-
if (likely(sk->sk_net_refcnt))
2305-
put_net_track(sock_net(sk), &sk->ns_tracker);
2306-
else
2307-
__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
2308-
2306+
if (likely(sk->sk_net_refcnt)) {
2307+
put_net_track(net, &sk->ns_tracker);
2308+
} else {
2309+
__netns_tracker_free(net, &sk->ns_tracker, false);
2310+
net_passive_dec(net);
2311+
}
23092312
sk_prot_free(sk->sk_prot_creator, sk);
23102313
}
23112314

2315+
void sk_net_refcnt_upgrade(struct sock *sk)
2316+
{
2317+
struct net *net = sock_net(sk);
2318+
2319+
WARN_ON_ONCE(sk->sk_net_refcnt);
2320+
__netns_tracker_free(net, &sk->ns_tracker, false);
2321+
net_passive_dec(net);
2322+
sk->sk_net_refcnt = 1;
2323+
get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
2324+
sock_inuse_add(net, 1);
2325+
}
2326+
EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade);
2327+
23122328
void sk_destruct(struct sock *sk)
23132329
{
23142330
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
@@ -2405,6 +2421,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
24052421
* is not properly dismantling its kernel sockets at netns
24062422
* destroy time.
24072423
*/
2424+
net_passive_inc(sock_net(newsk));
24082425
__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
24092426
false, priority);
24102427
}

net/mptcp/subflow.c

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1772,10 +1772,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
17721772
* needs it.
17731773
* Update ns_tracker to current stack trace and refcounted tracker.
17741774
*/
1775-
__netns_tracker_free(net, &sf->sk->ns_tracker, false);
1776-
sf->sk->sk_net_refcnt = 1;
1777-
get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
1778-
sock_inuse_add(net, 1);
1775+
sk_net_refcnt_upgrade(sf->sk);
17791776
err = tcp_set_ulp(sf->sk, "mptcp");
17801777
if (err)
17811778
goto err_free;

net/netlink/af_netlink.c

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -795,16 +795,6 @@ static int netlink_release(struct socket *sock)
795795

796796
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
797797

798-
/* Because struct net might disappear soon, do not keep a pointer. */
799-
if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
800-
__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
801-
/* Because of deferred_put_nlk_sk and use of work queue,
802-
* it is possible netns will be freed before this socket.
803-
*/
804-
sock_net_set(sk, &init_net);
805-
__netns_tracker_alloc(&init_net, &sk->ns_tracker,
806-
false, GFP_KERNEL);
807-
}
808798
call_rcu(&nlk->rcu, deferred_put_nlk_sk);
809799
return 0;
810800
}

net/rds/tcp.c

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock)
504504
release_sock(sk);
505505
return false;
506506
}
507-
/* Update ns_tracker to current stack trace and refcounted tracker */
508-
__netns_tracker_free(net, &sk->ns_tracker, false);
509-
510-
sk->sk_net_refcnt = 1;
511-
netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
512-
sock_inuse_add(net, 1);
507+
sk_net_refcnt_upgrade(sk);
508+
put_net(net);
513509
}
514510
rtn = net_generic(net, rds_tcp_netid);
515511
if (rtn->sndbuf_size > 0) {

net/smc/af_smc.c

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3337,10 +3337,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family)
33373337
* which need net ref.
33383338
*/
33393339
sk = smc->clcsock->sk;
3340-
__netns_tracker_free(net, &sk->ns_tracker, false);
3341-
sk->sk_net_refcnt = 1;
3342-
get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
3343-
sock_inuse_add(net, 1);
3340+
sk_net_refcnt_upgrade(sk);
33443341
return 0;
33453342
}
33463343

net/sunrpc/svcsock.c

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1541,10 +1541,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
15411541
newlen = error;
15421542

15431543
if (protocol == IPPROTO_TCP) {
1544-
__netns_tracker_free(net, &sock->sk->ns_tracker, false);
1545-
sock->sk->sk_net_refcnt = 1;
1546-
get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
1547-
sock_inuse_add(net, 1);
1544+
sk_net_refcnt_upgrade(sock->sk);
15481545
if ((error = kernel_listen(sock, 64)) < 0)
15491546
goto bummer;
15501547
}

net/sunrpc/xprtsock.c

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1941,12 +1941,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
19411941
goto out;
19421942
}
19431943

1944-
if (protocol == IPPROTO_TCP) {
1945-
__netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
1946-
sock->sk->sk_net_refcnt = 1;
1947-
get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
1948-
sock_inuse_add(xprt->xprt_net, 1);
1949-
}
1944+
if (protocol == IPPROTO_TCP)
1945+
sk_net_refcnt_upgrade(sock->sk);
19501946

19511947
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
19521948
if (IS_ERR(filp))

0 commit comments

Comments
 (0)