Skip to content

Commit 77376dd

Browse files
ordexNipaLocal
authored and
NipaLocal
committed
ovpn: ensure sk is still valid during cleanup
Removing a peer while userspace attempts to close its transport socket triggers a race condition resulting in the following crash: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000077: 0000 [kernel-patches#1] SMP KASAN KASAN: null-ptr-deref in range [0x00000000000003b8-0x00000000000003bf] CPU: 12 UID: 0 PID: 162 Comm: kworker/12:1 Tainted: G O 6.15.0-rc2-00635-g521139ac3840 kernel-patches#272 PREEMPT(full) Tainted: [O]=OOT_MODULE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-20240910_120124-localhost 04/01/2014 Workqueue: events ovpn_peer_keepalive_work [ovpn] RIP: 0010:ovpn_socket_release+0x23c/0x500 [ovpn] Code: ea 03 80 3c 02 00 0f 85 71 02 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 64 24 18 49 8d bc 24 be 03 00 00 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 01 38 d0 7c 08 84 d2 0f 85 30 RSP: 0018:ffffc90000c9fb18 EFLAGS: 00010217 RAX: dffffc0000000000 RBX: ffff8881148d7940 RCX: ffffffff817787bb RDX: 0000000000000077 RSI: 0000000000000008 RDI: 00000000000003be RBP: ffffc90000c9fb30 R08: 0000000000000000 R09: fffffbfff0d3e840 R10: ffffffff869f4207 R11: 0000000000000000 R12: 0000000000000000 R13: ffff888115eb9300 R14: ffffc90000c9fbc8 R15: 000000000000000c FS: 0000000000000000(0000) GS:ffff8882b0151000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37266b6114 CR3: 00000000054a8000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: <TASK> unlock_ovpn+0x8b/0xe0 [ovpn] ovpn_peer_keepalive_work+0xe3/0x540 [ovpn] ? ovpn_peers_free+0x780/0x780 [ovpn] ? lock_acquire+0x56/0x70 ? process_one_work+0x888/0x1740 process_one_work+0x933/0x1740 ? pwq_dec_nr_in_flight+0x10b0/0x10b0 ? move_linked_works+0x12d/0x2c0 ? assign_work+0x163/0x270 worker_thread+0x4d6/0xd90 ? preempt_count_sub+0x4c/0x70 ? process_one_work+0x1740/0x1740 kthread+0x36c/0x710 ? trace_preempt_on+0x8c/0x1e0 ? kthread_is_per_cpu+0xc0/0xc0 ? preempt_count_sub+0x4c/0x70 ? _raw_spin_unlock_irq+0x36/0x60 ? calculate_sigpending+0x7b/0xa0 ? kthread_is_per_cpu+0xc0/0xc0 ret_from_fork+0x3a/0x80 ? kthread_is_per_cpu+0xc0/0xc0 ret_from_fork_asm+0x11/0x20 </TASK> Modules linked in: ovpn(O) This happens because the peer deletion operation reaches ovpn_socket_release() while ovpn_sock->sock (struct socket *) and its sk member (struct sock *) are still both valid. Here synchronize_rcu() is invoked, after which ovpn_sock->sock->sk becomes NULL, due to the concurrent socket closing triggered from userspace. After having invoked synchronize_rcu(), ovpn_socket_release() will attempt dereferencing ovpn_sock->sock->sk, triggering the crash reported above. The reason for accessing sk is that we need to retrieve its protocol and continue the cleanup routine accordingly. This crash can be easily produced by running openvpn userspace in client mode with `--keepalive 10 20`, while entirely omitting this option on the server side. After 20 seconds ovpn will assume the peer (server) to be dead, will start removing it and will notify userspace. The latter will receive the notification and close the transport socket, thus triggering the crash. To fix the race condition for good, we need to refactor struct ovpn_socket. Since ovpn is always only interested in the sock->sk member (struct sock *) we can directly hold a reference to it, raher than accessing it via its struct socket container. This means changing "struct socket *ovpn_socket->sock" to "struct sock *ovpn_socket->sk". While acquiring a reference to sk, we can increase its refcounter without affecting the socket close()/destroy() notification (which we rely on when userspace closes a socket we are using). By increasing sk's refcounter we know we can dereference it in ovpn_socket_release() without incurring in any race condition anymore. ovpn_socket_release() will ultimately decrease the reference counter. Cc: Oleksandr Natalenko <[email protected]> Fixes: 11851cb ("ovpn: implement TCP transport") Reported-by: Qingfang Deng <[email protected]> Closes: OpenVPN/ovpn-net-next#1 Tested-by: Gert Doering <[email protected]> Link: https://www.mail-archive.com/[email protected]/msg31575.html Signed-off-by: Antonio Quartulli <[email protected]> Signed-off-by: NipaLocal <nipa@local>
1 parent 3d87b6d commit 77376dd

File tree

9 files changed

+109
-106
lines changed

9 files changed

+109
-106
lines changed

drivers/net/ovpn/io.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ void ovpn_decrypt_post(void *data, int ret)
134134

135135
rcu_read_lock();
136136
sock = rcu_dereference(peer->sock);
137-
if (sock && sock->sock->sk->sk_protocol == IPPROTO_UDP)
137+
if (sock && sock->sk->sk_protocol == IPPROTO_UDP)
138138
/* check if this peer changed local or remote endpoint */
139139
ovpn_peer_endpoints_update(peer, skb);
140140
rcu_read_unlock();
@@ -270,12 +270,12 @@ void ovpn_encrypt_post(void *data, int ret)
270270
if (unlikely(!sock))
271271
goto err_unlock;
272272

273-
switch (sock->sock->sk->sk_protocol) {
273+
switch (sock->sk->sk_protocol) {
274274
case IPPROTO_UDP:
275-
ovpn_udp_send_skb(peer, sock->sock, skb);
275+
ovpn_udp_send_skb(peer, sock->sk, skb);
276276
break;
277277
case IPPROTO_TCP:
278-
ovpn_tcp_send_skb(peer, sock->sock, skb);
278+
ovpn_tcp_send_skb(peer, sock->sk, skb);
279279
break;
280280
default:
281281
/* no transport configured yet */

drivers/net/ovpn/netlink.c

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -423,9 +423,14 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
423423
ovpn_sock = ovpn_socket_new(sock, peer);
424424
/* at this point we unconditionally drop the reference to the socket:
425425
* - in case of error, the socket has to be dropped
426-
* - if case of success, the socket is configured and let
426+
* - if case of success, the socket is configured and we let
427427
* userspace own the reference, so that the latter can
428-
* trigger the final close()
428+
* trigger the final close().
429+
*
430+
* NOTE: at this point ovpn_socket_new() has acquired a reference
431+
* to sock->sk. That's needed especially to avoid race conditions
432+
* during cleanup, where sock may still be alive, but sock->sk may be
433+
* getting released concurrently.
429434
*/
430435
sockfd_put(sock);
431436
if (IS_ERR(ovpn_sock)) {
@@ -501,7 +506,7 @@ int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info)
501506
/* when using a TCP socket the remote IP is not expected */
502507
rcu_read_lock();
503508
sock = rcu_dereference(peer->sock);
504-
if (sock && sock->sock->sk->sk_protocol == IPPROTO_TCP &&
509+
if (sock && sock->sk->sk_protocol == IPPROTO_TCP &&
505510
(attrs[OVPN_A_PEER_REMOTE_IPV4] ||
506511
attrs[OVPN_A_PEER_REMOTE_IPV6])) {
507512
rcu_read_unlock();
@@ -559,14 +564,14 @@ static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
559564
goto err_unlock;
560565
}
561566

562-
if (!net_eq(genl_info_net(info), sock_net(sock->sock->sk))) {
567+
if (!net_eq(genl_info_net(info), sock_net(sock->sk))) {
563568
id = peernet2id_alloc(genl_info_net(info),
564-
sock_net(sock->sock->sk),
569+
sock_net(sock->sk),
565570
GFP_ATOMIC);
566571
if (nla_put_s32(skb, OVPN_A_PEER_SOCKET_NETNSID, id))
567572
goto err_unlock;
568573
}
569-
local_port = inet_sk(sock->sock->sk)->inet_sport;
574+
local_port = inet_sk(sock->sk)->inet_sport;
570575
rcu_read_unlock();
571576

572577
if (nla_put_u32(skb, OVPN_A_PEER_ID, peer->id))
@@ -1153,8 +1158,8 @@ int ovpn_nl_peer_del_notify(struct ovpn_peer *peer)
11531158
ret = -EINVAL;
11541159
goto err_unlock;
11551160
}
1156-
genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
1157-
msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
1161+
genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, 0,
1162+
OVPN_NLGRP_PEERS, GFP_ATOMIC);
11581163
rcu_read_unlock();
11591164

11601165
return 0;
@@ -1218,8 +1223,8 @@ int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id)
12181223
ret = -EINVAL;
12191224
goto err_unlock;
12201225
}
1221-
genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
1222-
msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
1226+
genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, 0,
1227+
OVPN_NLGRP_PEERS, GFP_ATOMIC);
12231228
rcu_read_unlock();
12241229

12251230
return 0;

drivers/net/ovpn/peer.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ static void ovpn_peer_release_p2p(struct ovpn_priv *ovpn, struct sock *sk,
11451145

11461146
if (sk) {
11471147
ovpn_sock = rcu_access_pointer(peer->sock);
1148-
if (!ovpn_sock || ovpn_sock->sock->sk != sk) {
1148+
if (!ovpn_sock || ovpn_sock->sk != sk) {
11491149
spin_unlock_bh(&ovpn->lock);
11501150
ovpn_peer_put(peer);
11511151
return;
@@ -1175,7 +1175,7 @@ static void ovpn_peers_release_mp(struct ovpn_priv *ovpn, struct sock *sk,
11751175
if (sk) {
11761176
rcu_read_lock();
11771177
ovpn_sock = rcu_dereference(peer->sock);
1178-
remove = ovpn_sock && ovpn_sock->sock->sk == sk;
1178+
remove = ovpn_sock && ovpn_sock->sk == sk;
11791179
rcu_read_unlock();
11801180
}
11811181

drivers/net/ovpn/socket.c

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ static void ovpn_socket_release_kref(struct kref *kref)
2424
struct ovpn_socket *sock = container_of(kref, struct ovpn_socket,
2525
refcount);
2626

27-
if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
27+
if (sock->sk->sk_protocol == IPPROTO_UDP)
2828
ovpn_udp_socket_detach(sock);
29-
else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
29+
else if (sock->sk->sk_protocol == IPPROTO_TCP)
3030
ovpn_tcp_socket_detach(sock);
3131
}
3232

@@ -75,37 +75,31 @@ void ovpn_socket_release(struct ovpn_peer *peer)
7575
if (!sock)
7676
return;
7777

78-
/* sanity check: we should not end up here if the socket
79-
* was already closed
80-
*/
81-
if (!sock->sock->sk) {
82-
DEBUG_NET_WARN_ON_ONCE(1);
83-
return;
84-
}
85-
8678
/* Drop the reference while holding the sock lock to avoid
8779
* concurrent ovpn_socket_new call to mess up with a partially
8880
* detached socket.
8981
*
9082
* Holding the lock ensures that a socket with refcnt 0 is fully
9183
* detached before it can be picked by a concurrent reader.
9284
*/
93-
lock_sock(sock->sock->sk);
85+
lock_sock(sock->sk);
9486
released = ovpn_socket_put(peer, sock);
95-
release_sock(sock->sock->sk);
87+
release_sock(sock->sk);
9688

9789
/* align all readers with sk_user_data being NULL */
9890
synchronize_rcu();
9991

10092
/* following cleanup should happen with lock released */
10193
if (released) {
102-
if (sock->sock->sk->sk_protocol == IPPROTO_UDP) {
94+
if (sock->sk->sk_protocol == IPPROTO_UDP) {
10395
netdev_put(sock->ovpn->dev, &sock->dev_tracker);
104-
} else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) {
96+
} else if (sock->sk->sk_protocol == IPPROTO_TCP) {
10597
/* wait for TCP jobs to terminate */
10698
ovpn_tcp_socket_wait_finish(sock);
10799
ovpn_peer_put(sock->peer);
108100
}
101+
/* drop reference acquired in ovpn_socket_new() */
102+
sock_put(sock->sk);
109103
/* we can call plain kfree() because we already waited one RCU
110104
* period due to synchronize_rcu()
111105
*/
@@ -118,12 +112,14 @@ static bool ovpn_socket_hold(struct ovpn_socket *sock)
118112
return kref_get_unless_zero(&sock->refcount);
119113
}
120114

121-
static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
115+
static int ovpn_socket_attach(struct ovpn_socket *ovpn_sock,
116+
struct socket *sock,
117+
struct ovpn_peer *peer)
122118
{
123-
if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
124-
return ovpn_udp_socket_attach(sock, peer->ovpn);
125-
else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
126-
return ovpn_tcp_socket_attach(sock, peer);
119+
if (sock->sk->sk_protocol == IPPROTO_UDP)
120+
return ovpn_udp_socket_attach(ovpn_sock, sock, peer->ovpn);
121+
else if (sock->sk->sk_protocol == IPPROTO_TCP)
122+
return ovpn_tcp_socket_attach(ovpn_sock, peer);
127123

128124
return -EOPNOTSUPP;
129125
}
@@ -138,23 +134,24 @@ static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
138134
struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
139135
{
140136
struct ovpn_socket *ovpn_sock;
137+
struct sock *sk = sock->sk;
141138
int ret;
142139

143-
lock_sock(sock->sk);
140+
lock_sock(sk);
144141

145142
/* a TCP socket can only be owned by a single peer, therefore there
146143
* can't be any other user
147144
*/
148-
if (sock->sk->sk_protocol == IPPROTO_TCP && sock->sk->sk_user_data) {
145+
if (sk->sk_protocol == IPPROTO_TCP && sk->sk_user_data) {
149146
ovpn_sock = ERR_PTR(-EBUSY);
150147
goto sock_release;
151148
}
152149

153150
/* a UDP socket can be shared across multiple peers, but we must make
154151
* sure it is not owned by something else
155152
*/
156-
if (sock->sk->sk_protocol == IPPROTO_UDP) {
157-
u8 type = READ_ONCE(udp_sk(sock->sk)->encap_type);
153+
if (sk->sk_protocol == IPPROTO_UDP) {
154+
u8 type = READ_ONCE(udp_sk(sk)->encap_type);
158155

159156
/* socket owned by other encapsulation module */
160157
if (type && type != UDP_ENCAP_OVPNINUDP) {
@@ -163,7 +160,7 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
163160
}
164161

165162
rcu_read_lock();
166-
ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
163+
ovpn_sock = rcu_dereference_sk_user_data(sk);
167164
if (ovpn_sock) {
168165
/* socket owned by another ovpn instance, we can't use it */
169166
if (ovpn_sock->ovpn != peer->ovpn) {
@@ -192,19 +189,30 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
192189
rcu_read_unlock();
193190
}
194191

192+
/* increase sk refcounter as we'll store a reference in
193+
* ovpn_socket.
194+
* ovpn_socket_release() will decrement the refcounter.
195+
*/
196+
if (!refcount_inc_not_zero(&sk->sk_refcnt)) {
197+
ovpn_sock = ERR_PTR(-ENOTSOCK);
198+
goto sock_release;
199+
}
200+
195201
/* socket is not owned: attach to this ovpn instance */
196202

197203
ovpn_sock = kzalloc(sizeof(*ovpn_sock), GFP_KERNEL);
198204
if (!ovpn_sock) {
205+
sock_put(sk);
199206
ovpn_sock = ERR_PTR(-ENOMEM);
200207
goto sock_release;
201208
}
202209

203-
ovpn_sock->sock = sock;
210+
ovpn_sock->sk = sk;
204211
kref_init(&ovpn_sock->refcount);
205212

206-
ret = ovpn_socket_attach(ovpn_sock, peer);
213+
ret = ovpn_socket_attach(ovpn_sock, sock, peer);
207214
if (ret < 0) {
215+
sock_put(sk);
208216
kfree(ovpn_sock);
209217
ovpn_sock = ERR_PTR(ret);
210218
goto sock_release;
@@ -213,11 +221,11 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
213221
/* TCP sockets are per-peer, therefore they are linked to their unique
214222
* peer
215223
*/
216-
if (sock->sk->sk_protocol == IPPROTO_TCP) {
224+
if (sk->sk_protocol == IPPROTO_TCP) {
217225
INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work);
218226
ovpn_sock->peer = peer;
219227
ovpn_peer_hold(peer);
220-
} else if (sock->sk->sk_protocol == IPPROTO_UDP) {
228+
} else if (sk->sk_protocol == IPPROTO_UDP) {
221229
/* in UDP we only link the ovpn instance since the socket is
222230
* shared among multiple peers
223231
*/
@@ -226,8 +234,8 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
226234
GFP_KERNEL);
227235
}
228236

229-
rcu_assign_sk_user_data(sock->sk, ovpn_sock);
237+
rcu_assign_sk_user_data(sk, ovpn_sock);
230238
sock_release:
231-
release_sock(sock->sk);
239+
release_sock(sk);
232240
return ovpn_sock;
233241
}

drivers/net/ovpn/socket.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ struct ovpn_peer;
2222
* @ovpn: ovpn instance owning this socket (UDP only)
2323
* @dev_tracker: reference tracker for associated dev (UDP only)
2424
* @peer: unique peer transmitting over this socket (TCP only)
25-
* @sock: the low level sock object
25+
* @sk: the low level sock object
2626
* @refcount: amount of contexts currently referencing this object
2727
* @work: member used to schedule release routine (it may block)
2828
* @tcp_tx_work: work for deferring outgoing packet processing (TCP only)
@@ -36,7 +36,7 @@ struct ovpn_socket {
3636
struct ovpn_peer *peer;
3737
};
3838

39-
struct socket *sock;
39+
struct sock *sk;
4040
struct kref refcount;
4141
struct work_struct work;
4242
struct work_struct tcp_tx_work;

0 commit comments

Comments
 (0)