Skip to content

Commit 01770a1

Browse files
rjfdiaskuba-moo
authored andcommitted
tcp: fix race condition when creating child sockets from syncookies
When the TCP stack is in SYN flood mode, the server child socket is created from the SYN cookie received in a TCP packet with the ACK flag set. The child socket is created when the server receives the first TCP packet with a valid SYN cookie from the client. Usually, this packet corresponds to the final step of the TCP 3-way handshake, the ACK packet. But is also possible to receive a valid SYN cookie from the first TCP data packet sent by the client, and thus create a child socket from that SYN cookie. Since a client socket is ready to send data as soon as it receives the SYN+ACK packet from the server, the client can send the ACK packet (sent by the TCP stack code), and the first data packet (sent by the userspace program) almost at the same time, and thus the server will equally receive the two TCP packets with valid SYN cookies almost at the same instant. When such event happens, the TCP stack code has a race condition that occurs between the momement a lookup is done to the established connections hashtable to check for the existence of a connection for the same client, and the moment that the child socket is added to the established connections hashtable. As a consequence, this race condition can lead to a situation where we add two child sockets to the established connections hashtable and deliver two sockets to the userspace program to the same client. This patch fixes the race condition by checking if an existing child socket exists for the same client when we are adding the second child socket to the established connections socket. If an existing child socket exists, we drop the packet and discard the second child socket to the same client. Signed-off-by: Ricardo Dias <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 1eae77b commit 01770a1

File tree

7 files changed

+91
-16
lines changed

7 files changed

+91
-16
lines changed

include/net/inet_hashtables.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
247247
unsigned long high_limit);
248248
int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
249249

250-
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
251-
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
250+
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
251+
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
252+
bool *found_dup_sk);
252253
int __inet_hash(struct sock *sk, struct sock *osk);
253254
int inet_hash(struct sock *sk);
254255
void inet_unhash(struct sock *sk);

net/dccp/ipv4.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
427427

428428
if (__inet_inherit_port(sk, newsk) < 0)
429429
goto put_and_exit;
430-
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
430+
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
431431
if (*own_req)
432432
ireq->ireq_opt = NULL;
433433
else

net/dccp/ipv6.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
533533
dccp_done(newsk);
534534
goto out;
535535
}
536-
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
536+
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
537537
/* Clone pktoptions received with SYN, if we own the req */
538538
if (*own_req && ireq->pktopts) {
539539
newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);

net/ipv4/inet_connection_sock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
787787
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
788788
mod_timer(&req->rsk_timer, jiffies + timeout);
789789

790-
inet_ehash_insert(req_to_sk(req), NULL);
790+
inet_ehash_insert(req_to_sk(req), NULL, NULL);
791791
/* before letting lookups find us, make sure all req fields
792792
* are committed to memory and refcnt initialized.
793793
*/

net/ipv4/inet_hashtables.c

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
#include <net/addrconf.h>
2121
#include <net/inet_connection_sock.h>
2222
#include <net/inet_hashtables.h>
23+
#if IS_ENABLED(CONFIG_IPV6)
24+
#include <net/inet6_hashtables.h>
25+
#endif
2326
#include <net/secure_seq.h>
2427
#include <net/ip.h>
2528
#include <net/tcp.h>
@@ -508,10 +511,52 @@ static u32 inet_sk_port_offset(const struct sock *sk)
508511
inet->inet_dport);
509512
}
510513

511-
/* insert a socket into ehash, and eventually remove another one
512-
* (The another one can be a SYN_RECV or TIMEWAIT
514+
/* Searches for an exsiting socket in the ehash bucket list.
515+
* Returns true if found, false otherwise.
513516
*/
514-
bool inet_ehash_insert(struct sock *sk, struct sock *osk)
517+
static bool inet_ehash_lookup_by_sk(struct sock *sk,
518+
struct hlist_nulls_head *list)
519+
{
520+
const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
521+
const int sdif = sk->sk_bound_dev_if;
522+
const int dif = sk->sk_bound_dev_if;
523+
const struct hlist_nulls_node *node;
524+
struct net *net = sock_net(sk);
525+
struct sock *esk;
526+
527+
INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
528+
529+
sk_nulls_for_each_rcu(esk, node, list) {
530+
if (esk->sk_hash != sk->sk_hash)
531+
continue;
532+
if (sk->sk_family == AF_INET) {
533+
if (unlikely(INET_MATCH(esk, net, acookie,
534+
sk->sk_daddr,
535+
sk->sk_rcv_saddr,
536+
ports, dif, sdif))) {
537+
return true;
538+
}
539+
}
540+
#if IS_ENABLED(CONFIG_IPV6)
541+
else if (sk->sk_family == AF_INET6) {
542+
if (unlikely(INET6_MATCH(esk, net,
543+
&sk->sk_v6_daddr,
544+
&sk->sk_v6_rcv_saddr,
545+
ports, dif, sdif))) {
546+
return true;
547+
}
548+
}
549+
#endif
550+
}
551+
return false;
552+
}
553+
554+
/* Insert a socket into ehash, and eventually remove another one
555+
* (The another one can be a SYN_RECV or TIMEWAIT)
556+
* If an existing socket already exists, socket sk is not inserted,
557+
* and sets found_dup_sk parameter to true.
558+
*/
559+
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
515560
{
516561
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
517562
struct hlist_nulls_head *list;
@@ -530,16 +575,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
530575
if (osk) {
531576
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
532577
ret = sk_nulls_del_node_init_rcu(osk);
578+
} else if (found_dup_sk) {
579+
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
580+
if (*found_dup_sk)
581+
ret = false;
533582
}
583+
534584
if (ret)
535585
__sk_nulls_add_node_rcu(sk, list);
586+
536587
spin_unlock(lock);
588+
537589
return ret;
538590
}
539591

540-
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
592+
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
541593
{
542-
bool ok = inet_ehash_insert(sk, osk);
594+
bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
543595

544596
if (ok) {
545597
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -583,7 +635,7 @@ int __inet_hash(struct sock *sk, struct sock *osk)
583635
int err = 0;
584636

585637
if (sk->sk_state != TCP_LISTEN) {
586-
inet_ehash_nolisten(sk, osk);
638+
inet_ehash_nolisten(sk, osk, NULL);
587639
return 0;
588640
}
589641
WARN_ON(!sk_unhashed(sk));
@@ -679,7 +731,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
679731
tb = inet_csk(sk)->icsk_bind_hash;
680732
spin_lock_bh(&head->lock);
681733
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
682-
inet_ehash_nolisten(sk, NULL);
734+
inet_ehash_nolisten(sk, NULL, NULL);
683735
spin_unlock_bh(&head->lock);
684736
return 0;
685737
}
@@ -758,7 +810,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
758810
inet_bind_hash(sk, tb, port);
759811
if (sk_unhashed(sk)) {
760812
inet_sk(sk)->inet_sport = htons(port);
761-
inet_ehash_nolisten(sk, (struct sock *)tw);
813+
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
762814
}
763815
if (tw)
764816
inet_twsk_bind_unhash(tw, hinfo);

net/ipv4/tcp_ipv4.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
14991499
bool *own_req)
15001500
{
15011501
struct inet_request_sock *ireq;
1502+
bool found_dup_sk = false;
15021503
struct inet_sock *newinet;
15031504
struct tcp_sock *newtp;
15041505
struct sock *newsk;
@@ -1576,12 +1577,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
15761577

15771578
if (__inet_inherit_port(sk, newsk) < 0)
15781579
goto put_and_exit;
1579-
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1580+
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1581+
&found_dup_sk);
15801582
if (likely(*own_req)) {
15811583
tcp_move_syn(newtp, req);
15821584
ireq->ireq_opt = NULL;
15831585
} else {
1584-
newinet->inet_opt = NULL;
1586+
if (!req_unhash && found_dup_sk) {
1587+
/* This code path should only be executed in the
1588+
* syncookie case only
1589+
*/
1590+
bh_unlock_sock(newsk);
1591+
sock_put(newsk);
1592+
newsk = NULL;
1593+
} else {
1594+
newinet->inet_opt = NULL;
1595+
}
15851596
}
15861597
return newsk;
15871598

net/ipv6/tcp_ipv6.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
11931193
const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
11941194
struct ipv6_txoptions *opt;
11951195
struct inet_sock *newinet;
1196+
bool found_dup_sk = false;
11961197
struct tcp_sock *newtp;
11971198
struct sock *newsk;
11981199
#ifdef CONFIG_TCP_MD5SIG
@@ -1368,7 +1369,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
13681369
tcp_done(newsk);
13691370
goto out;
13701371
}
1371-
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1372+
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1373+
&found_dup_sk);
13721374
if (*own_req) {
13731375
tcp_move_syn(newtp, req);
13741376

@@ -1383,6 +1385,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
13831385
skb_set_owner_r(newnp->pktoptions, newsk);
13841386
}
13851387
}
1388+
} else {
1389+
if (!req_unhash && found_dup_sk) {
1390+
/* This code path should only be executed in the
1391+
* syncookie case only
1392+
*/
1393+
bh_unlock_sock(newsk);
1394+
sock_put(newsk);
1395+
newsk = NULL;
1396+
}
13861397
}
13871398

13881399
return newsk;

0 commit comments

Comments
 (0)