@@ -15,6 +15,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/tcp.h>
 #include <linux/vmalloc.h>
 
 #include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 	kfree(lopt);
 }
 
+/*
+ * This function is called to set a Fast Open socket's "fastopen_rsk" field
+ * to NULL when a TFO socket no longer needs to access the request_sock.
+ * This happens only after 3WHS has been either completed or aborted (e.g.,
+ * RST is received).
+ *
+ * Before TFO, a child socket is created only after 3WHS is completed,
+ * hence it never needs to access the request_sock. Things get a lot more
+ * complex with TFO. A child socket, accepted or not, has to access its
+ * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
+ * until 3WHS is either completed or aborted. Afterwards the req will stay
+ * until either the child socket is accepted, or in the rare case when the
+ * listener is closed before the child is accepted.
+ *
+ * In short, a request socket is only freed after BOTH 3WHS has completed
+ * (or aborted) and the child socket has been accepted (or listener closed).
+ * When a child socket is accepted, its corresponding req->sk is set to
+ * NULL since it's no longer needed. More importantly, "req->sk == NULL"
+ * will be used by the code below to determine if a child socket has been
+ * accepted or not, and the check is protected by the fastopenq->lock
+ * described below.
+ *
+ * Note that fastopen_rsk is only accessed from the child socket's context
+ * with its socket lock held. But a request_sock (req) can be accessed by
+ * both its child socket through fastopen_rsk, and a listener socket through
+ * icsk_accept_queue.rskq_accept_head. To protect the access, a simple spin
+ * lock per listener, "icsk->icsk_accept_queue.fastopenq->lock", is created.
+ * Only in the rare case when both the listener and the child locks are
+ * held, e.g., in inet_csk_listen_stop(), do we not need to acquire the
+ * lock. The lock also protects other fields such as fastopenq->qlen, which
+ * is decremented by this function when fastopen_rsk is no longer needed.
+ *
+ * Note that another solution was to simply use the listener's existing
+ * socket lock. But first, the socket lock is difficult to use: it is not
+ * a simple spin lock - one must consider sock_owned_by_user() and arrange
+ * to use the sk_add_backlog() machinery. What really makes it infeasible,
+ * though, is the locking hierarchy violation: e.g., inet_csk_listen_stop()
+ * may try to acquire a child's lock while holding the listener's socket
+ * lock. A corner case might also exist in tcp_v4_hnd_req() that would
+ * trigger this locking order.
+ *
+ * When a TFO req is created, it needs to sock_hold its listener to prevent
+ * the latter from going away before the req does.
+ *
+ * This function also sets "treq->listener" to NULL and unreferences the
+ * listener socket. treq->listener is used by the listener, so it is
+ * protected by the fastopenq->lock in this function.
+ */
+void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
+			   bool reset)
+{
+	struct sock *lsk = tcp_rsk(req)->listener;
+	struct fastopen_queue *fastopenq =
+	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
+
+	BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
+
+	tcp_sk(sk)->fastopen_rsk = NULL;
+	spin_lock_bh(&fastopenq->lock);
+	fastopenq->qlen--;
+	tcp_rsk(req)->listener = NULL;
+	if (req->sk)	/* the child socket hasn't been accepted yet */
+		goto out;
+
+	if (!reset || lsk->sk_state != TCP_LISTEN) {
+		/* If the listener has been closed, don't bother with the
+		 * special RST handling below.
+		 */
+		spin_unlock_bh(&fastopenq->lock);
+		sock_put(lsk);
+		reqsk_free(req);
+		return;
+	}
+	/* Wait for 60 secs before removing a req that has triggered an RST.
+	 * This is a simple defense against TFO spoofing attacks: the req
+	 * keeps counting against fastopen.max_qlen, and TFO is disabled
+	 * when the qlen exceeds max_qlen.
+	 *
+	 * For more details see the CoNEXT'11 "TCP Fast Open" paper.
+	 */
+	req->expires = jiffies + 60*HZ;
+	if (fastopenq->rskq_rst_head == NULL)
+		fastopenq->rskq_rst_head = req;
+	else
+		fastopenq->rskq_rst_tail->dl_next = req;
+
+	req->dl_next = NULL;
+	fastopenq->rskq_rst_tail = req;
+	fastopenq->qlen++;
+out:
+	spin_unlock_bh(&fastopenq->lock);
+	sock_put(lsk);
+	return;
+}
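
The lifecycle the comment block describes (two owners, one req, freed only once both owners are done) can be condensed into a small model. Below is a minimal userspace sketch of that lifecycle, not kernel code: tfo_req, child_sock, listener and model_fastopen_remove() are invented stand-ins, a pthread mutex stands in for fastopenq->lock, and a plain counter stands in for the sock_hold()/sock_put() pair.

/* Minimal userspace model of the dual-owner request_sock lifecycle.
 * Every type and name below is a hypothetical stand-in, not a kernel API.
 * Build with: cc -pthread model.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct listener {
	pthread_mutex_t fastopen_lock;	/* stands in for fastopenq->lock */
	int qlen;			/* stands in for fastopenq->qlen */
	int refcnt;			/* stands in for sock_hold()/sock_put() */
};

struct tfo_req {
	struct child_sock *sk;		/* non-NULL until the child is accepted */
	struct listener *listener;	/* cf. tcp_rsk(req)->listener */
};

struct child_sock {
	struct tfo_req *fastopen_rsk;	/* cleared once 3WHS completes/aborts */
};

/* The child-side owner drops its half of the req once the 3WHS is done.
 * The req is freed here only if the listener-side owner is already gone,
 * i.e. req->sk == NULL because the child was accepted earlier.
 */
static void model_fastopen_remove(struct child_sock *sk, struct tfo_req *req)
{
	struct listener *lsk = req->listener;

	sk->fastopen_rsk = NULL;
	pthread_mutex_lock(&lsk->fastopen_lock);
	lsk->qlen--;
	req->listener = NULL;
	if (req->sk == NULL)		/* already accepted: we are the last owner */
		free(req);
	pthread_mutex_unlock(&lsk->fastopen_lock);
	lsk->refcnt--;			/* stands in for sock_put(lsk) */
}

int main(void)
{
	struct listener lsk = { .qlen = 1, .refcnt = 2 };
	struct tfo_req *req = calloc(1, sizeof(*req));
	struct child_sock child = { .fastopen_rsk = req };

	pthread_mutex_init(&lsk.fastopen_lock, NULL);
	req->listener = &lsk;		/* req->sk stays NULL: child was accepted */
	model_fastopen_remove(&child, req);
	printf("qlen=%d refcnt=%d\n", lsk.qlen, lsk.refcnt);	/* qlen=0 refcnt=1 */
	return 0;
}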
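
For context, here is what a call site on the child-socket side might look like. The fragment is purely illustrative and not part of this patch (the real call sites are introduced elsewhere in the TFO server series); it assumes a struct tcphdr pointer th is in scope and that the child's socket lock is held, as the BUG_ON above demands.

	/* Hypothetical call site, child socket lock held (illustration only). */
	struct request_sock *req = tcp_sk(sk)->fastopen_rsk;

	if (req != NULL) {
		/* reset is true only when the 3WHS was aborted by an incoming
		 * RST, which arms the 60-second anti-spoofing quarantine above.
		 */
		reqsk_fastopen_remove(sk, req, th->rst);
	}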
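
Finally, the point of the rskq_rst_head/rskq_rst_tail quarantine list is that a req which drew an RST keeps counting against fastopen.max_qlen for 60 seconds. A listener-side admission check built on that invariant might look like the sketch below; tfo_may_accept() is a hypothetical helper, and the actual max_qlen comparison is added elsewhere in this patch series.

	/* Hypothetical listener-side admission check (sketch only). */
	static bool tfo_may_accept(struct fastopen_queue *q)
	{
		bool ok;

		spin_lock_bh(&q->lock);
		/* Quarantined reqs whose req->expires has passed would be
		 * reaped here before qlen is compared against max_qlen.
		 */
		ok = q->qlen < q->max_qlen;
		spin_unlock_bh(&q->lock);
		return ok;
	}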