Skip to content

Commit 7c13f97

Browse files
Paolo Abeni, davem330
Paolo Abeni
authored and committed
udp: do fwd memory scheduling on dequeue
A new argument is added to __skb_recv_datagram to provide an explicit skb destructor, invoked under the receive queue lock. The UDP protocol uses this argument to perform memory reclaiming on dequeue, so that the UDP protocol no longer sets skb->destructor. Instead, explicit memory reclaiming is performed at close() time and when skbs are removed from the receive queue. The in-kernel UDP protocol users now need to call a skb_recv_udp() variant instead of skb_recv_datagram() to properly perform memory accounting on dequeue.

Overall, this allows acquiring the receive queue lock only once on dequeue.

Tested using pktgen with random src port, 64-byte packets, wire-speed on a 10G link as sender and udp_sink as the receiver, using an l4 tuple rxhash to stress the contention, and one or more udp_sink instances with reuseport.

nr sinks    vanilla    patched
1           440        560
3           2150       2300
6           3650       3800
9           4450       4600
12          6250       6450

v1 -> v2:
- do rmem and allocated memory scheduling under the receive lock
- do bulk scheduling in first_packet_length() and in udp_destruct_sock()
- avoid the typedef for the dequeue callback

Suggested-by: Eric Dumazet <[email protected]>
Acked-by: Hannes Frederic Sowa <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
Acked-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent ad95903 commit 7c13f97

File tree

9 files changed

+63
-33
lines changed

9 files changed

+63
-33
lines changed

include/linux/skbuff.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
30333033
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
30343034
const struct sk_buff *skb);
30353035
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
3036+
void (*destructor)(struct sock *sk,
3037+
struct sk_buff *skb),
30363038
int *peeked, int *off, int *err,
30373039
struct sk_buff **last);
30383040
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
3041+
void (*destructor)(struct sock *sk,
3042+
struct sk_buff *skb),
30393043
int *peeked, int *off, int *err);
30403044
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
30413045
int *err);

include/net/udp.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,21 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
248248
/* net/ipv4/udp.c */
249249
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
250250
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
251+
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
252+
static inline struct sk_buff *
253+
__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked,
254+
int *off, int *err)
255+
{
256+
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
257+
udp_skb_destructor, peeked, off, err);
258+
}
259+
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
260+
int noblock, int *err)
261+
{
262+
int peeked, off = 0;
263+
264+
return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
265+
}
251266

252267
void udp_v4_early_demux(struct sk_buff *skb);
253268
int udp_get_port(struct sock *sk, unsigned short snum,

net/core/datagram.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
165165
* __skb_try_recv_datagram - Receive a datagram skbuff
166166
* @sk: socket
167167
* @flags: MSG_ flags
168+
* @destructor: invoked under the receive lock on successful dequeue
168169
* @peeked: returns non-zero if this packet has been seen before
169170
* @off: an offset in bytes to peek skb from. Returns an offset
170171
* within an skb where data actually starts
@@ -197,6 +198,8 @@ static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
197198
* the standard around please.
198199
*/
199200
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
201+
void (*destructor)(struct sock *sk,
202+
struct sk_buff *skb),
200203
int *peeked, int *off, int *err,
201204
struct sk_buff **last)
202205
{
@@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
241244
}
242245

243246
atomic_inc(&skb->users);
244-
} else
247+
} else {
245248
__skb_unlink(skb, queue);
246-
249+
if (destructor)
250+
destructor(sk, skb);
251+
}
247252
spin_unlock_irqrestore(&queue->lock, cpu_flags);
248253
*off = _off;
249254
return skb;
@@ -262,6 +267,8 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
262267
EXPORT_SYMBOL(__skb_try_recv_datagram);
263268

264269
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
270+
void (*destructor)(struct sock *sk,
271+
struct sk_buff *skb),
265272
int *peeked, int *off, int *err)
266273
{
267274
struct sk_buff *skb, *last;
@@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
270277
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
271278

272279
do {
273-
skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
274-
&last);
280+
skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
281+
off, err, &last);
275282
if (skb)
276283
return skb;
277284

@@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
290297
int peeked, off = 0;
291298

292299
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
293-
&peeked, &off, err);
300+
NULL, &peeked, &off, err);
294301
}
295302
EXPORT_SYMBOL(skb_recv_datagram);
296303

net/ipv4/udp.c

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,26 +1173,26 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
11731173
return ret;
11741174
}
11751175

1176+
/* fully reclaim rmem/fwd memory allocated for skb */
11761177
static void udp_rmem_release(struct sock *sk, int size, int partial)
11771178
{
11781179
int amt;
11791180

11801181
atomic_sub(size, &sk->sk_rmem_alloc);
1181-
1182-
spin_lock_bh(&sk->sk_receive_queue.lock);
11831182
sk->sk_forward_alloc += size;
11841183
amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
11851184
sk->sk_forward_alloc -= amt;
1186-
spin_unlock_bh(&sk->sk_receive_queue.lock);
11871185

11881186
if (amt)
11891187
__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
11901188
}
11911189

1192-
static void udp_rmem_free(struct sk_buff *skb)
1190+
/* Note: called with sk_receive_queue.lock held */
1191+
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
11931192
{
1194-
udp_rmem_release(skb->sk, skb->truesize, 1);
1193+
udp_rmem_release(sk, skb->truesize, 1);
11951194
}
1195+
EXPORT_SYMBOL(udp_skb_destructor);
11961196

11971197
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
11981198
{
@@ -1229,9 +1229,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
12291229

12301230
sk->sk_forward_alloc -= size;
12311231

1232-
/* the skb owner in now the udp socket */
1233-
skb->sk = sk;
1234-
skb->destructor = udp_rmem_free;
1232+
/* no need to setup a destructor, we will explicitly release the
1233+
* forward allocated memory on dequeue
1234+
*/
12351235
skb->dev = NULL;
12361236
sock_skb_set_dropcount(sk, skb);
12371237

@@ -1255,8 +1255,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
12551255
static void udp_destruct_sock(struct sock *sk)
12561256
{
12571257
/* reclaim completely the forward allocated memory */
1258-
__skb_queue_purge(&sk->sk_receive_queue);
1259-
udp_rmem_release(sk, 0, 0);
1258+
unsigned int total = 0;
1259+
struct sk_buff *skb;
1260+
1261+
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1262+
total += skb->truesize;
1263+
kfree_skb(skb);
1264+
}
1265+
udp_rmem_release(sk, total, 0);
1266+
12601267
inet_sock_destruct(sk);
12611268
}
12621269

@@ -1288,12 +1295,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
12881295
*/
12891296
static int first_packet_length(struct sock *sk)
12901297
{
1291-
struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
1298+
struct sk_buff_head *rcvq = &sk->sk_receive_queue;
12921299
struct sk_buff *skb;
1300+
int total = 0;
12931301
int res;
12941302

1295-
__skb_queue_head_init(&list_kill);
1296-
12971303
spin_lock_bh(&rcvq->lock);
12981304
while ((skb = skb_peek(rcvq)) != NULL &&
12991305
udp_lib_checksum_complete(skb)) {
@@ -1303,12 +1309,13 @@ static int first_packet_length(struct sock *sk)
13031309
IS_UDPLITE(sk));
13041310
atomic_inc(&sk->sk_drops);
13051311
__skb_unlink(skb, rcvq);
1306-
__skb_queue_tail(&list_kill, skb);
1312+
total += skb->truesize;
1313+
kfree_skb(skb);
13071314
}
13081315
res = skb ? skb->len : -1;
1316+
if (total)
1317+
udp_rmem_release(sk, total, 1);
13091318
spin_unlock_bh(&rcvq->lock);
1310-
1311-
__skb_queue_purge(&list_kill);
13121319
return res;
13131320
}
13141321

@@ -1363,8 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
13631370

13641371
try_again:
13651372
peeking = off = sk_peek_offset(sk, flags);
1366-
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
1367-
&peeked, &off, &err);
1373+
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
13681374
if (!skb)
13691375
return err;
13701376

net/ipv6/udp.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
343343

344344
try_again:
345345
peeking = off = sk_peek_offset(sk, flags);
346-
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
347-
&peeked, &off, &err);
346+
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
348347
if (!skb)
349348
return err;
350349

net/rxrpc/input.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
10531053

10541054
ASSERT(!irqs_disabled());
10551055

1056-
skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
1056+
skb = skb_recv_udp(udp_sk, 0, 1, &ret);
10571057
if (!skb) {
10581058
if (ret == -EAGAIN)
10591059
return;
@@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
10751075

10761076
__UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
10771077

1078-
/* The socket buffer we have is owned by UDP, with UDP's data all over
1079-
* it, but we really want our own data there.
1078+
/* The UDP protocol already released all skb resources;
1079+
* we are free to add our own data there.
10801080
*/
1081-
skb_orphan(skb);
10821081
sp = rxrpc_skb(skb);
10831082

10841083
/* dig out the RxRPC connection details */

net/sunrpc/svcsock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
547547
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
548548
0, 0, MSG_PEEK | MSG_DONTWAIT);
549549
if (err >= 0)
550-
skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
550+
skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
551551

552552
if (skb == NULL) {
553553
if (err != -EAGAIN) {

net/sunrpc/xprtsock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
10801080
if (sk == NULL)
10811081
goto out;
10821082
for (;;) {
1083-
skb = skb_recv_datagram(sk, 0, 1, &err);
1083+
skb = skb_recv_udp(sk, 0, 1, &err);
10841084
if (skb != NULL) {
10851085
xs_udp_data_read_skb(&transport->xprt, sk, skb);
10861086
consume_skb(skb);

net/unix/af_unix.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
21132113
mutex_lock(&u->iolock);
21142114

21152115
skip = sk_peek_offset(sk, flags);
2116-
skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2117-
&last);
2116+
skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2117+
&err, &last);
21182118
if (skb)
21192119
break;
21202120

0 commit comments

Comments
 (0)