Skip to content

Commit 28d6427

Browse files
Eric Dumazet authored and davem330 committed
net: attempt high order allocations in sock_alloc_send_pskb()
Adding paged frags skbs to af_unix sockets introduced a performance
regression on large sends because of additional page allocations, even
if each skb could carry at least 100% more payload than before.

We can instruct sock_alloc_send_pskb() to attempt high order
allocations.

Most of the time, it does a single page allocation instead of 8.

I added an additional parameter to sock_alloc_send_pskb() to let other
users opt in to this new feature in followup patches.

Tested:

Before patch :

$ netperf -t STREAM_STREAM
STREAM STREAM TEST
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 2304  212992  212992   10.00    46861.15

After patch :

$ netperf -t STREAM_STREAM
STREAM STREAM TEST
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 2304  212992  212992   10.00    57981.11

Signed-off-by: Eric Dumazet <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent e370a72 commit 28d6427

File tree

6 files changed

+60
-55
lines changed

6 files changed

+60
-55
lines changed

drivers/net/macvtap.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -524,7 +524,7 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
524524
linear = len;
525525

526526
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
527-
err);
527+
err, 0);
528528
if (!skb)
529529
return NULL;
530530

drivers/net/tun.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -949,7 +949,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
949949
linear = len;
950950

951951
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
952-
&err);
952+
&err, 0);
953953
if (!skb)
954954
return ERR_PTR(err);
955955

include/net/sock.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1539,7 +1539,8 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
15391539
unsigned long header_len,
15401540
unsigned long data_len,
15411541
int noblock,
1542-
int *errcode);
1542+
int *errcode,
1543+
int max_page_order);
15431544
extern void *sock_kmalloc(struct sock *sk, int size,
15441545
gfp_t priority);
15451546
extern void sock_kfree_s(struct sock *sk, void *mem, int size);

net/core/sock.c

Lines changed: 51 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1741,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
17411741

17421742
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
17431743
unsigned long data_len, int noblock,
1744-
int *errcode)
1744+
int *errcode, int max_page_order)
17451745
{
1746-
struct sk_buff *skb;
1746+
struct sk_buff *skb = NULL;
1747+
unsigned long chunk;
17471748
gfp_t gfp_mask;
17481749
long timeo;
17491750
int err;
17501751
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1752+
struct page *page;
1753+
int i;
17511754

17521755
err = -EMSGSIZE;
17531756
if (npages > MAX_SKB_FRAGS)
17541757
goto failure;
17551758

1756-
gfp_mask = sk->sk_allocation;
1757-
if (gfp_mask & __GFP_WAIT)
1758-
gfp_mask |= __GFP_REPEAT;
1759-
17601759
timeo = sock_sndtimeo(sk, noblock);
1761-
while (1) {
1760+
while (!skb) {
17621761
err = sock_error(sk);
17631762
if (err != 0)
17641763
goto failure;
@@ -1767,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
17671766
if (sk->sk_shutdown & SEND_SHUTDOWN)
17681767
goto failure;
17691768

1770-
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1771-
skb = alloc_skb(header_len, gfp_mask);
1772-
if (skb) {
1773-
int i;
1774-
1775-
/* No pages, we're done... */
1776-
if (!data_len)
1777-
break;
1778-
1779-
skb->truesize += data_len;
1780-
skb_shinfo(skb)->nr_frags = npages;
1781-
for (i = 0; i < npages; i++) {
1782-
struct page *page;
1783-
1784-
page = alloc_pages(sk->sk_allocation, 0);
1785-
if (!page) {
1786-
err = -ENOBUFS;
1787-
skb_shinfo(skb)->nr_frags = i;
1788-
kfree_skb(skb);
1789-
goto failure;
1790-
}
1791-
1792-
__skb_fill_page_desc(skb, i,
1793-
page, 0,
1794-
(data_len >= PAGE_SIZE ?
1795-
PAGE_SIZE :
1796-
data_len));
1797-
data_len -= PAGE_SIZE;
1798-
}
1769+
if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
1770+
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1771+
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1772+
err = -EAGAIN;
1773+
if (!timeo)
1774+
goto failure;
1775+
if (signal_pending(current))
1776+
goto interrupted;
1777+
timeo = sock_wait_for_wmem(sk, timeo);
1778+
continue;
1779+
}
17991780

1800-
/* Full success... */
1801-
break;
1802-
}
1803-
err = -ENOBUFS;
1781+
err = -ENOBUFS;
1782+
gfp_mask = sk->sk_allocation;
1783+
if (gfp_mask & __GFP_WAIT)
1784+
gfp_mask |= __GFP_REPEAT;
1785+
1786+
skb = alloc_skb(header_len, gfp_mask);
1787+
if (!skb)
18041788
goto failure;
1789+
1790+
skb->truesize += data_len;
1791+
1792+
for (i = 0; npages > 0; i++) {
1793+
int order = max_page_order;
1794+
1795+
while (order) {
1796+
if (npages >= 1 << order) {
1797+
page = alloc_pages(sk->sk_allocation |
1798+
__GFP_COMP | __GFP_NOWARN,
1799+
order);
1800+
if (page)
1801+
goto fill_page;
1802+
}
1803+
order--;
1804+
}
1805+
page = alloc_page(sk->sk_allocation);
1806+
if (!page)
1807+
goto failure;
1808+
fill_page:
1809+
chunk = min_t(unsigned long, data_len,
1810+
PAGE_SIZE << order);
1811+
skb_fill_page_desc(skb, i, page, 0, chunk);
1812+
data_len -= chunk;
1813+
npages -= 1 << order;
18051814
}
1806-
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1807-
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1808-
err = -EAGAIN;
1809-
if (!timeo)
1810-
goto failure;
1811-
if (signal_pending(current))
1812-
goto interrupted;
1813-
timeo = sock_wait_for_wmem(sk, timeo);
18141815
}
18151816

18161817
skb_set_owner_w(skb, sk);
@@ -1819,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
18191820
interrupted:
18201821
err = sock_intr_errno(timeo);
18211822
failure:
1823+
kfree_skb(skb);
18221824
*errcode = err;
18231825
return NULL;
18241826
}
@@ -1827,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
18271829
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
18281830
int noblock, int *errcode)
18291831
{
1830-
return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1832+
return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
18311833
}
18321834
EXPORT_SYMBOL(sock_alloc_send_skb);
18331835

net/packet/af_packet.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2181,7 +2181,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
21812181
linear = len;
21822182

21832183
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2184-
err);
2184+
err, 0);
21852185
if (!skb)
21862186
return NULL;
21872187

net/unix/af_unix.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1479,7 +1479,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
14791479
MAX_SKB_FRAGS * PAGE_SIZE);
14801480

14811481
skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1482-
msg->msg_flags & MSG_DONTWAIT, &err);
1482+
msg->msg_flags & MSG_DONTWAIT, &err,
1483+
PAGE_ALLOC_COSTLY_ORDER);
14831484
if (skb == NULL)
14841485
goto out;
14851486

@@ -1651,7 +1652,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16511652
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
16521653

16531654
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1654-
msg->msg_flags & MSG_DONTWAIT, &err);
1655+
msg->msg_flags & MSG_DONTWAIT, &err,
1656+
get_order(UNIX_SKB_FRAGS_SZ));
16551657
if (!skb)
16561658
goto out_err;
16571659

0 commit comments

Comments
 (0)