Commit 1104671

mfijalko authored and Kernel Patches Daemon committed
veth: update mem type in xdp_buff
When an skb's headroom is insufficient for XDP purposes, skb_pp_cow_data() returns a new skb with the requested headroom; this skb is backed by a page_pool. With CONFIG_DEBUG_VM=y, an XDP program that calls bpf_xdp_adjust_tail() on an skb with frags, and the helper shrinking the buffer by enough bytes to release a page, the following splat was observed:

[ 32.204881] BUG: Bad page state in process test_progs pfn:11c98b
[ 32.207167] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11c98b
[ 32.210084] flags: 0x1fffe0000000000(node=0|zone=1|lastcpupid=0x7fff)
[ 32.212493] raw: 01fffe0000000000 dead000000000040 ff11000123c9b000 0000000000000000
[ 32.218056] raw: 0000000000000000 0000000000000001 00000000ffffffff 0000000000000000
[ 32.220900] page dumped because: page_pool leak
[ 32.222636] Modules linked in: bpf_testmod(O) bpf_preload
[ 32.224632] CPU: 6 UID: 0 PID: 3612 Comm: test_progs Tainted: G O 6.17.0-rc5-gfec474d29325 #6969 PREEMPT
[ 32.224638] Tainted: [O]=OOT_MODULE
[ 32.224639] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[ 32.224641] Call Trace:
[ 32.224644] <IRQ>
[ 32.224646] dump_stack_lvl+0x4b/0x70
[ 32.224653] bad_page.cold+0xbd/0xe0
[ 32.224657] __free_frozen_pages+0x838/0x10b0
[ 32.224660] ? skb_pp_cow_data+0x782/0xc30
[ 32.224665] bpf_xdp_shrink_data+0x221/0x530
[ 32.224668] ? skb_pp_cow_data+0x6d1/0xc30
[ 32.224671] bpf_xdp_adjust_tail+0x598/0x810
[ 32.224673] ? xsk_destruct_skb+0x321/0x800
[ 32.224678] bpf_prog_004ac6bb21de57a7_xsk_xdp_adjust_tail+0x52/0xd6
[ 32.224681] veth_xdp_rcv_skb+0x45d/0x15a0
[ 32.224684] ? get_stack_info_noinstr+0x16/0xe0
[ 32.224688] ? veth_set_channels+0x920/0x920
[ 32.224691] ? get_stack_info+0x2f/0x80
[ 32.224693] ? unwind_next_frame+0x3af/0x1df0
[ 32.224697] veth_xdp_rcv.constprop.0+0x38a/0xbe0
[ 32.224700] ? common_startup_64+0x13e/0x148
[ 32.224703] ? veth_xdp_rcv_one+0xcd0/0xcd0
[ 32.224706] ? stack_trace_save+0x84/0xa0
[ 32.224709] ? stack_depot_save_flags+0x28/0x820
[ 32.224713] ? __resched_curr.constprop.0+0x332/0x3b0
[ 32.224716] ? timerqueue_add+0x217/0x320
[ 32.224719] veth_poll+0x115/0x5e0
[ 32.224722] ? veth_xdp_rcv.constprop.0+0xbe0/0xbe0
[ 32.224726] ? update_load_avg+0x1cb/0x12d0
[ 32.224730] ? update_cfs_group+0x121/0x2c0
[ 32.224733] __napi_poll+0xa0/0x420
[ 32.224736] net_rx_action+0x901/0xe90
[ 32.224740] ? run_backlog_napi+0x50/0x50
[ 32.224743] ? clockevents_program_event+0x1cc/0x280
[ 32.224746] ? hrtimer_interrupt+0x31e/0x7c0
[ 32.224749] handle_softirqs+0x151/0x430
[ 32.224752] do_softirq+0x3f/0x60
[ 32.224755] </IRQ>

This happens because an xdp_rxq whose memory model is set to MEM_TYPE_PAGE_SHARED was used when initializing the xdp_buff. Fix this by using the new helper xdp_convert_skb_to_buff(), which, besides initializing and preparing the xdp_buff, checks whether the page backing the linear part of the xdp_buff comes from a page_pool. We assume that the linear data and the frags have the same memory provider, as the current XDP API gives us no way to distinguish them (the memory model is registered for the *whole* Rx queue, while here we are dealing with single-buffer granularity).

Before the xdp_buff leaves veth via XDP_{TX,REDIRECT}, the mem type on the xdp_rxq associated with it is restored to its original model. The previous setting has to be respected at least until the buff is converted to a frame, as the frame carries the mem_type.

Also add a page_pool variant of veth_xdp_get() so that we avoid a refcount underflow when draining a page frag.
Fixes: 0ebab78 ("net: veth: add page_pool for page recycling")
Reported-by: Alexei Starovoitov <[email protected]>
Closes: https://lore.kernel.org/bpf/CAADnVQ+bBofJDfieyOYzSmSujSfJwDTQhiz3aJw7hE+4E2_iPA@mail.gmail.com/
Signed-off-by: Maciej Fijalkowski <[email protected]>
Reviewed-by: Toke Høiland-Jørgensen <[email protected]>
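The helper xdp_convert_skb_to_buff() itself is not part of this diff; it is introduced elsewhere in the series. Purely as orientation, here is a minimal sketch of what such a helper could look like, assembled from the open-coded logic removed from veth_convert_skb_to_xdp_buff() below plus the per-buffer mem-type switch the commit message describes. The exact signature, placement, and the page_pool_page_is_pp()-based ownership check are assumptions, not code quoted from this commit.

/* Sketch only -- assumed shape of the helper named above, not the actual
 * upstream implementation.  The init/prepare part mirrors the code removed
 * from veth below; the page_pool check at the end is an assumption based
 * on the commit message.
 */
static void xdp_convert_skb_to_buff(struct sk_buff *skb, struct xdp_buff *xdp,
				    struct xdp_rxq_info *xdp_rxq)
{
	u32 frame_sz;

	/* SKB "head" area always has tailroom for skb_shared_info */
	frame_sz = skb_end_pointer(skb) - skb->head;
	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	xdp_init_buff(xdp, frame_sz, xdp_rxq);
	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
			 skb_headlen(skb), true);

	if (skb_is_nonlinear(skb)) {
		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
		xdp_buff_set_frags_flag(xdp);
	} else {
		xdp_buff_clear_frags_flag(xdp);
	}

	/* Assumed per-buffer check: if the head page is owned by a page_pool
	 * (e.g. the skb was rebuilt by skb_pp_cow_data()), advertise
	 * MEM_TYPE_PAGE_POOL on the rxq so that the buff -- and any frame
	 * made from it -- frees its pages back to the pool.
	 */
	if (page_pool_page_is_pp(virt_to_head_page(xdp->data)))
		xdp_rxq->mem.type = MEM_TYPE_PAGE_POOL;
}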
1 parent 12c5caf

1 file changed, 26 insertions(+), 17 deletions(-)

drivers/net/veth.c

@@ -733,7 +733,7 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static void veth_xdp_get(struct xdp_buff *xdp)
+static void veth_xdp_get_shared(struct xdp_buff *xdp)
 {
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 	int i;
@@ -746,12 +746,33 @@ static void veth_xdp_get(struct xdp_buff *xdp)
 		__skb_frag_ref(&sinfo->frags[i]);
 }
 
+static void veth_xdp_get_pp(struct xdp_buff *xdp)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
+
+	page_pool_ref_page(virt_to_page(xdp->data));
+	if (likely(!xdp_buff_has_frags(xdp)))
+		return;
+
+	for (i = 0; i < sinfo->nr_frags; i++) {
+		skb_frag_t *frag = &sinfo->frags[i];
+
+		page_pool_ref_page(netmem_to_page(frag->netmem));
+	}
+}
+
+static void veth_xdp_get(struct xdp_buff *xdp)
+{
+	xdp->rxq->mem.type == MEM_TYPE_PAGE_POOL ?
+		veth_xdp_get_pp(xdp) : veth_xdp_get_shared(xdp);
+}
+
 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 					struct xdp_buff *xdp,
 					struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
-	u32 frame_sz;
 
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
 	    skb_shinfo(skb)->nr_frags ||
@@ -762,19 +783,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		skb = *pskb;
 	}
 
-	/* SKB "head" area always have tailroom for skb_shared_info */
-	frame_sz = skb_end_pointer(skb) - skb->head;
-	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
-	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
-			 skb_headlen(skb), true);
-
-	if (skb_is_nonlinear(skb)) {
-		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
-		xdp_buff_set_frags_flag(xdp);
-	} else {
-		xdp_buff_clear_frags_flag(xdp);
-	}
+	xdp_convert_skb_to_buff(skb, xdp, &rq->xdp_rxq);
 	*pskb = skb;
 
 	return 0;
@@ -822,24 +831,24 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	case XDP_TX:
 		veth_xdp_get(xdp);
 		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
 		if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
 			trace_xdp_exception(rq->dev, xdp_prog, act);
 			stats->rx_drops++;
 			goto err_xdp;
 		}
 		stats->xdp_tx++;
+		rq->xdp_rxq.mem = rq->xdp_mem;
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
 		veth_xdp_get(xdp);
 		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
 		if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
 			stats->rx_drops++;
 			goto err_xdp;
 		}
 		stats->xdp_redirect++;
+		rq->xdp_rxq.mem = rq->xdp_mem;
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
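A note on the ordering in the last hunk: both veth_xdp_tx() and the map paths behind xdp_do_redirect() convert the xdp_buff into an xdp_frame, and, as the commit message says, the frame carries the mem_type taken from the rxq attached to the buff. That is why the restore of rq->xdp_rxq.mem now sits after those calls instead of before them. A rough sketch of the dependency, not code quoted from this commit:

/* Rough sketch of the ordering constraint.  Inside veth_xdp_tx() (and the
 * frame-based redirect targets) the buff is turned into a frame first:
 */
struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);

/* The frame records the memory type it was built with, taken from the rxq
 * the buff points at ("frame carries the mem_type").  For page_pool-backed
 * skbs the rxq must therefore still report MEM_TYPE_PAGE_POOL at this
 * point; only after the conversion may rq->xdp_rxq.mem be put back to
 * rq->xdp_mem, which is exactly where the hunk above moves the restore.
 * The same reasoning is why veth_xdp_get() must take page_pool references
 * (veth_xdp_get_pp()) for such buffers: the completion path will drain
 * them through the pool, and plain page references would underflow.
 */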
