Skip to content

Commit ba452c9

Browse files
tohojo authored and borkmann committed
bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop
Based on the discussion in [0], update the bpf_redirect_neigh() helper to accept an optional parameter specifying the nexthop information. This makes it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without incurring a duplicate FIB lookup - since the FIB lookup helper will return the nexthop information even if no neighbour is present, this can simply be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen.

[0] https://lore.kernel.org/bpf/[email protected]/

Signed-off-by: Toke Høiland-Jørgensen <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Reviewed-by: David Ahern <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent c5eb48e commit ba452c9

File tree

5 files changed

+145
-67
lines changed

5 files changed

+145
-67
lines changed

include/linux/filter.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,12 +607,21 @@ struct bpf_skb_data_end {
607607
void *data_end;
608608
};
609609

610+
struct bpf_nh_params {
611+
u32 nh_family;
612+
union {
613+
u32 ipv4_nh;
614+
struct in6_addr ipv6_nh;
615+
};
616+
};
617+
610618
struct bpf_redirect_info {
611619
u32 flags;
612620
u32 tgt_index;
613621
void *tgt_value;
614622
struct bpf_map *map;
615623
u32 kern_flags;
624+
struct bpf_nh_params nh;
616625
};
617626

618627
DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);

include/uapi/linux/bpf.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3677,15 +3677,19 @@ union bpf_attr {
36773677
* Return
36783678
* The id is returned or 0 in case the id could not be retrieved.
36793679
*
3680-
* long bpf_redirect_neigh(u32 ifindex, u64 flags)
3680+
* long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
36813681
* Description
36823682
* Redirect the packet to another net device of index *ifindex*
36833683
* and fill in L2 addresses from neighboring subsystem. This helper
36843684
* is somewhat similar to **bpf_redirect**\ (), except that it
36853685
* populates L2 addresses as well, meaning, internally, the helper
3686-
* performs a FIB lookup based on the skb's networking header to
3687-
* get the address of the next hop and then relies on the neighbor
3688-
* lookup for the L2 address of the nexthop.
3686+
* relies on the neighbor lookup for the L2 address of the nexthop.
3687+
*
3688+
* The helper will perform a FIB lookup based on the skb's
3689+
* networking header to get the address of the next hop, unless
3690+
* this is supplied by the caller in the *params* argument. The
3691+
* *plen* argument indicates the len of *params* and should be set
3692+
* to 0 if *params* is NULL.
36893693
*
36903694
* The *flags* argument is reserved and must be 0. The helper is
36913695
* currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
49064910
__u8 dmac[6]; /* ETH_ALEN */
49074911
};
49084912

4913+
struct bpf_redir_neigh {
4914+
/* network family for lookup (AF_INET, AF_INET6) */
4915+
__u32 nh_family;
4916+
/* network address of nexthop; skips fib lookup to find gateway */
4917+
union {
4918+
__be32 ipv4_nh;
4919+
__u32 ipv6_nh[4]; /* in6_addr; network order */
4920+
};
4921+
};
4922+
49094923
enum bpf_task_fd_type {
49104924
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
49114925
BPF_FD_TYPE_TRACEPOINT, /* tp name */

net/core/filter.c

Lines changed: 99 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
21652165
}
21662166

21672167
#if IS_ENABLED(CONFIG_IPV6)
2168-
static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
2168+
static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
2169+
struct net_device *dev, struct bpf_nh_params *nh)
21692170
{
2170-
struct dst_entry *dst = skb_dst(skb);
2171-
struct net_device *dev = dst->dev;
21722171
u32 hh_len = LL_RESERVED_SPACE(dev);
21732172
const struct in6_addr *nexthop;
2173+
struct dst_entry *dst = NULL;
21742174
struct neighbour *neigh;
21752175

21762176
if (dev_xmit_recursion()) {
@@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
21962196
}
21972197

21982198
rcu_read_lock_bh();
2199-
nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2200-
&ipv6_hdr(skb)->daddr);
2199+
if (!nh) {
2200+
dst = skb_dst(skb);
2201+
nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2202+
&ipv6_hdr(skb)->daddr);
2203+
} else {
2204+
nexthop = &nh->ipv6_nh;
2205+
}
22012206
neigh = ip_neigh_gw6(dev, nexthop);
22022207
if (likely(!IS_ERR(neigh))) {
22032208
int ret;
@@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
22102215
return ret;
22112216
}
22122217
rcu_read_unlock_bh();
2213-
IP6_INC_STATS(dev_net(dst->dev),
2214-
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
2218+
if (dst)
2219+
IP6_INC_STATS(dev_net(dst->dev),
2220+
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
22152221
out_drop:
22162222
kfree_skb(skb);
22172223
return -ENETDOWN;
22182224
}
22192225

2220-
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
2226+
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2227+
struct bpf_nh_params *nh)
22212228
{
22222229
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
22232230
struct net *net = dev_net(dev);
22242231
int err, ret = NET_XMIT_DROP;
2225-
struct dst_entry *dst;
2226-
struct flowi6 fl6 = {
2227-
.flowi6_flags = FLOWI_FLAG_ANYSRC,
2228-
.flowi6_mark = skb->mark,
2229-
.flowlabel = ip6_flowinfo(ip6h),
2230-
.flowi6_oif = dev->ifindex,
2231-
.flowi6_proto = ip6h->nexthdr,
2232-
.daddr = ip6h->daddr,
2233-
.saddr = ip6h->saddr,
2234-
};
22352232

2236-
dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2237-
if (IS_ERR(dst))
2238-
goto out_drop;
2233+
if (!nh) {
2234+
struct dst_entry *dst;
2235+
struct flowi6 fl6 = {
2236+
.flowi6_flags = FLOWI_FLAG_ANYSRC,
2237+
.flowi6_mark = skb->mark,
2238+
.flowlabel = ip6_flowinfo(ip6h),
2239+
.flowi6_oif = dev->ifindex,
2240+
.flowi6_proto = ip6h->nexthdr,
2241+
.daddr = ip6h->daddr,
2242+
.saddr = ip6h->saddr,
2243+
};
2244+
2245+
dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2246+
if (IS_ERR(dst))
2247+
goto out_drop;
22392248

2240-
skb_dst_set(skb, dst);
2249+
skb_dst_set(skb, dst);
2250+
} else if (nh->nh_family != AF_INET6) {
2251+
goto out_drop;
2252+
}
22412253

2242-
err = bpf_out_neigh_v6(net, skb);
2254+
err = bpf_out_neigh_v6(net, skb, dev, nh);
22432255
if (unlikely(net_xmit_eval(err)))
22442256
dev->stats.tx_errors++;
22452257
else
@@ -2252,19 +2264,18 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
22522264
return ret;
22532265
}
22542266
#else
2255-
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
2267+
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2268+
struct bpf_nh_params *nh)
22562269
{
22572270
kfree_skb(skb);
22582271
return NET_XMIT_DROP;
22592272
}
22602273
#endif /* CONFIG_IPV6 */
22612274

22622275
#if IS_ENABLED(CONFIG_INET)
2263-
static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
2276+
static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
2277+
struct net_device *dev, struct bpf_nh_params *nh)
22642278
{
2265-
struct dst_entry *dst = skb_dst(skb);
2266-
struct rtable *rt = container_of(dst, struct rtable, dst);
2267-
struct net_device *dev = dst->dev;
22682279
u32 hh_len = LL_RESERVED_SPACE(dev);
22692280
struct neighbour *neigh;
22702281
bool is_v6gw = false;
@@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
22922303
}
22932304

22942305
rcu_read_lock_bh();
2295-
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
2306+
if (!nh) {
2307+
struct dst_entry *dst = skb_dst(skb);
2308+
struct rtable *rt = container_of(dst, struct rtable, dst);
2309+
2310+
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
2311+
} else if (nh->nh_family == AF_INET6) {
2312+
neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
2313+
is_v6gw = true;
2314+
} else if (nh->nh_family == AF_INET) {
2315+
neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
2316+
} else {
2317+
rcu_read_unlock_bh();
2318+
goto out_drop;
2319+
}
2320+
22962321
if (likely(!IS_ERR(neigh))) {
22972322
int ret;
22982323

@@ -2309,33 +2334,37 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
23092334
return -ENETDOWN;
23102335
}
23112336

2312-
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
2337+
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2338+
struct bpf_nh_params *nh)
23132339
{
23142340
const struct iphdr *ip4h = ip_hdr(skb);
23152341
struct net *net = dev_net(dev);
23162342
int err, ret = NET_XMIT_DROP;
2317-
struct rtable *rt;
2318-
struct flowi4 fl4 = {
2319-
.flowi4_flags = FLOWI_FLAG_ANYSRC,
2320-
.flowi4_mark = skb->mark,
2321-
.flowi4_tos = RT_TOS(ip4h->tos),
2322-
.flowi4_oif = dev->ifindex,
2323-
.flowi4_proto = ip4h->protocol,
2324-
.daddr = ip4h->daddr,
2325-
.saddr = ip4h->saddr,
2326-
};
23272343

2328-
rt = ip_route_output_flow(net, &fl4, NULL);
2329-
if (IS_ERR(rt))
2330-
goto out_drop;
2331-
if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2332-
ip_rt_put(rt);
2333-
goto out_drop;
2334-
}
2344+
if (!nh) {
2345+
struct flowi4 fl4 = {
2346+
.flowi4_flags = FLOWI_FLAG_ANYSRC,
2347+
.flowi4_mark = skb->mark,
2348+
.flowi4_tos = RT_TOS(ip4h->tos),
2349+
.flowi4_oif = dev->ifindex,
2350+
.flowi4_proto = ip4h->protocol,
2351+
.daddr = ip4h->daddr,
2352+
.saddr = ip4h->saddr,
2353+
};
2354+
struct rtable *rt;
2355+
2356+
rt = ip_route_output_flow(net, &fl4, NULL);
2357+
if (IS_ERR(rt))
2358+
goto out_drop;
2359+
if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2360+
ip_rt_put(rt);
2361+
goto out_drop;
2362+
}
23352363

2336-
skb_dst_set(skb, &rt->dst);
2364+
skb_dst_set(skb, &rt->dst);
2365+
}
23372366

2338-
err = bpf_out_neigh_v4(net, skb);
2367+
err = bpf_out_neigh_v4(net, skb, dev, nh);
23392368
if (unlikely(net_xmit_eval(err)))
23402369
dev->stats.tx_errors++;
23412370
else
@@ -2348,14 +2377,16 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
23482377
return ret;
23492378
}
23502379
#else
2351-
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
2380+
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2381+
struct bpf_nh_params *nh)
23522382
{
23532383
kfree_skb(skb);
23542384
return NET_XMIT_DROP;
23552385
}
23562386
#endif /* CONFIG_INET */
23572387

2358-
static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
2388+
static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
2389+
struct bpf_nh_params *nh)
23592390
{
23602391
struct ethhdr *ethh = eth_hdr(skb);
23612392

@@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
23702401
skb_reset_network_header(skb);
23712402

23722403
if (skb->protocol == htons(ETH_P_IP))
2373-
return __bpf_redirect_neigh_v4(skb, dev);
2404+
return __bpf_redirect_neigh_v4(skb, dev, nh);
23742405
else if (skb->protocol == htons(ETH_P_IPV6))
2375-
return __bpf_redirect_neigh_v6(skb, dev);
2406+
return __bpf_redirect_neigh_v6(skb, dev, nh);
23762407
out:
23772408
kfree_skb(skb);
23782409
return -ENOTSUPP;
@@ -2382,7 +2413,8 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
23822413
enum {
23832414
BPF_F_NEIGH = (1ULL << 1),
23842415
BPF_F_PEER = (1ULL << 2),
2385-
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER)
2416+
BPF_F_NEXTHOP = (1ULL << 3),
2417+
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
23862418
};
23872419

23882420
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb)
24552487
return -EAGAIN;
24562488
}
24572489
return flags & BPF_F_NEIGH ?
2458-
__bpf_redirect_neigh(skb, dev) :
2490+
__bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
2491+
&ri->nh : NULL) :
24592492
__bpf_redirect(skb, dev, flags);
24602493
out_drop:
24612494
kfree_skb(skb);
@@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
25042537
.arg2_type = ARG_ANYTHING,
25052538
};
25062539

2507-
BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
2540+
BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2541+
int, plen, u64, flags)
25082542
{
25092543
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
25102544

2511-
if (unlikely(flags))
2545+
if (unlikely((plen && plen < sizeof(*params)) || flags))
25122546
return TC_ACT_SHOT;
25132547

2514-
ri->flags = BPF_F_NEIGH;
2548+
ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
25152549
ri->tgt_index = ifindex;
25162550

2551+
BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2552+
if (plen)
2553+
memcpy(&ri->nh, params, sizeof(ri->nh));
2554+
25172555
return TC_ACT_REDIRECT;
25182556
}
25192557

@@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
25222560
.gpl_only = false,
25232561
.ret_type = RET_INTEGER,
25242562
.arg1_type = ARG_ANYTHING,
2525-
.arg2_type = ARG_ANYTHING,
2563+
.arg2_type = ARG_PTR_TO_MEM_OR_NULL,
2564+
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
2565+
.arg4_type = ARG_ANYTHING,
25262566
};
25272567

25282568
BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)

scripts/bpf_helpers_doc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ class PrinterHelpers(Printer):
453453
'struct bpf_perf_event_data',
454454
'struct bpf_perf_event_value',
455455
'struct bpf_pidns_info',
456+
'struct bpf_redir_neigh',
456457
'struct bpf_sk_lookup',
457458
'struct bpf_sock',
458459
'struct bpf_sock_addr',

tools/include/uapi/linux/bpf.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3677,15 +3677,19 @@ union bpf_attr {
36773677
* Return
36783678
* The id is returned or 0 in case the id could not be retrieved.
36793679
*
3680-
* long bpf_redirect_neigh(u32 ifindex, u64 flags)
3680+
* long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
36813681
* Description
36823682
* Redirect the packet to another net device of index *ifindex*
36833683
* and fill in L2 addresses from neighboring subsystem. This helper
36843684
* is somewhat similar to **bpf_redirect**\ (), except that it
36853685
* populates L2 addresses as well, meaning, internally, the helper
3686-
* performs a FIB lookup based on the skb's networking header to
3687-
* get the address of the next hop and then relies on the neighbor
3688-
* lookup for the L2 address of the nexthop.
3686+
* relies on the neighbor lookup for the L2 address of the nexthop.
3687+
*
3688+
* The helper will perform a FIB lookup based on the skb's
3689+
* networking header to get the address of the next hop, unless
3690+
* this is supplied by the caller in the *params* argument. The
3691+
* *plen* argument indicates the len of *params* and should be set
3692+
* to 0 if *params* is NULL.
36893693
*
36903694
* The *flags* argument is reserved and must be 0. The helper is
36913695
* currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
49064910
__u8 dmac[6]; /* ETH_ALEN */
49074911
};
49084912

4913+
struct bpf_redir_neigh {
4914+
/* network family for lookup (AF_INET, AF_INET6) */
4915+
__u32 nh_family;
4916+
/* network address of nexthop; skips fib lookup to find gateway */
4917+
union {
4918+
__be32 ipv4_nh;
4919+
__u32 ipv6_nh[4]; /* in6_addr; network order */
4920+
};
4921+
};
4922+
49094923
enum bpf_task_fd_type {
49104924
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
49114925
BPF_FD_TYPE_TRACEPOINT, /* tp name */

0 commit comments

Comments
 (0)