Skip to content

Commit 7a3f5b0

Browse files
proelbtn authored and ummakynes committed
netfilter: add netfilter hooks to SRv6 data plane
This patch introduces netfilter hooks to solve the problem that conntrack could not record both inner flows and outer flows. It also introduces a new sysctl toggle for enabling the lightweight tunnel netfilter hooks.

Signed-off-by: Ryoga Saito <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 7bc416f commit 7a3f5b0

File tree

9 files changed

+241
-36
lines changed

9 files changed

+241
-36
lines changed

Documentation/networking/nf_conntrack-sysctl.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
184184
This extended timeout will be used in case there is a GRE stream
185185
detected.
186186

187+
nf_hooks_lwtunnel - BOOLEAN
188+
- 0 - disabled (default)
189+
- not 0 - enabled
190+
191+
If this option is enabled, the lightweight tunnel netfilter hooks are
192+
enabled. This option cannot be disabled once it is enabled.
193+
187194
nf_flowtable_tcp_timeout - INTEGER (seconds)
188195
default 30
189196

include/net/lwtunnel.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
5151
};
5252

5353
#ifdef CONFIG_LWTUNNEL
54+
55+
DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
56+
5457
void lwtstate_free(struct lwtunnel_state *lws);
5558

5659
static inline struct lwtunnel_state *
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include <linux/sysctl.h>
2+
#include <linux/types.h>
3+
4+
#ifdef CONFIG_SYSCTL
5+
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
6+
void *buffer, size_t *lenp, loff_t *ppos);
7+
#endif

net/core/lwtunnel.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
#include <net/ip6_fib.h>
2424
#include <net/rtnh.h>
2525

26+
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
27+
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
28+
2629
#ifdef CONFIG_MODULES
2730

2831
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)

net/ipv6/seg6_iptunnel.c

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#ifdef CONFIG_IPV6_SEG6_HMAC
2727
#include <net/seg6_hmac.h>
2828
#endif
29+
#include <net/lwtunnel.h>
30+
#include <linux/netfilter.h>
2931

3032
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
3133
{
@@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb)
295297

296298
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
297299
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
300+
nf_reset_ct(skb);
298301

299302
return 0;
300303
}
301304

302-
static int seg6_input(struct sk_buff *skb)
305+
static int seg6_input_finish(struct net *net, struct sock *sk,
306+
struct sk_buff *skb)
307+
{
308+
return dst_input(skb);
309+
}
310+
311+
static int seg6_input_core(struct net *net, struct sock *sk,
312+
struct sk_buff *skb)
303313
{
304314
struct dst_entry *orig_dst = skb_dst(skb);
305315
struct dst_entry *dst = NULL;
@@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb)
337347
if (unlikely(err))
338348
return err;
339349

340-
return dst_input(skb);
350+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
351+
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
352+
dev_net(skb->dev), NULL, skb, NULL,
353+
skb_dst(skb)->dev, seg6_input_finish);
354+
355+
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
341356
}
342357

343-
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
358+
static int seg6_input_nf(struct sk_buff *skb)
359+
{
360+
struct net_device *dev = skb_dst(skb)->dev;
361+
struct net *net = dev_net(skb->dev);
362+
363+
switch (skb->protocol) {
364+
case htons(ETH_P_IP):
365+
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
366+
skb, NULL, dev, seg6_input_core);
367+
case htons(ETH_P_IPV6):
368+
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
369+
skb, NULL, dev, seg6_input_core);
370+
}
371+
372+
return -EINVAL;
373+
}
374+
375+
static int seg6_input(struct sk_buff *skb)
376+
{
377+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
378+
return seg6_input_nf(skb);
379+
380+
return seg6_input_core(dev_net(skb->dev), NULL, skb);
381+
}
382+
383+
static int seg6_output_core(struct net *net, struct sock *sk,
384+
struct sk_buff *skb)
344385
{
345386
struct dst_entry *orig_dst = skb_dst(skb);
346387
struct dst_entry *dst = NULL;
@@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
387428
if (unlikely(err))
388429
goto drop;
389430

431+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
432+
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
433+
NULL, skb_dst(skb)->dev, dst_output);
434+
390435
return dst_output(net, sk, skb);
391436
drop:
392437
kfree_skb(skb);
393438
return err;
394439
}
395440

441+
static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
442+
{
443+
struct net_device *dev = skb_dst(skb)->dev;
444+
445+
switch (skb->protocol) {
446+
case htons(ETH_P_IP):
447+
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
448+
NULL, dev, seg6_output_core);
449+
case htons(ETH_P_IPV6):
450+
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
451+
NULL, dev, seg6_output_core);
452+
}
453+
454+
return -EINVAL;
455+
}
456+
457+
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
458+
{
459+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
460+
return seg6_output_nf(net, sk, skb);
461+
462+
return seg6_output_core(net, sk, skb);
463+
}
464+
396465
static int seg6_build_state(struct net *net, struct nlattr *nla,
397466
unsigned int family, const void *cfg,
398467
struct lwtunnel_state **ts,

net/ipv6/seg6_local.c

Lines changed: 78 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include <net/seg6_local.h>
3131
#include <linux/etherdevice.h>
3232
#include <linux/bpf.h>
33+
#include <net/lwtunnel.h>
34+
#include <linux/netfilter.h>
3335

3436
#define SEG6_F_ATTR(i) BIT(i)
3537

@@ -413,12 +415,33 @@ static int input_action_end_dx2(struct sk_buff *skb,
413415
return -EINVAL;
414416
}
415417

418+
static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
419+
struct sk_buff *skb)
420+
{
421+
struct dst_entry *orig_dst = skb_dst(skb);
422+
struct in6_addr *nhaddr = NULL;
423+
struct seg6_local_lwt *slwt;
424+
425+
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
426+
427+
/* The inner packet is not associated to any local interface,
428+
* so we do not call netif_rx().
429+
*
430+
* If slwt->nh6 is set to ::, then lookup the nexthop for the
431+
* inner packet's DA. Otherwise, use the specified nexthop.
432+
*/
433+
if (!ipv6_addr_any(&slwt->nh6))
434+
nhaddr = &slwt->nh6;
435+
436+
seg6_lookup_nexthop(skb, nhaddr, 0);
437+
438+
return dst_input(skb);
439+
}
440+
416441
/* decapsulate and forward to specified nexthop */
417442
static int input_action_end_dx6(struct sk_buff *skb,
418443
struct seg6_local_lwt *slwt)
419444
{
420-
struct in6_addr *nhaddr = NULL;
421-
422445
/* this function accepts IPv6 encapsulated packets, with either
423446
* an SRH with SL=0, or no SRH.
424447
*/
@@ -429,55 +452,65 @@ static int input_action_end_dx6(struct sk_buff *skb,
429452
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
430453
goto drop;
431454

432-
/* The inner packet is not associated to any local interface,
433-
* so we do not call netif_rx().
434-
*
435-
* If slwt->nh6 is set to ::, then lookup the nexthop for the
436-
* inner packet's DA. Otherwise, use the specified nexthop.
437-
*/
438-
439-
if (!ipv6_addr_any(&slwt->nh6))
440-
nhaddr = &slwt->nh6;
441-
442455
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
456+
nf_reset_ct(skb);
443457

444-
seg6_lookup_nexthop(skb, nhaddr, 0);
458+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
459+
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
460+
dev_net(skb->dev), NULL, skb, NULL,
461+
skb_dst(skb)->dev, input_action_end_dx6_finish);
445462

446-
return dst_input(skb);
463+
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
447464
drop:
448465
kfree_skb(skb);
449466
return -EINVAL;
450467
}
451468

452-
static int input_action_end_dx4(struct sk_buff *skb,
453-
struct seg6_local_lwt *slwt)
469+
static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
470+
struct sk_buff *skb)
454471
{
472+
struct dst_entry *orig_dst = skb_dst(skb);
473+
struct seg6_local_lwt *slwt;
455474
struct iphdr *iph;
456475
__be32 nhaddr;
457476
int err;
458477

459-
if (!decap_and_validate(skb, IPPROTO_IPIP))
460-
goto drop;
461-
462-
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
463-
goto drop;
464-
465-
skb->protocol = htons(ETH_P_IP);
478+
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
466479

467480
iph = ip_hdr(skb);
468481

469482
nhaddr = slwt->nh4.s_addr ?: iph->daddr;
470483

471484
skb_dst_drop(skb);
472485

473-
skb_set_transport_header(skb, sizeof(struct iphdr));
474-
475486
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
476-
if (err)
477-
goto drop;
487+
if (err) {
488+
kfree_skb(skb);
489+
return -EINVAL;
490+
}
478491

479492
return dst_input(skb);
493+
}
494+
495+
static int input_action_end_dx4(struct sk_buff *skb,
496+
struct seg6_local_lwt *slwt)
497+
{
498+
if (!decap_and_validate(skb, IPPROTO_IPIP))
499+
goto drop;
500+
501+
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
502+
goto drop;
503+
504+
skb->protocol = htons(ETH_P_IP);
505+
skb_set_transport_header(skb, sizeof(struct iphdr));
506+
nf_reset_ct(skb);
507+
508+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
509+
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
510+
dev_net(skb->dev), NULL, skb, NULL,
511+
skb_dst(skb)->dev, input_action_end_dx4_finish);
480512

513+
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
481514
drop:
482515
kfree_skb(skb);
483516
return -EINVAL;
@@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
645678
skb_dst_drop(skb);
646679

647680
skb_set_transport_header(skb, hdrlen);
681+
nf_reset_ct(skb);
648682

649683
return end_dt_vrf_rcv(skb, family, vrf);
650684

@@ -1078,19 +1112,15 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
10781112
u64_stats_update_end(&pcounters->syncp);
10791113
}
10801114

1081-
static int seg6_local_input(struct sk_buff *skb)
1115+
static int seg6_local_input_core(struct net *net, struct sock *sk,
1116+
struct sk_buff *skb)
10821117
{
10831118
struct dst_entry *orig_dst = skb_dst(skb);
10841119
struct seg6_action_desc *desc;
10851120
struct seg6_local_lwt *slwt;
10861121
unsigned int len = skb->len;
10871122
int rc;
10881123

1089-
if (skb->protocol != htons(ETH_P_IPV6)) {
1090-
kfree_skb(skb);
1091-
return -EINVAL;
1092-
}
1093-
10941124
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
10951125
desc = slwt->desc;
10961126

@@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb)
11041134
return rc;
11051135
}
11061136

1137+
static int seg6_local_input(struct sk_buff *skb)
1138+
{
1139+
if (skb->protocol != htons(ETH_P_IPV6)) {
1140+
kfree_skb(skb);
1141+
return -EINVAL;
1142+
}
1143+
1144+
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
1145+
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
1146+
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
1147+
seg6_local_input_core);
1148+
1149+
return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
1150+
}
1151+
11071152
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
11081153
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
11091154
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },

net/netfilter/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
212212

213213
# IPVS
214214
obj-$(CONFIG_IP_VS) += ipvs/
215+
216+
# lwtunnel
217+
obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o

net/netfilter/nf_conntrack_standalone.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include <net/netfilter/nf_conntrack_acct.h>
2323
#include <net/netfilter/nf_conntrack_zones.h>
2424
#include <net/netfilter/nf_conntrack_timestamp.h>
25+
#ifdef CONFIG_LWTUNNEL
26+
#include <net/netfilter/nf_hooks_lwtunnel.h>
27+
#endif
2528
#include <linux/rculist_nulls.h>
2629

2730
static bool enable_hooks __read_mostly;
@@ -612,6 +615,9 @@ enum nf_ct_sysctl_index {
612615
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
613616
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
614617
#endif
618+
#ifdef CONFIG_LWTUNNEL
619+
NF_SYSCTL_CT_LWTUNNEL,
620+
#endif
615621

616622
__NF_SYSCTL_CT_LAST_SYSCTL,
617623
};
@@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
958964
.mode = 0644,
959965
.proc_handler = proc_dointvec_jiffies,
960966
},
967+
#endif
968+
#ifdef CONFIG_LWTUNNEL
969+
[NF_SYSCTL_CT_LWTUNNEL] = {
970+
.procname = "nf_hooks_lwtunnel",
971+
.data = NULL,
972+
.maxlen = sizeof(int),
973+
.mode = 0644,
974+
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
975+
},
961976
#endif
962977
{}
963978
};

0 commit comments

Comments
 (0)