Skip to content

Commit a8a572a

Browse files
Dan Streetman authored and klassert committed
xfrm: dst_entries_init() per-net dst_ops
Remove the dst_entries_init/destroy calls for xfrm4 and xfrm6 dst_ops templates; their dst_entries counters will never be used. Move the xfrm dst_ops initialization from the common xfrm/xfrm_policy.c to xfrm4/xfrm4_policy.c and xfrm6/xfrm6_policy.c, and call dst_entries_init and dst_entries_destroy for each net namespace.

The ipv4 and ipv6 xfrms each create a dst_ops template, and perform dst_entries_init on the templates. The template values are copied to each net namespace's xfrm.xfrm*_dst_ops. The problem there is that the dst_ops pcpuc_entries field is a percpu counter and cannot be used correctly by simply copying it to another object.

The result of this is a very subtle bug; changes to the dst entries counter from one net namespace may sometimes get applied to a different net namespace dst entries counter. This is because of how the percpu counter works; it has a main count field as well as a pointer to the percpu variables. Each net namespace maintains its own main count variable, but all point to one set of percpu variables. When any net namespace happens to change one of the percpu variables to outside its small batch range, its count is moved to the net namespace's main count variable. So with multiple net namespaces operating concurrently, the dst_ops entries counter can stray from the actual value that it should be; if counts are consistently moved from one net namespace to another (which my testing showed is likely), then one net namespace winds up with a negative dst_ops count while another winds up with a continually increasing count, eventually reaching its gc_thresh limit, which causes all new traffic on the net namespace to fail with -ENOBUFS.

Signed-off-by: Dan Streetman <[email protected]>
Signed-off-by: Dan Streetman <[email protected]>
Signed-off-by: Steffen Klassert <[email protected]>
1 parent 5f6c99e commit a8a572a

File tree

3 files changed

+75
-62
lines changed

3 files changed

+75
-62
lines changed

net/ipv4/xfrm4_policy.c

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
236236
xfrm_dst_ifdown(dst, dev);
237237
}
238238

239-
static struct dst_ops xfrm4_dst_ops = {
239+
static struct dst_ops xfrm4_dst_ops_template = {
240240
.family = AF_INET,
241241
.gc = xfrm4_garbage_collect,
242242
.update_pmtu = xfrm4_update_pmtu,
@@ -250,7 +250,7 @@ static struct dst_ops xfrm4_dst_ops = {
250250

251251
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
252252
.family = AF_INET,
253-
.dst_ops = &xfrm4_dst_ops,
253+
.dst_ops = &xfrm4_dst_ops_template,
254254
.dst_lookup = xfrm4_dst_lookup,
255255
.get_saddr = xfrm4_get_saddr,
256256
.decode_session = _decode_session4,
@@ -272,7 +272,7 @@ static struct ctl_table xfrm4_policy_table[] = {
272272
{ }
273273
};
274274

275-
static int __net_init xfrm4_net_init(struct net *net)
275+
static int __net_init xfrm4_net_sysctl_init(struct net *net)
276276
{
277277
struct ctl_table *table;
278278
struct ctl_table_header *hdr;
@@ -300,7 +300,7 @@ static int __net_init xfrm4_net_init(struct net *net)
300300
return -ENOMEM;
301301
}
302302

303-
static void __net_exit xfrm4_net_exit(struct net *net)
303+
static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
304304
{
305305
struct ctl_table *table;
306306

@@ -312,12 +312,44 @@ static void __net_exit xfrm4_net_exit(struct net *net)
312312
if (!net_eq(net, &init_net))
313313
kfree(table);
314314
}
315+
#else /* CONFIG_SYSCTL */
316+
static int inline xfrm4_net_sysctl_init(struct net *net)
317+
{
318+
return 0;
319+
}
320+
321+
static void inline xfrm4_net_sysctl_exit(struct net *net)
322+
{
323+
}
324+
#endif
325+
326+
static int __net_init xfrm4_net_init(struct net *net)
327+
{
328+
int ret;
329+
330+
memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
331+
sizeof(xfrm4_dst_ops_template));
332+
ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
333+
if (ret)
334+
return ret;
335+
336+
ret = xfrm4_net_sysctl_init(net);
337+
if (ret)
338+
dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
339+
340+
return ret;
341+
}
342+
343+
static void __net_exit xfrm4_net_exit(struct net *net)
344+
{
345+
xfrm4_net_sysctl_exit(net);
346+
dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
347+
}
315348

316349
static struct pernet_operations __net_initdata xfrm4_net_ops = {
317350
.init = xfrm4_net_init,
318351
.exit = xfrm4_net_exit,
319352
};
320-
#endif
321353

322354
static void __init xfrm4_policy_init(void)
323355
{
@@ -326,13 +358,9 @@ static void __init xfrm4_policy_init(void)
326358

327359
void __init xfrm4_init(void)
328360
{
329-
dst_entries_init(&xfrm4_dst_ops);
330-
331361
xfrm4_state_init();
332362
xfrm4_policy_init();
333363
xfrm4_protocol_init();
334-
#ifdef CONFIG_SYSCTL
335364
register_pernet_subsys(&xfrm4_net_ops);
336-
#endif
337365
}
338366

net/ipv6/xfrm6_policy.c

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
281281
xfrm_dst_ifdown(dst, dev);
282282
}
283283

284-
static struct dst_ops xfrm6_dst_ops = {
284+
static struct dst_ops xfrm6_dst_ops_template = {
285285
.family = AF_INET6,
286286
.gc = xfrm6_garbage_collect,
287287
.update_pmtu = xfrm6_update_pmtu,
@@ -295,7 +295,7 @@ static struct dst_ops xfrm6_dst_ops = {
295295

296296
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
297297
.family = AF_INET6,
298-
.dst_ops = &xfrm6_dst_ops,
298+
.dst_ops = &xfrm6_dst_ops_template,
299299
.dst_lookup = xfrm6_dst_lookup,
300300
.get_saddr = xfrm6_get_saddr,
301301
.decode_session = _decode_session6,
@@ -327,7 +327,7 @@ static struct ctl_table xfrm6_policy_table[] = {
327327
{ }
328328
};
329329

330-
static int __net_init xfrm6_net_init(struct net *net)
330+
static int __net_init xfrm6_net_sysctl_init(struct net *net)
331331
{
332332
struct ctl_table *table;
333333
struct ctl_table_header *hdr;
@@ -355,7 +355,7 @@ static int __net_init xfrm6_net_init(struct net *net)
355355
return -ENOMEM;
356356
}
357357

358-
static void __net_exit xfrm6_net_exit(struct net *net)
358+
static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
359359
{
360360
struct ctl_table *table;
361361

@@ -367,24 +367,52 @@ static void __net_exit xfrm6_net_exit(struct net *net)
367367
if (!net_eq(net, &init_net))
368368
kfree(table);
369369
}
370+
#else /* CONFIG_SYSCTL */
371+
static int inline xfrm6_net_sysctl_init(struct net *net)
372+
{
373+
return 0;
374+
}
375+
376+
static void inline xfrm6_net_sysctl_exit(struct net *net)
377+
{
378+
}
379+
#endif
380+
381+
static int __net_init xfrm6_net_init(struct net *net)
382+
{
383+
int ret;
384+
385+
memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
386+
sizeof(xfrm6_dst_ops_template));
387+
ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
388+
if (ret)
389+
return ret;
390+
391+
ret = xfrm6_net_sysctl_init(net);
392+
if (ret)
393+
dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
394+
395+
return ret;
396+
}
397+
398+
static void __net_exit xfrm6_net_exit(struct net *net)
399+
{
400+
xfrm6_net_sysctl_exit(net);
401+
dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
402+
}
370403

371404
static struct pernet_operations xfrm6_net_ops = {
372405
.init = xfrm6_net_init,
373406
.exit = xfrm6_net_exit,
374407
};
375-
#endif
376408

377409
int __init xfrm6_init(void)
378410
{
379411
int ret;
380412

381-
dst_entries_init(&xfrm6_dst_ops);
382-
383413
ret = xfrm6_policy_init();
384-
if (ret) {
385-
dst_entries_destroy(&xfrm6_dst_ops);
414+
if (ret)
386415
goto out;
387-
}
388416
ret = xfrm6_state_init();
389417
if (ret)
390418
goto out_policy;
@@ -393,9 +421,7 @@ int __init xfrm6_init(void)
393421
if (ret)
394422
goto out_state;
395423

396-
#ifdef CONFIG_SYSCTL
397424
register_pernet_subsys(&xfrm6_net_ops);
398-
#endif
399425
out:
400426
return ret;
401427
out_state:
@@ -407,11 +433,8 @@ int __init xfrm6_init(void)
407433

408434
void xfrm6_fini(void)
409435
{
410-
#ifdef CONFIG_SYSCTL
411436
unregister_pernet_subsys(&xfrm6_net_ops);
412-
#endif
413437
xfrm6_protocol_fini();
414438
xfrm6_policy_fini();
415439
xfrm6_state_fini();
416-
dst_entries_destroy(&xfrm6_dst_ops);
417440
}

net/xfrm/xfrm_policy.c

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2807,7 +2807,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
28072807

28082808
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
28092809
{
2810-
struct net *net;
28112810
int err = 0;
28122811
if (unlikely(afinfo == NULL))
28132812
return -EINVAL;
@@ -2838,26 +2837,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
28382837
}
28392838
spin_unlock(&xfrm_policy_afinfo_lock);
28402839

2841-
rtnl_lock();
2842-
for_each_net(net) {
2843-
struct dst_ops *xfrm_dst_ops;
2844-
2845-
switch (afinfo->family) {
2846-
case AF_INET:
2847-
xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
2848-
break;
2849-
#if IS_ENABLED(CONFIG_IPV6)
2850-
case AF_INET6:
2851-
xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
2852-
break;
2853-
#endif
2854-
default:
2855-
BUG();
2856-
}
2857-
*xfrm_dst_ops = *afinfo->dst_ops;
2858-
}
2859-
rtnl_unlock();
2860-
28612840
return err;
28622841
}
28632842
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2893,22 +2872,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
28932872
}
28942873
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
28952874

2896-
static void __net_init xfrm_dst_ops_init(struct net *net)
2897-
{
2898-
struct xfrm_policy_afinfo *afinfo;
2899-
2900-
rcu_read_lock();
2901-
afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
2902-
if (afinfo)
2903-
net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
2904-
#if IS_ENABLED(CONFIG_IPV6)
2905-
afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
2906-
if (afinfo)
2907-
net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
2908-
#endif
2909-
rcu_read_unlock();
2910-
}
2911-
29122875
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
29132876
{
29142877
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3057,7 +3020,6 @@ static int __net_init xfrm_net_init(struct net *net)
30573020
rv = xfrm_policy_init(net);
30583021
if (rv < 0)
30593022
goto out_policy;
3060-
xfrm_dst_ops_init(net);
30613023
rv = xfrm_sysctl_init(net);
30623024
if (rv < 0)
30633025
goto out_sysctl;

0 commit comments

Comments
 (0)