Skip to content

Commit 86e25f4

Browse files
jdamato-fsly authored and kuba-moo committed
net: napi: Add napi_config
Add a persistent NAPI config area for NAPI configuration to the core. Drivers opt in to setting the persistent config for a NAPI by passing an index when calling netif_napi_add_config.

napi_config is allocated in alloc_netdev_mqs and freed in free_netdev (after the NAPIs are deleted).

Drivers which call netif_napi_add_config will have persistent per-NAPI settings: NAPI IDs, gro_flush_timeout, and defer_hard_irq settings.

Per-NAPI settings are saved in napi_disable and restored in napi_enable.

Co-developed-by: Martin Karsten <[email protected]>
Signed-off-by: Martin Karsten <[email protected]>
Signed-off-by: Joe Damato <[email protected]>
Reviewed-by: Jakub Kicinski <[email protected]>
Reviewed-by: Eric Dumazet <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 0137891 commit 86e25f4

File tree

4 files changed

+119
-10
lines changed

4 files changed

+119
-10
lines changed

Documentation/networking/net_cachelines/net_device.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ struct dpll_pin* dpll_pin
186186
struct hlist_head page_pools
187187
struct dim_irq_moder* irq_moder
188188
u64 max_pacing_offload_horizon
189+
struct_napi_config* napi_config
189190
unsigned_long gro_flush_timeout
190191
u32 napi_defer_hard_irqs
191192
=================================== =========================== =================== =================== ===================================================================================

include/linux/netdevice.h

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,15 @@ struct gro_list {
342342
*/
343343
#define GRO_HASH_BUCKETS 8
344344

345+
/*
346+
* Structure for per-NAPI config
347+
*/
348+
struct napi_config {
349+
u64 gro_flush_timeout;
350+
u32 defer_hard_irqs;
351+
unsigned int napi_id;
352+
};
353+
345354
/*
346355
* Structure for NAPI scheduling similar to tasklet but with weighting
347356
*/
@@ -379,6 +388,8 @@ struct napi_struct {
379388
struct list_head dev_list;
380389
struct hlist_node napi_hash_node;
381390
int irq;
391+
int index;
392+
struct napi_config *config;
382393
};
383394

384395
enum {
@@ -1868,9 +1879,6 @@ enum netdev_reg_state {
18681879
* allocated at register_netdev() time
18691880
* @real_num_rx_queues: Number of RX queues currently active in device
18701881
* @xdp_prog: XDP sockets filter program pointer
1871-
* @gro_flush_timeout: timeout for GRO layer in NAPI
1872-
* @napi_defer_hard_irqs: If not zero, provides a counter that would
1873-
* allow to avoid NIC hard IRQ, on busy queues.
18741882
*
18751883
* @rx_handler: handler for received packets
18761884
* @rx_handler_data: XXX: need comments on this one
@@ -2020,6 +2028,11 @@ enum netdev_reg_state {
20202028
* where the clock is recovered.
20212029
*
20222030
* @max_pacing_offload_horizon: max EDT offload horizon in nsec.
2031+
* @napi_config: An array of napi_config structures containing per-NAPI
2032+
* settings.
2033+
* @gro_flush_timeout: timeout for GRO layer in NAPI
2034+
* @napi_defer_hard_irqs: If not zero, provides a counter that would
2035+
* allow to avoid NIC hard IRQ, on busy queues.
20232036
*
20242037
* FIXME: cleanup struct net_device such that network protocol info
20252038
* moves out.
@@ -2413,6 +2426,7 @@ struct net_device {
24132426
struct dim_irq_moder *irq_moder;
24142427

24152428
u64 max_pacing_offload_horizon;
2429+
struct napi_config *napi_config;
24162430
unsigned long gro_flush_timeout;
24172431
u32 napi_defer_hard_irqs;
24182432

@@ -2678,6 +2692,22 @@ netif_napi_add_tx_weight(struct net_device *dev,
26782692
netif_napi_add_weight(dev, napi, poll, weight);
26792693
}
26802694

2695+
/**
2696+
* netif_napi_add_config - initialize a NAPI context with persistent config
2697+
* @dev: network device
2698+
* @napi: NAPI context
2699+
* @poll: polling function
2700+
* @index: the NAPI index
2701+
*/
2702+
static inline void
2703+
netif_napi_add_config(struct net_device *dev, struct napi_struct *napi,
2704+
int (*poll)(struct napi_struct *, int), int index)
2705+
{
2706+
napi->index = index;
2707+
napi->config = &dev->napi_config[index];
2708+
netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
2709+
}
2710+
26812711
/**
26822712
* netif_napi_add_tx() - initialize a NAPI context to be used for Tx only
26832713
* @dev: network device

net/core/dev.c

Lines changed: 73 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6505,6 +6505,23 @@ EXPORT_SYMBOL(napi_busy_loop);
65056505

65066506
#endif /* CONFIG_NET_RX_BUSY_POLL */
65076507

6508+
static void __napi_hash_add_with_id(struct napi_struct *napi,
6509+
unsigned int napi_id)
6510+
{
6511+
napi->napi_id = napi_id;
6512+
hlist_add_head_rcu(&napi->napi_hash_node,
6513+
&napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
6514+
}
6515+
6516+
static void napi_hash_add_with_id(struct napi_struct *napi,
6517+
unsigned int napi_id)
6518+
{
6519+
spin_lock(&napi_hash_lock);
6520+
WARN_ON_ONCE(napi_by_id(napi_id));
6521+
__napi_hash_add_with_id(napi, napi_id);
6522+
spin_unlock(&napi_hash_lock);
6523+
}
6524+
65086525
static void napi_hash_add(struct napi_struct *napi)
65096526
{
65106527
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
@@ -6517,10 +6534,8 @@ static void napi_hash_add(struct napi_struct *napi)
65176534
if (unlikely(++napi_gen_id < MIN_NAPI_ID))
65186535
napi_gen_id = MIN_NAPI_ID;
65196536
} while (napi_by_id(napi_gen_id));
6520-
napi->napi_id = napi_gen_id;
65216537

6522-
hlist_add_head_rcu(&napi->napi_hash_node,
6523-
&napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
6538+
__napi_hash_add_with_id(napi, napi_gen_id);
65246539

65256540
spin_unlock(&napi_hash_lock);
65266541
}
@@ -6643,6 +6658,28 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
66436658
}
66446659
EXPORT_SYMBOL(netif_queue_set_napi);
66456660

6661+
static void napi_restore_config(struct napi_struct *n)
6662+
{
6663+
n->defer_hard_irqs = n->config->defer_hard_irqs;
6664+
n->gro_flush_timeout = n->config->gro_flush_timeout;
6665+
/* a NAPI ID might be stored in the config, if so use it. if not, use
6666+
* napi_hash_add to generate one for us. It will be saved to the config
6667+
* in napi_disable.
6668+
*/
6669+
if (n->config->napi_id)
6670+
napi_hash_add_with_id(n, n->config->napi_id);
6671+
else
6672+
napi_hash_add(n);
6673+
}
6674+
6675+
static void napi_save_config(struct napi_struct *n)
6676+
{
6677+
n->config->defer_hard_irqs = n->defer_hard_irqs;
6678+
n->config->gro_flush_timeout = n->gro_flush_timeout;
6679+
n->config->napi_id = n->napi_id;
6680+
napi_hash_del(n);
6681+
}
6682+
66466683
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
66476684
int (*poll)(struct napi_struct *, int), int weight)
66486685
{
@@ -6653,8 +6690,6 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
66536690
INIT_HLIST_NODE(&napi->napi_hash_node);
66546691
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
66556692
napi->timer.function = napi_watchdog;
6656-
napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
6657-
napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
66586693
init_gro_hash(napi);
66596694
napi->skb = NULL;
66606695
INIT_LIST_HEAD(&napi->rx_list);
@@ -6672,7 +6707,13 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
66726707
set_bit(NAPI_STATE_SCHED, &napi->state);
66736708
set_bit(NAPI_STATE_NPSVC, &napi->state);
66746709
list_add_rcu(&napi->dev_list, &dev->napi_list);
6675-
napi_hash_add(napi);
6710+
6711+
/* default settings from sysfs are applied to all NAPIs. any per-NAPI
6712+
* configuration will be loaded in napi_enable
6713+
*/
6714+
napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
6715+
napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
6716+
66766717
napi_get_frags_check(napi);
66776718
/* Create kthread for this napi if dev->threaded is set.
66786719
* Clear dev->threaded if kthread creation failed so that
@@ -6704,6 +6745,11 @@ void napi_disable(struct napi_struct *n)
67046745

67056746
hrtimer_cancel(&n->timer);
67066747

6748+
if (n->config)
6749+
napi_save_config(n);
6750+
else
6751+
napi_hash_del(n);
6752+
67076753
clear_bit(NAPI_STATE_DISABLE, &n->state);
67086754
}
67096755
EXPORT_SYMBOL(napi_disable);
@@ -6719,6 +6765,11 @@ void napi_enable(struct napi_struct *n)
67196765
{
67206766
unsigned long new, val = READ_ONCE(n->state);
67216767

6768+
if (n->config)
6769+
napi_restore_config(n);
6770+
else
6771+
napi_hash_add(n);
6772+
67226773
do {
67236774
BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
67246775

@@ -6748,7 +6799,11 @@ void __netif_napi_del(struct napi_struct *napi)
67486799
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
67496800
return;
67506801

6751-
napi_hash_del(napi);
6802+
if (napi->config) {
6803+
napi->index = -1;
6804+
napi->config = NULL;
6805+
}
6806+
67526807
list_del_rcu(&napi->dev_list);
67536808
napi_free_frags(napi);
67546809

@@ -11085,6 +11140,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
1108511140
unsigned int txqs, unsigned int rxqs)
1108611141
{
1108711142
struct net_device *dev;
11143+
size_t napi_config_sz;
11144+
unsigned int maxqs;
1108811145

1108911146
BUG_ON(strlen(name) >= sizeof(dev->name));
1109011147

@@ -11098,6 +11155,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
1109811155
return NULL;
1109911156
}
1110011157

11158+
maxqs = max(txqs, rxqs);
11159+
1110111160
dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
1110211161
GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
1110311162
if (!dev)
@@ -11174,6 +11233,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
1117411233
if (!dev->ethtool)
1117511234
goto free_all;
1117611235

11236+
napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config));
11237+
dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT);
11238+
if (!dev->napi_config)
11239+
goto free_all;
11240+
1117711241
strscpy(dev->name, name);
1117811242
dev->name_assign_type = name_assign_type;
1117911243
dev->group = INIT_NETDEV_GROUP;
@@ -11237,6 +11301,8 @@ void free_netdev(struct net_device *dev)
1123711301
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
1123811302
netif_napi_del(p);
1123911303

11304+
kvfree(dev->napi_config);
11305+
1124011306
ref_tracker_dir_exit(&dev->refcnt_tracker);
1124111307
#ifdef CONFIG_PCPU_DEV_REFCNT
1124211308
free_percpu(dev->pcpu_refcnt);

net/core/dev.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,17 @@ static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer)
177177
static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
178178
u32 defer)
179179
{
180+
unsigned int count = max(netdev->num_rx_queues,
181+
netdev->num_tx_queues);
180182
struct napi_struct *napi;
183+
int i;
181184

182185
WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
183186
list_for_each_entry(napi, &netdev->napi_list, dev_list)
184187
napi_set_defer_hard_irqs(napi, defer);
188+
189+
for (i = 0; i < count; i++)
190+
netdev->napi_config[i].defer_hard_irqs = defer;
185191
}
186192

187193
/**
@@ -217,11 +223,17 @@ static inline void napi_set_gro_flush_timeout(struct napi_struct *n,
217223
static inline void netdev_set_gro_flush_timeout(struct net_device *netdev,
218224
unsigned long timeout)
219225
{
226+
unsigned int count = max(netdev->num_rx_queues,
227+
netdev->num_tx_queues);
220228
struct napi_struct *napi;
229+
int i;
221230

222231
WRITE_ONCE(netdev->gro_flush_timeout, timeout);
223232
list_for_each_entry(napi, &netdev->napi_list, dev_list)
224233
napi_set_gro_flush_timeout(napi, timeout);
234+
235+
for (i = 0; i < count; i++)
236+
netdev->napi_config[i].gro_flush_timeout = timeout;
225237
}
226238

227239
int rps_cpumask_housekeeping(struct cpumask *mask);

0 commit comments

Comments
 (0)