Skip to content

Commit cfee879

Browse files
Wen GongKalle Valo
Wen Gong
authored and
Kalle Valo
committed
ath10k: enable napi on RX path for sdio
For TCP RX, the number of TCP ACKs sent to the remote is 1/2 of the number of TCP data packets received from the remote, so there are many small packets on the TX path of the SDIO bus, which reduces the bandwidth available to TCP RX packets. This patch enables NAPI on the RX path. Because GRO is enabled by default, TCP RX packets are then no longer fed to the TCP stack immediately from mac80211; they are fed to the TCP stack after NAPI completes. If RX bundling is enabled, they are fed to the TCP stack once per RX bundle. For example, if the RX bundle size is 32, the TCP stack receives one large packet whose length is nearly 1500*32 and sends a single TCP ACK for this large packet, which reduces the TCP ACK ratio from 1/2 to 1/32. This results in a significant performance improvement for TCP RX: TCP RX throughput is 240Mbps without this patch and reaches 390Mbps with this patch. The CPU usage shows no obvious difference with and without NAPI. Call stack for each RX packet on GRO path: (skb length is about 1500 bytes) skb_gro_receive ([kernel.kallsyms]) tcp4_gro_receive ([kernel.kallsyms]) inet_gro_receive ([kernel.kallsyms]) dev_gro_receive ([kernel.kallsyms]) napi_gro_receive ([kernel.kallsyms]) ieee80211_deliver_skb ([mac80211]) ieee80211_rx_handlers ([mac80211]) ieee80211_prepare_and_rx_handle ([mac80211]) ieee80211_rx_napi ([mac80211]) ath10k_htt_rx_proc_rx_ind_hl ([ath10k_core]) ath10k_htt_rx_pktlog_completion_handler ([ath10k_core]) ath10k_sdio_napi_poll ([ath10k_sdio]) net_rx_action ([kernel.kallsyms]) __softirqentry_text_start ([kernel.kallsyms]) do_softirq ([kernel.kallsyms]) Call stack for NAPI complete and sending the TCP ACK from the TCP stack: (skb length is about 1500*32 bytes) _tcp_ack_snd_check ([kernel.kallsyms]) tcp_v4_do_rcv ([kernel.kallsyms]) tcp_v4_rcv ([kernel.kallsyms]) local_deliver_finish ([kernel.kallsyms]) ip_local_deliver ([kernel.kallsyms]) ip_rcv_finish ([kernel.kallsyms]) ip_rcv ([kernel.kallsyms]) netif_receive_skb_core ([kernel.kallsyms]) 
netif_receive_skb_one_core([kernel.kallsyms]) netif_receive_skb ([kernel.kallsyms]) netif_receive_skb_internal ([kernel.kallsyms]) napi_gro_complete ([kernel.kallsyms]) napi_gro_flush ([kernel.kallsyms]) napi_complete_done ([kernel.kallsyms]) ath10k_sdio_napi_poll ([ath10k_sdio]) net_rx_action ([kernel.kallsyms]) __softirqentry_text_start ([kernel.kallsyms]) do_softirq ([kernel.kallsyms]) Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00017-QCARMSWP-1. Signed-off-by: Wen Gong <[email protected]> Signed-off-by: Kalle Valo <[email protected]>
1 parent fcaf49d commit cfee879

File tree

4 files changed

+73
-8
lines changed

4 files changed

+73
-8
lines changed

drivers/net/wireless/ath/ath10k/core.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3220,6 +3220,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
32203220
init_waitqueue_head(&ar->htt.empty_tx_wq);
32213221
init_waitqueue_head(&ar->wmi.tx_credits_wq);
32223222

3223+
skb_queue_head_init(&ar->htt.rx_indication_head);
3224+
32233225
init_completion(&ar->offchan_tx_completed);
32243226
INIT_WORK(&ar->offchan_tx_work, ath10k_offchan_tx_work);
32253227
skb_queue_head_init(&ar->offchan_tx_queue);

drivers/net/wireless/ath/ath10k/htt.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1869,6 +1869,8 @@ struct ath10k_htt {
18691869
struct ath10k *ar;
18701870
enum ath10k_htc_ep_id eid;
18711871

1872+
struct sk_buff_head rx_indication_head;
1873+
18721874
u8 target_version_major;
18731875
u8 target_version_minor;
18741876
struct completion target_version_received;
@@ -2283,6 +2285,7 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu);
22832285
void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar,
22842286
struct sk_buff *skb);
22852287
int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget);
2288+
int ath10k_htt_rx_hl_indication(struct ath10k *ar, int budget);
22862289
void ath10k_htt_set_tx_ops(struct ath10k_htt *htt);
22872290
void ath10k_htt_set_rx_ops(struct ath10k_htt *htt);
22882291
#endif

drivers/net/wireless/ath/ath10k/htt_rx.c

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2359,7 +2359,10 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt,
23592359
memcpy(skb->data + offset, &qos_ctrl, IEEE80211_QOS_CTL_LEN);
23602360
}
23612361

2362-
ieee80211_rx_ni(ar->hw, skb);
2362+
if (ar->napi.dev)
2363+
ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi);
2364+
else
2365+
ieee80211_rx_ni(ar->hw, skb);
23632366

23642367
/* We have delivered the skb to the upper layers (mac80211) so we
23652368
* must not free it.
@@ -3760,14 +3763,12 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
37603763
break;
37613764
}
37623765
case HTT_T2H_MSG_TYPE_RX_IND:
3763-
if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL)
3764-
return ath10k_htt_rx_proc_rx_ind_hl(htt,
3765-
&resp->rx_ind_hl,
3766-
skb,
3767-
HTT_RX_PN_CHECK,
3768-
HTT_RX_NON_TKIP_MIC);
3769-
else
3766+
if (ar->bus_param.dev_type != ATH10K_DEV_TYPE_HL) {
37703767
ath10k_htt_rx_proc_rx_ind_ll(htt, &resp->rx_ind);
3768+
} else {
3769+
skb_queue_tail(&htt->rx_indication_head, skb);
3770+
return false;
3771+
}
37713772
break;
37723773
case HTT_T2H_MSG_TYPE_PEER_MAP: {
37733774
struct htt_peer_map_event ev = {
@@ -3957,6 +3958,37 @@ static int ath10k_htt_rx_deliver_msdu(struct ath10k *ar, int quota, int budget)
39573958
return quota;
39583959
}
39593960

3961+
int ath10k_htt_rx_hl_indication(struct ath10k *ar, int budget)
3962+
{
3963+
struct htt_resp *resp;
3964+
struct ath10k_htt *htt = &ar->htt;
3965+
struct sk_buff *skb;
3966+
bool release;
3967+
int quota;
3968+
3969+
for (quota = 0; quota < budget; quota++) {
3970+
skb = skb_dequeue(&htt->rx_indication_head);
3971+
if (!skb)
3972+
break;
3973+
3974+
resp = (struct htt_resp *)skb->data;
3975+
3976+
release = ath10k_htt_rx_proc_rx_ind_hl(htt,
3977+
&resp->rx_ind_hl,
3978+
skb,
3979+
HTT_RX_PN_CHECK,
3980+
HTT_RX_NON_TKIP_MIC);
3981+
3982+
if (release)
3983+
dev_kfree_skb_any(skb);
3984+
3985+
ath10k_dbg(ar, ATH10K_DBG_HTT, "rx indication poll pending count:%d\n",
3986+
skb_queue_len(&htt->rx_indication_head));
3987+
}
3988+
return quota;
3989+
}
3990+
EXPORT_SYMBOL(ath10k_htt_rx_hl_indication);
3991+
39603992
int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget)
39613993
{
39623994
struct ath10k_htt *htt = &ar->htt;

drivers/net/wireless/ath/ath10k/sdio.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,9 @@ static void ath10k_rx_indication_async_work(struct work_struct *work)
13391339
ep = &ar->htc.endpoint[cb->eid];
13401340
ep->ep_ops.ep_rx_complete(ar, skb);
13411341
}
1342+
1343+
if (test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags))
1344+
napi_schedule(&ar->napi);
13421345
}
13431346

13441347
static void ath10k_sdio_write_async_work(struct work_struct *work)
@@ -1729,6 +1732,8 @@ static int ath10k_sdio_hif_start(struct ath10k *ar)
17291732
struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
17301733
int ret;
17311734

1735+
napi_enable(&ar->napi);
1736+
17321737
/* Sleep 20 ms before HIF interrupts are disabled.
17331738
* This will give target plenty of time to process the BMI done
17341739
* request before interrupts are disabled.
@@ -1853,6 +1858,9 @@ static void ath10k_sdio_hif_stop(struct ath10k *ar)
18531858
}
18541859

18551860
spin_unlock_bh(&ar_sdio->wr_async_lock);
1861+
1862+
napi_synchronize(&ar->napi);
1863+
napi_disable(&ar->napi);
18561864
}
18571865

18581866
#ifdef CONFIG_PM
@@ -2047,6 +2055,20 @@ static SIMPLE_DEV_PM_OPS(ath10k_sdio_pm_ops, ath10k_sdio_pm_suspend,
20472055

20482056
#endif /* CONFIG_PM_SLEEP */
20492057

2058+
static int ath10k_sdio_napi_poll(struct napi_struct *ctx, int budget)
2059+
{
2060+
struct ath10k *ar = container_of(ctx, struct ath10k, napi);
2061+
int done;
2062+
2063+
done = ath10k_htt_rx_hl_indication(ar, budget);
2064+
ath10k_dbg(ar, ATH10K_DBG_SDIO, "napi poll: done: %d, budget:%d\n", done, budget);
2065+
2066+
if (done < budget)
2067+
napi_complete_done(ctx, done);
2068+
2069+
return done;
2070+
}
2071+
20502072
static int ath10k_sdio_probe(struct sdio_func *func,
20512073
const struct sdio_device_id *id)
20522074
{
@@ -2072,6 +2094,9 @@ static int ath10k_sdio_probe(struct sdio_func *func,
20722094
return -ENOMEM;
20732095
}
20742096

2097+
netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_sdio_napi_poll,
2098+
ATH10K_NAPI_BUDGET);
2099+
20752100
ath10k_dbg(ar, ATH10K_DBG_BOOT,
20762101
"sdio new func %d vendor 0x%x device 0x%x block 0x%x/0x%x\n",
20772102
func->num, func->vendor, func->device,
@@ -2184,6 +2209,9 @@ static void ath10k_sdio_remove(struct sdio_func *func)
21842209
func->num, func->vendor, func->device);
21852210

21862211
ath10k_core_unregister(ar);
2212+
2213+
netif_napi_del(&ar->napi);
2214+
21872215
ath10k_core_destroy(ar);
21882216

21892217
flush_workqueue(ar_sdio->workqueue);

0 commit comments

Comments
 (0)