Skip to content

Commit 2094200

Browse files
Mingming CaoPaolo Abeni
authored and committed
ibmveth: Add multi buffers rx replenishment hcall support
This patch enables batched RX buffer replenishment in ibmveth by using the new firmware-supported h_add_logical_lan_buffers() hcall to submit up to 8 RX buffers in a single call, instead of repeatedly calling the single-buffer h_add_logical_lan_buffer() hcall. During probe, with this patch, the driver queries the ILLAN attributes to detect the IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT bit. If the attribute is present, rx_buffers_per_hcall is set to 8, enabling batched replenishment. Otherwise, it defaults to 1, preserving the original upstream behavior with no change in code flow for unsupported systems. The core rx replenish logic remains the same. But when batching is enabled, the driver aggregates up to 8 fully prepared descriptors into a single h_add_logical_lan_buffers() hypercall. If any allocation or DMA mapping fails while preparing a batch, only the successfully prepared buffers are submitted, and the remaining ones are deferred to the next replenish cycle. If at runtime the firmware stops accepting the batched hcall — e.g., after a Live Partition Migration (LPM) to a host that does not support h_add_logical_lan_buffers() — the hypercall returns H_FUNCTION. In that case, the driver transparently disables batching, resets rx_buffers_per_hcall to 1, and falls back to the single-buffer hcall in future replenishments to take care of these and subsequent buffers. Tests were done on systems with firmware that both supports and does not support the new h_add_logical_lan_buffers hcall. On supported firmware, this reduces hypercall overhead significantly across multiple buffers. 
SAR measurements showed about a 15% improvement in packet processing rate under moderate RX load, with heavier traffic seeing gains of more than 30%. Signed-off-by: Mingming Cao <[email protected]> Reviewed-by: Brian King <[email protected]> Reviewed-by: Haren Myneni <[email protected]> Reviewed-by: Dave Marquardt <[email protected]> Reviewed-by: Simon Horman <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Paolo Abeni <[email protected]>
1 parent db8a514 commit 2094200

File tree

3 files changed

+174
-68
lines changed

3 files changed

+174
-68
lines changed

arch/powerpc/include/asm/hvcall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@
270270
#define H_QUERY_INT_STATE 0x1E4
271271
#define H_POLL_PENDING 0x1D8
272272
#define H_ILLAN_ATTRIBUTES 0x244
273+
#define H_ADD_LOGICAL_LAN_BUFFERS 0x248
273274
#define H_MODIFY_HEA_QP 0x250
274275
#define H_QUERY_HEA_QP 0x254
275276
#define H_QUERY_HEA 0x258

drivers/net/ethernet/ibm/ibmveth.c

Lines changed: 152 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -211,98 +211,169 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
211211
static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
212212
struct ibmveth_buff_pool *pool)
213213
{
214-
u32 i;
215-
u32 count = pool->size - atomic_read(&pool->available);
216-
u32 buffers_added = 0;
217-
struct sk_buff *skb;
218-
unsigned int free_index, index;
219-
u64 correlator;
214+
union ibmveth_buf_desc descs[IBMVETH_MAX_RX_PER_HCALL] = {0};
215+
u32 remaining = pool->size - atomic_read(&pool->available);
216+
u64 correlators[IBMVETH_MAX_RX_PER_HCALL] = {0};
220217
unsigned long lpar_rc;
218+
u32 buffers_added = 0;
219+
u32 i, filled, batch;
220+
struct vio_dev *vdev;
221221
dma_addr_t dma_addr;
222+
struct device *dev;
223+
u32 index;
224+
225+
vdev = adapter->vdev;
226+
dev = &vdev->dev;
222227

223228
mb();
224229

225-
for (i = 0; i < count; ++i) {
226-
union ibmveth_buf_desc desc;
230+
batch = adapter->rx_buffers_per_hcall;
227231

228-
free_index = pool->consumer_index;
229-
index = pool->free_map[free_index];
230-
skb = NULL;
232+
while (remaining > 0) {
233+
unsigned int free_index = pool->consumer_index;
231234

232-
if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
233-
schedule_work(&adapter->work);
234-
goto bad_index_failure;
235-
}
235+
/* Fill a batch of descriptors */
236+
for (filled = 0; filled < min(remaining, batch); filled++) {
237+
index = pool->free_map[free_index];
238+
if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
239+
adapter->replenish_add_buff_failure++;
240+
netdev_info(adapter->netdev,
241+
"Invalid map index %u, reset\n",
242+
index);
243+
schedule_work(&adapter->work);
244+
break;
245+
}
246+
247+
if (!pool->skbuff[index]) {
248+
struct sk_buff *skb = NULL;
236249

237-
/* are we allocating a new buffer or recycling an old one */
238-
if (pool->skbuff[index])
239-
goto reuse;
250+
skb = netdev_alloc_skb(adapter->netdev,
251+
pool->buff_size);
252+
if (!skb) {
253+
adapter->replenish_no_mem++;
254+
adapter->replenish_add_buff_failure++;
255+
break;
256+
}
257+
258+
dma_addr = dma_map_single(dev, skb->data,
259+
pool->buff_size,
260+
DMA_FROM_DEVICE);
261+
if (dma_mapping_error(dev, dma_addr)) {
262+
dev_kfree_skb_any(skb);
263+
adapter->replenish_add_buff_failure++;
264+
break;
265+
}
240266

241-
skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
267+
pool->dma_addr[index] = dma_addr;
268+
pool->skbuff[index] = skb;
269+
} else {
270+
/* re-use case */
271+
dma_addr = pool->dma_addr[index];
272+
}
242273

243-
if (!skb) {
244-
netdev_dbg(adapter->netdev,
245-
"replenish: unable to allocate skb\n");
246-
adapter->replenish_no_mem++;
247-
break;
248-
}
274+
if (rx_flush) {
275+
unsigned int len;
249276

250-
dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
251-
pool->buff_size, DMA_FROM_DEVICE);
277+
len = adapter->netdev->mtu + IBMVETH_BUFF_OH;
278+
len = min(pool->buff_size, len);
279+
ibmveth_flush_buffer(pool->skbuff[index]->data,
280+
len);
281+
}
252282

253-
if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
254-
goto failure;
283+
descs[filled].fields.flags_len = IBMVETH_BUF_VALID |
284+
pool->buff_size;
285+
descs[filled].fields.address = dma_addr;
255286

256-
pool->dma_addr[index] = dma_addr;
257-
pool->skbuff[index] = skb;
287+
correlators[filled] = ((u64)pool->index << 32) | index;
288+
*(u64 *)pool->skbuff[index]->data = correlators[filled];
258289

259-
if (rx_flush) {
260-
unsigned int len = min(pool->buff_size,
261-
adapter->netdev->mtu +
262-
IBMVETH_BUFF_OH);
263-
ibmveth_flush_buffer(skb->data, len);
290+
free_index++;
291+
if (free_index >= pool->size)
292+
free_index = 0;
264293
}
265-
reuse:
266-
dma_addr = pool->dma_addr[index];
267-
desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
268-
desc.fields.address = dma_addr;
269-
270-
correlator = ((u64)pool->index << 32) | index;
271-
*(u64 *)pool->skbuff[index]->data = correlator;
272294

273-
lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
274-
desc.desc);
295+
if (!filled)
296+
break;
275297

298+
/* single buffer case*/
299+
if (filled == 1)
300+
lpar_rc = h_add_logical_lan_buffer(vdev->unit_address,
301+
descs[0].desc);
302+
else
303+
/* Multi-buffer hcall */
304+
lpar_rc = h_add_logical_lan_buffers(vdev->unit_address,
305+
descs[0].desc,
306+
descs[1].desc,
307+
descs[2].desc,
308+
descs[3].desc,
309+
descs[4].desc,
310+
descs[5].desc,
311+
descs[6].desc,
312+
descs[7].desc);
276313
if (lpar_rc != H_SUCCESS) {
277-
netdev_warn(adapter->netdev,
278-
"%sadd_logical_lan failed %lu\n",
279-
skb ? "" : "When recycling: ", lpar_rc);
280-
goto failure;
314+
dev_warn_ratelimited(dev,
315+
"RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n",
316+
filled, lpar_rc, batch);
317+
goto hcall_failure;
281318
}
282319

283-
pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
284-
pool->consumer_index++;
285-
if (pool->consumer_index >= pool->size)
286-
pool->consumer_index = 0;
320+
/* Only update pool state after hcall succeeds */
321+
for (i = 0; i < filled; i++) {
322+
free_index = pool->consumer_index;
323+
pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
287324

288-
buffers_added++;
289-
adapter->replenish_add_buff_success++;
290-
}
325+
pool->consumer_index++;
326+
if (pool->consumer_index >= pool->size)
327+
pool->consumer_index = 0;
328+
}
291329

292-
mb();
293-
atomic_add(buffers_added, &(pool->available));
294-
return;
330+
buffers_added += filled;
331+
adapter->replenish_add_buff_success += filled;
332+
remaining -= filled;
295333

296-
failure:
334+
memset(&descs, 0, sizeof(descs));
335+
memset(&correlators, 0, sizeof(correlators));
336+
continue;
297337

298-
if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
299-
dma_unmap_single(&adapter->vdev->dev,
300-
pool->dma_addr[index], pool->buff_size,
301-
DMA_FROM_DEVICE);
302-
dev_kfree_skb_any(pool->skbuff[index]);
303-
pool->skbuff[index] = NULL;
304-
bad_index_failure:
305-
adapter->replenish_add_buff_failure++;
338+
hcall_failure:
339+
for (i = 0; i < filled; i++) {
340+
index = correlators[i] & 0xffffffffUL;
341+
dma_addr = pool->dma_addr[index];
342+
343+
if (pool->skbuff[index]) {
344+
if (dma_addr &&
345+
!dma_mapping_error(dev, dma_addr))
346+
dma_unmap_single(dev, dma_addr,
347+
pool->buff_size,
348+
DMA_FROM_DEVICE);
349+
350+
dev_kfree_skb_any(pool->skbuff[index]);
351+
pool->skbuff[index] = NULL;
352+
}
353+
}
354+
adapter->replenish_add_buff_failure += filled;
355+
356+
/*
357+
* If multi rx buffers hcall is no longer supported by FW
358+
* e.g. in the case of Live Partition Migration
359+
*/
360+
if (batch > 1 && lpar_rc == H_FUNCTION) {
361+
/*
362+
* Instead of retry submit single buffer individually
363+
* here just set the max rx buffer per hcall to 1
364+
* buffers will be replenished next time
365+
* when ibmveth_replenish_buffer_pool() is called again
366+
* with single-buffer case
367+
*/
368+
netdev_info(adapter->netdev,
369+
"RX Multi buffers not supported by FW, rc=%lu\n",
370+
lpar_rc);
371+
adapter->rx_buffers_per_hcall = 1;
372+
netdev_info(adapter->netdev,
373+
"Next rx replesh will fall back to single-buffer hcall\n");
374+
}
375+
break;
376+
}
306377

307378
mb();
308379
atomic_add(buffers_added, &(pool->available));
@@ -1783,6 +1854,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
17831854
netdev->features |= NETIF_F_FRAGLIST;
17841855
}
17851856

1857+
if (ret == H_SUCCESS &&
1858+
(ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT)) {
1859+
adapter->rx_buffers_per_hcall = IBMVETH_MAX_RX_PER_HCALL;
1860+
netdev_dbg(netdev,
1861+
"RX Multi-buffer hcall supported by FW, batch set to %u\n",
1862+
adapter->rx_buffers_per_hcall);
1863+
} else {
1864+
adapter->rx_buffers_per_hcall = 1;
1865+
netdev_dbg(netdev,
1866+
"RX Single-buffer hcall mode, batch set to %u\n",
1867+
adapter->rx_buffers_per_hcall);
1868+
}
1869+
17861870
netdev->min_mtu = IBMVETH_MIN_MTU;
17871871
netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
17881872

drivers/net/ethernet/ibm/ibmveth.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#define IbmVethMcastRemoveFilter 0x2UL
2929
#define IbmVethMcastClearFilterTable 0x3UL
3030

31+
#define IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT 0x0000000000040000UL
3132
#define IBMVETH_ILLAN_LRG_SR_ENABLED 0x0000000000010000UL
3233
#define IBMVETH_ILLAN_LRG_SND_SUPPORT 0x0000000000008000UL
3334
#define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL
@@ -46,6 +47,24 @@
4647
#define h_add_logical_lan_buffer(ua, buf) \
4748
plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
4849

50+
static inline long h_add_logical_lan_buffers(unsigned long unit_address,
51+
unsigned long desc1,
52+
unsigned long desc2,
53+
unsigned long desc3,
54+
unsigned long desc4,
55+
unsigned long desc5,
56+
unsigned long desc6,
57+
unsigned long desc7,
58+
unsigned long desc8)
59+
{
60+
unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
61+
62+
return plpar_hcall9(H_ADD_LOGICAL_LAN_BUFFERS,
63+
retbuf, unit_address,
64+
desc1, desc2, desc3, desc4,
65+
desc5, desc6, desc7, desc8);
66+
}
67+
4968
/* FW allows us to send 6 descriptors but we only use one so mark
5069
* the other 5 as unused (0)
5170
*/
@@ -101,6 +120,7 @@ static inline long h_illan_attributes(unsigned long unit_address,
101120
#define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64)
102121
#define IBMVETH_MAX_QUEUES 16U
103122
#define IBMVETH_DEFAULT_QUEUES 8U
123+
#define IBMVETH_MAX_RX_PER_HCALL 8U
104124

105125
static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
106126
static int pool_count[] = { 256, 512, 256, 256, 256 };
@@ -151,6 +171,7 @@ struct ibmveth_adapter {
151171
int rx_csum;
152172
int large_send;
153173
bool is_active_trunk;
174+
unsigned int rx_buffers_per_hcall;
154175

155176
u64 fw_ipv6_csum_support;
156177
u64 fw_ipv4_csum_support;

0 commit comments

Comments
 (0)