Skip to content

Commit b7cf080

Browse files
Ofer Levi, Saeed Mahameed
Ofer Levi
authored and
Saeed Mahameed
committed
net/mlx5e: Add CQE compression support for multi-strides packets
Add CQE compression support for completions of packets that span multiple strides in a Striding RQ, per the HW capability.

In our memory model, we use small strides (256B as of today) for the non-linear SKB mode. This feature allows CQE compression to work for multi-stride packets as well. In this case, decompressing the mini CQE array uses the stride index provided by HW as part of the mini CQE. Before this feature, compression was possible only for single-stride packets, i.e. packets of up to 256 bytes when in non-linear mode, and the index was maintained by SW.

This feature is supported on ConnectX-5 and above.

Feature performance test:
This was whitebox-tested: we reduced the PCI speed from 125Gb/s to 62.5Gb/s to overload the PCI bus, and modified the mlx5 driver to drop incoming packets before building the SKB in order to achieve low CPU utilization. The outcome is low CPU utilization, with PCI as the only bottleneck.

Test setup:
Server: Intel(R) Xeon(R) Silver 4108 CPU @ 1.80GHz, 32 cores
NIC: ConnectX-6 DX
The sender side generates 300-byte packets at full PCI bandwidth.
Receiver side configuration: single channel, one CPU processing with one ring allocated. CPU utilization is ~20% while PCI bandwidth is fully utilized.

For the generated traffic and an interface MTU of 4500B (to activate the non-linear SKB mode), the packet rate improves by about 19%, from ~17.6Mpps to ~21Mpps.

Without this feature, counters show no CQE compression blocks for this setup, while with the feature, counters show ~20.7Mpps compressed CQEs in ~500K compression blocks.

Signed-off-by: Ofer Levi <[email protected]>
Reviewed-by: Tariq Toukan <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
1 parent 748cde9 commit b7cf080

File tree

4 files changed

+24
-3
lines changed

4 files changed

+24
-3
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ enum {
265265
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
266266
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
267267
MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
268+
MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */
268269
};
269270

270271
struct mlx5e_cq {

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,13 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
848848
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
849849
__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
850850

851+
/* For CQE compression on striding RQ, use stride index provided by
852+
* HW if capability is supported.
853+
*/
854+
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
855+
MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index))
856+
__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state);
857+
851858
return 0;
852859

853860
err_destroy_rq:
@@ -2182,21 +2189,24 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
21822189
struct mlx5e_cq_param *param)
21832190
{
21842191
struct mlx5_core_dev *mdev = priv->mdev;
2192+
bool hw_stridx = false;
21852193
void *cqc = param->cqc;
21862194
u8 log_cq_size;
21872195

21882196
switch (params->rq_wq_type) {
21892197
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
21902198
log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
21912199
mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
2200+
hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
21922201
break;
21932202
default: /* MLX5_WQ_TYPE_CYCLIC */
21942203
log_cq_size = params->log_rq_mtu_frames;
21952204
}
21962205

21972206
MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
21982207
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
2199-
MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
2208+
MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
2209+
MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
22002210
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
22012211
}
22022212

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,17 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
137137
title->check_sum = mini_cqe->checksum;
138138
title->op_own &= 0xf0;
139139
title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);
140-
title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
141140

141+
/* state bit set implies linked-list striding RQ wq type and
142+
* HW stride index capability supported
143+
*/
144+
if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
145+
title->wqe_counter = mini_cqe->stridx;
146+
return;
147+
}
148+
149+
/* HW stride index capability not supported */
150+
title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
142151
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
143152
cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
144153
else

include/linux/mlx5/device.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,7 @@ struct mlx5_mini_cqe8 {
816816
__be32 rx_hash_result;
817817
struct {
818818
__be16 checksum;
819-
__be16 rsvd;
819+
__be16 stridx;
820820
};
821821
struct {
822822
__be16 wqe_counter;
@@ -836,6 +836,7 @@ enum {
836836

837837
enum {
838838
MLX5_CQE_FORMAT_CSUM = 0x1,
839+
MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3,
839840
};
840841

841842
#define MLX5_MINI_CQE_ARRAY_SIZE 8

0 commit comments

Comments
 (0)