
Commit dacce2b

Ronak Doshi authored and davem330 committed
vmxnet3: add geneve and vxlan tunnel offload support
Vmxnet3 version 3 device supports checksum/TSO offload. Thus, vNIC to
pNIC traffic can leverage hardware checksum/TSO offloads. However,
vmxnet3 does not support checksum/TSO offload for Geneve/VXLAN
encapsulated packets. Thus, for a vNIC configured with an overlay, the
guest stack must first segment the inner packet, compute the inner
checksum for each segment, and encapsulate each segment before
transmitting the packet via the vNIC. This results in a significant
performance penalty.

This patch enhances vmxnet3 to support Geneve/VXLAN TSO as well as
checksum offload.

Signed-off-by: Ronak Doshi <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent d3a8a9e commit dacce2b
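
For orientation in the diff below: the core of the change is that, for encapsulated packets, the driver now programs the device with offsets measured to the inner headers. The sketch shows where those offsets fall in a VXLAN-encapsulated TCP segment. It is illustrative only; skb_inner_transport_offset() and inner_tcp_hdrlen() are the in-tree helpers the patch uses, while example_encap_hdr_len() is a hypothetical name.

/* | outer eth | outer IP | outer UDP | VXLAN | inner eth | inner IP | inner TCP | payload |
 * ^                                                                 ^
 * skb->data                                   skb_inner_transport_offset(skb)
 */
static unsigned int example_encap_hdr_len(const struct sk_buff *skb)
{
	/* Bytes through the end of the inner TCP header: what the patch
	 * records as ctx->l4_offset + ctx->l4_hdr_size and copies into
	 * the data ring for an encapsulated TSO packet.
	 */
	return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
}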

5 files changed: +161 -33 lines


drivers/net/vmxnet3/upt1_defs.h

Lines changed: 3 additions & 0 deletions
@@ -92,5 +92,8 @@ enum {
 	UPT1_F_RSS = cpu_to_le64(0x0002),
 	UPT1_F_RXVLAN = cpu_to_le64(0x0004), /* VLAN tag stripping */
 	UPT1_F_LRO = cpu_to_le64(0x0008),
+	UPT1_F_RXINNEROFLD = cpu_to_le64(0x00010), /* Geneve/Vxlan rx csum
+						    * offloading
+						    */
 };
 #endif

drivers/net/vmxnet3/vmxnet3_defs.h

Lines changed: 10 additions & 7 deletions
@@ -103,14 +103,14 @@ enum {
 /*
  * Little Endian layout of bitfields -
  * Byte 0 : 7.....len.....0
- * Byte 1 : rsvd gen 13.len.8
+ * Byte 1 : oco gen 13.len.8
  * Byte 2 : 5.msscof.0 ext1 dtype
  * Byte 3 : 13...msscof...6
  *
  * Big Endian layout of bitfields -
  * Byte 0: 13...msscof...6
  * Byte 1 : 5.msscof.0 ext1 dtype
- * Byte 2 : rsvd gen 13.len.8
+ * Byte 2 : oco gen 13.len.8
  * Byte 3 : 7.....len.....0
  *
  * Thus, le32_to_cpu on the dword will allow the big endian driver to read
@@ -125,13 +125,13 @@ struct Vmxnet3_TxDesc {
 	u32 msscof:14;  /* MSS, checksum offset, flags */
 	u32 ext1:1;
 	u32 dtype:1;    /* descriptor type */
-	u32 rsvd:1;
+	u32 oco:1;
 	u32 gen:1;      /* generation bit */
 	u32 len:14;
 #else
 	u32 len:14;
 	u32 gen:1;      /* generation bit */
-	u32 rsvd:1;
+	u32 oco:1;
 	u32 dtype:1;    /* descriptor type */
 	u32 ext1:1;
 	u32 msscof:14;  /* MSS, checksum offset, flags */
@@ -157,9 +157,10 @@ struct Vmxnet3_TxDesc {
 };

 /* TxDesc.OM values */
-#define VMXNET3_OM_NONE		0
-#define VMXNET3_OM_CSUM		2
-#define VMXNET3_OM_TSO		3
+#define VMXNET3_OM_NONE		0
+#define VMXNET3_OM_ENCAP	1
+#define VMXNET3_OM_CSUM		2
+#define VMXNET3_OM_TSO		3

 /* fields in TxDesc we access w/o using bit fields */
 #define VMXNET3_TXD_EOP_SHIFT	12
@@ -226,6 +227,8 @@ struct Vmxnet3_RxDesc {
 #define VMXNET3_RXD_BTYPE_SHIFT	14
 #define VMXNET3_RXD_GEN_SHIFT	31

+#define VMXNET3_RCD_HDR_INNER_SHIFT	13
+
 struct Vmxnet3_RxCompDesc {
 #ifdef __BIG_ENDIAN_BITFIELD
 	u32 ext2:1;
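
Condensing the vmxnet3_drv.c changes that follow, a transmit path would program the new descriptor fields roughly like this for an encapsulated TSO packet (sketch only; gdesc, ctx, and skb stand in for the driver's real state):

	gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; /* through inner L4 hdr */
	gdesc->txd.om = VMXNET3_OM_ENCAP;  /* new OM value 1: inner csum/TSO */
	gdesc->txd.msscof = ctx.mss;       /* MSS applies to the inner TCP */
	if (udp_hdr(skb)->check)           /* outer UDP checksum requested */
		gdesc->txd.oco = 1;        /* device also fills the outer csum */

On receive, the new VMXNET3_RCD_HDR_INNER_SHIFT bit in dword[0] of the completion descriptor marks the checksum results as applying to the inner packet:

	bool inner = le32_to_cpu(gdesc->dword[0]) &
		     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT);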

drivers/net/vmxnet3/vmxnet3_drv.c

Lines changed: 98 additions & 22 deletions
@@ -842,12 +842,22 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	u8 protocol = 0;

 	if (ctx->mss) {	/* TSO */
-		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
-		ctx->l4_hdr_size = tcp_hdrlen(skb);
-		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
+		if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) {
+			ctx->l4_offset = skb_inner_transport_offset(skb);
+			ctx->l4_hdr_size = inner_tcp_hdrlen(skb);
+			ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size;
+		} else {
+			ctx->l4_offset = skb_transport_offset(skb);
+			ctx->l4_hdr_size = tcp_hdrlen(skb);
+			ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size;
+		}
 	} else {
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
+			/* For encap packets, skb_checksum_start_offset refers
+			 * to inner L4 offset. Thus, below works for encap as
+			 * well as non-encap case
+			 */
+			ctx->l4_offset = skb_checksum_start_offset(skb);

 			if (ctx->ipv4) {
 				const struct iphdr *iph = ip_hdr(skb);
@@ -871,10 +881,10 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 				break;
 			}

-			ctx->copy_size = min(ctx->eth_ip_hdr_size +
+			ctx->copy_size = min(ctx->l4_offset +
 					     ctx->l4_hdr_size, skb->len);
 		} else {
-			ctx->eth_ip_hdr_size = 0;
+			ctx->l4_offset = 0;
 			ctx->l4_hdr_size = 0;
 			/* copy as much as allowed */
 			ctx->copy_size = min_t(unsigned int,
@@ -929,6 +939,25 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 }


+static void
+vmxnet3_prepare_inner_tso(struct sk_buff *skb,
+			  struct vmxnet3_tx_ctx *ctx)
+{
+	struct tcphdr *tcph = inner_tcp_hdr(skb);
+	struct iphdr *iph = inner_ip_hdr(skb);
+
+	if (ctx->ipv4) {
+		iph->check = 0;
+		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
+						 IPPROTO_TCP, 0);
+	} else if (ctx->ipv6) {
+		struct ipv6hdr *iph = inner_ipv6_hdr(skb);
+
+		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
+					       IPPROTO_TCP, 0);
+	}
+}
+
 static void
 vmxnet3_prepare_tso(struct sk_buff *skb,
 		    struct vmxnet3_tx_ctx *ctx)
@@ -987,6 +1016,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	/* Use temporary descriptor to avoid touching bits multiple times */
 	union Vmxnet3_GenericDesc tempTxDesc;
 #endif
+	struct udphdr *udph;

 	count = txd_estimate(skb);

@@ -1003,7 +1033,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 			}
 			tq->stats.copy_skb_header++;
 		}
-		vmxnet3_prepare_tso(skb, &ctx);
+		if (skb->encapsulation) {
+			vmxnet3_prepare_inner_tso(skb, &ctx);
+		} else {
+			vmxnet3_prepare_tso(skb, &ctx);
+		}
 	} else {
 		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {

@@ -1026,14 +1060,14 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	BUG_ON(ret <= 0 && ctx.copy_size != 0);
 	/* hdrs parsed, check against other limits */
 	if (ctx.mss) {
-		if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
+		if (unlikely(ctx.l4_offset + ctx.l4_hdr_size >
 			     VMXNET3_MAX_TX_BUF_SIZE)) {
 			tq->stats.drop_oversized_hdr++;
 			goto drop_pkt;
 		}
 	} else {
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			if (unlikely(ctx.eth_ip_hdr_size +
+			if (unlikely(ctx.l4_offset +
 				     skb->csum_offset >
 				     VMXNET3_MAX_CSUM_OFFSET)) {
 				tq->stats.drop_oversized_hdr++;
@@ -1080,16 +1114,34 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 #endif
 	tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
 	if (ctx.mss) {
-		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
-		gdesc->txd.om = VMXNET3_OM_TSO;
-		gdesc->txd.msscof = ctx.mss;
+		if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) {
+			gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
+			gdesc->txd.om = VMXNET3_OM_ENCAP;
+			gdesc->txd.msscof = ctx.mss;
+
+			udph = udp_hdr(skb);
+			if (udph->check)
+				gdesc->txd.oco = 1;
+		} else {
+			gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
+			gdesc->txd.om = VMXNET3_OM_TSO;
+			gdesc->txd.msscof = ctx.mss;
+		}
 		num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss;
 	} else {
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
-			gdesc->txd.om = VMXNET3_OM_CSUM;
-			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
-					    skb->csum_offset;
+			if (VMXNET3_VERSION_GE_4(adapter) &&
+			    skb->encapsulation) {
+				gdesc->txd.hlen = ctx.l4_offset +
+						  ctx.l4_hdr_size;
+				gdesc->txd.om = VMXNET3_OM_ENCAP;
+				gdesc->txd.msscof = 0;		/* Reserved */
+			} else {
+				gdesc->txd.hlen = ctx.l4_offset;
+				gdesc->txd.om = VMXNET3_OM_CSUM;
+				gdesc->txd.msscof = ctx.l4_offset +
+						    skb->csum_offset;
+			}
 		} else {
 			gdesc->txd.om = 0;
 			gdesc->txd.msscof = 0;
@@ -1168,13 +1220,21 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
 		    (le32_to_cpu(gdesc->dword[3]) &
 		     VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
-			BUG_ON(gdesc->rcd.frg);
+			WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				       (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+			WARN_ON_ONCE(gdesc->rcd.frg &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				       (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
 		} else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
 					     (1 << VMXNET3_RCD_TUC_SHIFT))) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
-			BUG_ON(gdesc->rcd.frg);
+			WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				       (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+			WARN_ON_ONCE(gdesc->rcd.frg &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				       (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
 		} else {
 			if (gdesc->rcd.csum) {
 				skb->csum = htons(gdesc->rcd.csum);
@@ -2429,6 +2489,10 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
 		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;

+	if (adapter->netdev->features & (NETIF_F_GSO_UDP_TUNNEL |
+					 NETIF_F_GSO_UDP_TUNNEL_CSUM))
+		devRead->misc.uptFeatures |= UPT1_F_RXINNEROFLD;
+
 	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
 	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
 	devRead->misc.queueDescLen = cpu_to_le32(
@@ -2561,8 +2625,8 @@ vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter)
 	union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
 	unsigned long flags;

-	if (!VMXNET3_VERSION_GE_4(adapter))
-		return;
+	if (!VMXNET3_VERSION_GE_4(adapter))
+		return;

 	spin_lock_irqsave(&adapter->cmd_lock, flags);

@@ -3073,6 +3137,18 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
 		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
 		NETIF_F_LRO;
+
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+		netdev->hw_enc_features = NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
+			NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM;
+	}
+
 	if (dma64)
 		netdev->hw_features |= NETIF_F_HIGHDMA;
 	netdev->vlan_features = netdev->hw_features &

drivers/net/vmxnet3/vmxnet3_ethtool.c

Lines changed: 41 additions & 1 deletion
@@ -267,14 +267,43 @@ netdev_features_t vmxnet3_fix_features(struct net_device *netdev,
 	return features;
 }

+static void vmxnet3_enable_encap_offloads(struct net_device *netdev)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
+			NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM;
+	}
+}
+
+static void vmxnet3_disable_encap_offloads(struct net_device *netdev)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		netdev->hw_enc_features &= ~(NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
+			NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM);
+	}
+}
+
 int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	unsigned long flags;
 	netdev_features_t changed = features ^ netdev->features;
+	netdev_features_t tun_offload_mask = NETIF_F_GSO_UDP_TUNNEL |
+					     NETIF_F_GSO_UDP_TUNNEL_CSUM;
+	u8 udp_tun_enabled = (netdev->features & tun_offload_mask) != 0;

 	if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO |
-		       NETIF_F_HW_VLAN_CTAG_RX)) {
+		       NETIF_F_HW_VLAN_CTAG_RX | tun_offload_mask)) {
 		if (features & NETIF_F_RXCSUM)
 			adapter->shared->devRead.misc.uptFeatures |=
 				UPT1_F_RXCSUM;
@@ -297,6 +326,17 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features)
 			adapter->shared->devRead.misc.uptFeatures &=
 				~UPT1_F_RXVLAN;

+		if ((features & tun_offload_mask) != 0 && !udp_tun_enabled) {
+			vmxnet3_enable_encap_offloads(netdev);
+			adapter->shared->devRead.misc.uptFeatures |=
+				UPT1_F_RXINNEROFLD;
+		} else if ((features & tun_offload_mask) == 0 &&
+			   udp_tun_enabled) {
+			vmxnet3_disable_encap_offloads(netdev);
+			adapter->shared->devRead.misc.uptFeatures &=
+				~UPT1_F_RXINNEROFLD;
+		}
+
 		spin_lock_irqsave(&adapter->cmd_lock, flags);
 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
 				       VMXNET3_CMD_UPDATE_FEATURE);
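
With vmxnet3_set_features() extended this way, the tunnel offloads can be toggled at runtime through the usual ethtool path, for example (interface name illustrative):

	ethtool -K ens192 tx-udp_tnl-segmentation off

tx-udp_tnl-segmentation and tx-udp_tnl-csum-segmentation are the standard kernel feature strings for NETIF_F_GSO_UDP_TUNNEL and NETIF_F_GSO_UDP_TUNNEL_CSUM; clearing the last of them removes the encap features from hw_enc_features and clears UPT1_F_RXINNEROFLD before the device is told to re-read its features via VMXNET3_CMD_UPDATE_FEATURE.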

drivers/net/vmxnet3/vmxnet3_int.h

Lines changed: 9 additions & 3 deletions
@@ -219,10 +219,16 @@ struct vmxnet3_tx_ctx {
 	bool   ipv4;
 	bool   ipv6;
 	u16 mss;
-	u32 eth_ip_hdr_size; /* only valid for pkts requesting tso or csum
-			      * offloading
+	u32 l4_offset;	/* only valid for pkts requesting tso or csum
+			 * offloading. For encap offload, it refers to
+			 * inner L4 offset i.e. it includes outer header
+			 * encap header and inner eth and ip header size
+			 */
+
+	u32 l4_hdr_size; /* only valid if mss != 0
+			  * Refers to inner L4 hdr size for encap
+			  * offload
 			 */
-	u32 l4_hdr_size;     /* only valid if mss != 0 */
 	u32 copy_size;       /* # of bytes copied into the data ring */
 	union Vmxnet3_GenericDesc *sop_txd;
 	union Vmxnet3_GenericDesc *eop_txd;
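
As a worked example of the l4_offset comment above, for plain VXLAN over IPv4 with no IP or TCP options (all sizes in bytes):

	outer eth (14) + outer IPv4 (20) + outer UDP (8) + VXLAN (8)
	  + inner eth (14) + inner IPv4 (20)   = 84  ->  l4_offset
	inner TCP (20)                         = 20  ->  l4_hdr_size

so the driver copies l4_offset + l4_hdr_size = 104 header bytes into the data ring and programs hlen = 104 on the TSO descriptor.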
