Skip to content

Commit 9bf9abe

Browse files
Ursula Braun, davem330
Ursula Braun
authored and committed
smc: link layer control (LLC)
send and receive LLC messages CONFIRM_LINK
(via IB message send and CQE)

Signed-off-by: Ursula Braun <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent bd4ad57 commit 9bf9abe

File tree

7 files changed

+330
-3
lines changed

7 files changed

+330
-3
lines changed

net/smc/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
obj-$(CONFIG_SMC) += smc.o
2-
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o
2+
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o

net/smc/af_smc.c

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "smc.h"
3333
#include "smc_clc.h"
34+
#include "smc_llc.h"
3435
#include "smc_core.h"
3536
#include "smc_ib.h"
3637
#include "smc_pnet.h"
@@ -245,6 +246,41 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
245246
return rc;
246247
}
247248

249+
static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
250+
{
251+
struct smc_link_group *lgr = smc->conn.lgr;
252+
struct smc_link *link;
253+
int rest;
254+
int rc;
255+
256+
link = &lgr->lnk[SMC_SINGLE_LINK];
257+
/* receive CONFIRM LINK request from server over RoCE fabric */
258+
rest = wait_for_completion_interruptible_timeout(
259+
&link->llc_confirm,
260+
SMC_LLC_WAIT_FIRST_TIME);
261+
if (rest <= 0) {
262+
struct smc_clc_msg_decline dclc;
263+
264+
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
265+
SMC_CLC_DECLINE);
266+
return rc;
267+
}
268+
269+
rc = smc_ib_modify_qp_rts(link);
270+
if (rc)
271+
return SMC_CLC_DECL_INTERR;
272+
273+
smc_wr_remember_qp_attr(link);
274+
/* send CONFIRM LINK response over RoCE fabric */
275+
rc = smc_llc_send_confirm_link(link,
276+
link->smcibdev->mac[link->ibport - 1],
277+
gid, SMC_LLC_RESP);
278+
if (rc < 0)
279+
return SMC_CLC_DECL_TCL;
280+
281+
return rc;
282+
}
283+
248284
static void smc_conn_save_peer_info(struct smc_sock *smc,
249285
struct smc_clc_msg_accept_confirm *clc)
250286
{
@@ -358,7 +394,17 @@ static int smc_connect_rdma(struct smc_sock *smc)
358394
if (rc)
359395
goto out_err_unlock;
360396

361-
/* tbd in follow-on patch: llc_confirm */
397+
if (local_contact == SMC_FIRST_CONTACT) {
398+
/* QP confirmation over RoCE fabric */
399+
reason_code = smc_clnt_conf_first_link(
400+
smc, &smcibdev->gid[ibport - 1]);
401+
if (reason_code < 0) {
402+
rc = reason_code;
403+
goto out_err_unlock;
404+
}
405+
if (reason_code > 0)
406+
goto decline_rdma_unlock;
407+
}
362408

363409
mutex_unlock(&smc_create_lgr_pending);
364410
out_connected:
@@ -543,6 +589,36 @@ static void smc_close_non_accepted(struct sock *sk)
543589
sock_put(sk);
544590
}
545591

592+
static int smc_serv_conf_first_link(struct smc_sock *smc)
593+
{
594+
struct smc_link_group *lgr = smc->conn.lgr;
595+
struct smc_link *link;
596+
int rest;
597+
int rc;
598+
599+
link = &lgr->lnk[SMC_SINGLE_LINK];
600+
/* send CONFIRM LINK request to client over the RoCE fabric */
601+
rc = smc_llc_send_confirm_link(link,
602+
link->smcibdev->mac[link->ibport - 1],
603+
&link->smcibdev->gid[link->ibport - 1],
604+
SMC_LLC_REQ);
605+
if (rc < 0)
606+
return SMC_CLC_DECL_TCL;
607+
608+
/* receive CONFIRM LINK response from client over the RoCE fabric */
609+
rest = wait_for_completion_interruptible_timeout(
610+
&link->llc_confirm_resp,
611+
SMC_LLC_WAIT_FIRST_TIME);
612+
if (rest <= 0) {
613+
struct smc_clc_msg_decline dclc;
614+
615+
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
616+
SMC_CLC_DECLINE);
617+
}
618+
619+
return rc;
620+
}
621+
546622
/* setup for RDMA connection of server */
547623
static void smc_listen_work(struct work_struct *work)
548624
{
@@ -655,13 +731,21 @@ static void smc_listen_work(struct work_struct *work)
655731
goto decline_rdma;
656732
}
657733

658-
/* tbd in follow-on patch: modify_qp, llc_confirm */
659734
if (local_contact == SMC_FIRST_CONTACT) {
660735
rc = smc_ib_ready_link(link);
661736
if (rc) {
662737
reason_code = SMC_CLC_DECL_INTERR;
663738
goto decline_rdma;
664739
}
740+
/* QP confirmation over RoCE fabric */
741+
reason_code = smc_serv_conf_first_link(new_smc);
742+
if (reason_code < 0) {
743+
/* peer is not aware of a problem */
744+
rc = reason_code;
745+
goto out_err;
746+
}
747+
if (reason_code > 0)
748+
goto decline_rdma;
665749
}
666750

667751
out_connected:
@@ -1111,6 +1195,12 @@ static int __init smc_init(void)
11111195
if (rc)
11121196
return rc;
11131197

1198+
rc = smc_llc_init();
1199+
if (rc) {
1200+
pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
1201+
goto out_pnet;
1202+
}
1203+
11141204
rc = proto_register(&smc_proto, 1);
11151205
if (rc) {
11161206
pr_err("%s: proto_register fails with %d\n", __func__, rc);

net/smc/smc_clc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
3333
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
3434
#define SMC_CLC_DECL_REPLY 0x06000000 /* reply to a received decline */
3535
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
36+
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
37+
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
3638

3739
struct smc_clc_msg_hdr { /* header1 of clc messages */
3840
u8 eyecatcher[4]; /* eye catcher */

net/smc/smc_core.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,13 @@
2121
#include "smc_core.h"
2222
#include "smc_ib.h"
2323
#include "smc_wr.h"
24+
#include "smc_llc.h"
2425

26+
#define SMC_LGR_NUM_INCR 256
2527
#define SMC_LGR_FREE_DELAY (600 * HZ)
2628

29+
static u32 smc_lgr_num; /* unique link group number */
30+
2731
/* Register connection's alert token in our lookup structure.
2832
* To use rbtrees we have to implement our own insert core.
2933
* Requires @conns_lock
@@ -152,6 +156,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
152156
INIT_LIST_HEAD(&lgr->sndbufs[i]);
153157
INIT_LIST_HEAD(&lgr->rmbs[i]);
154158
}
159+
smc_lgr_num += SMC_LGR_NUM_INCR;
160+
memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
155161
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
156162
lgr->conns_all = RB_ROOT;
157163

@@ -177,6 +183,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
177183
rc = smc_wr_create_link(lnk);
178184
if (rc)
179185
goto destroy_qp;
186+
init_completion(&lnk->llc_confirm);
187+
init_completion(&lnk->llc_confirm_resp);
180188

181189
smc->conn.lgr = lgr;
182190
rwlock_init(&lgr->conns_lock);

net/smc/smc_core.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ struct smc_link {
7373
u32 peer_psn; /* QP rx initial packet seqno */
7474
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
7575
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
76+
u8 link_id; /* unique # within link group */
77+
struct completion llc_confirm; /* wait for rx of conf link */
78+
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
7679
};
7780

7881
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -102,6 +105,8 @@ struct smc_rtoken { /* address/key of remote RMB */
102105
u32 rkey;
103106
};
104107

108+
#define SMC_LGR_ID_SIZE 4
109+
105110
struct smc_link_group {
106111
struct list_head list;
107112
enum smc_lgr_role role; /* client or server */
@@ -125,6 +130,7 @@ struct smc_link_group {
125130
SMC_RMBS_PER_LGR_MAX)];
126131
/* used rtoken elements */
127132

133+
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
128134
struct delayed_work free_work; /* delayed freeing of an lgr */
129135
bool sync_err; /* lgr no longer fits to peer */
130136
};

net/smc/smc_llc.c

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
/*
2+
* Shared Memory Communications over RDMA (SMC-R) and RoCE
3+
*
4+
* Link Layer Control (LLC)
5+
*
6+
* For now, we only support the necessary "confirm link" functionality
7+
* which happens for the first RoCE link after successful CLC handshake.
8+
*
9+
* Copyright IBM Corp. 2016
10+
*
11+
* Author(s): Klaus Wacker <[email protected]>
12+
* Ursula Braun <[email protected]>
13+
*/
14+
15+
#include <net/tcp.h>
16+
#include <rdma/ib_verbs.h>
17+
18+
#include "smc.h"
19+
#include "smc_core.h"
20+
#include "smc_clc.h"
21+
#include "smc_llc.h"
22+
23+
/********************************** send *************************************/
24+
25+
/* Private per-send data for pending LLC transmits. It has no members yet;
 * smc_llc_add_pending_send() checks at build time that its size does not
 * exceed SMC_WR_TX_PEND_PRIV_SIZE.
 */
struct smc_llc_tx_pend {
26+
};
27+
28+
/* Handler for send/transmission completion of an LLC msg. It is the callback
 * smc_llc_add_pending_send() passes to smc_wr_tx_get_free_slot(). For now it
 * does nothing: @pend, @link and @wc_status are all ignored.
 */
29+
static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
30+
struct smc_link *link,
31+
enum ib_wc_status wc_status)
32+
{
33+
/* future work: handle wc_status error for recovery and failover */
34+
}
35+
36+
/**
37+
* smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
38+
* @link: Pointer to SMC link used for sending LLC control message.
39+
* @wr_buf: Out variable returning pointer to work request payload buffer.
40+
* @pend: Out variable returning pointer to private pending WR tracking.
41+
* It's the context the transmit complete handler will get.
42+
*
43+
* Reserves and pre-fills an entry for a pending work request send/tx.
44+
* Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
45+
* Can sleep due to smc_get_ctrl_buf (if not in softirq context).
46+
*
47+
* Return: 0 on success, otherwise an error value.
48+
*/
49+
static int smc_llc_add_pending_send(struct smc_link *link,
50+
struct smc_wr_buf **wr_buf,
51+
struct smc_wr_tx_pend_priv **pend)
52+
{
53+
int rc;
54+
55+
rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
56+
if (rc < 0)
57+
return rc;
58+
BUILD_BUG_ON_MSG(
59+
sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
60+
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
61+
BUILD_BUG_ON_MSG(
62+
sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
63+
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
64+
BUILD_BUG_ON_MSG(
65+
sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
66+
"must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
67+
return 0;
68+
}
69+
70+
/* high-level API to send LLC confirm link */
71+
int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
72+
union ib_gid *gid,
73+
enum smc_llc_reqresp reqresp)
74+
{
75+
struct smc_link_group *lgr = container_of(link, struct smc_link_group,
76+
lnk[SMC_SINGLE_LINK]);
77+
struct smc_llc_msg_confirm_link *confllc;
78+
struct smc_wr_tx_pend_priv *pend;
79+
struct smc_wr_buf *wr_buf;
80+
int rc;
81+
82+
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
83+
if (rc)
84+
return rc;
85+
confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
86+
memset(confllc, 0, sizeof(*confllc));
87+
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
88+
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
89+
if (reqresp == SMC_LLC_RESP)
90+
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
91+
memcpy(confllc->sender_mac, mac, ETH_ALEN);
92+
memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
93+
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
94+
/* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
95+
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
96+
confllc->max_links = SMC_LINKS_PER_LGR_MAX;
97+
/* send llc message */
98+
rc = smc_wr_tx_send(link, pend);
99+
return rc;
100+
}
101+
102+
/********************************* receive ***********************************/
103+
104+
static void smc_llc_rx_confirm_link(struct smc_link *link,
105+
struct smc_llc_msg_confirm_link *llc)
106+
{
107+
struct smc_link_group *lgr;
108+
109+
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
110+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
111+
if (lgr->role == SMC_SERV)
112+
complete(&link->llc_confirm_resp);
113+
} else {
114+
if (lgr->role == SMC_CLNT) {
115+
link->link_id = llc->link_num;
116+
complete(&link->llc_confirm);
117+
}
118+
}
119+
}
120+
121+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
122+
{
123+
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
124+
union smc_llc_msg *llc = buf;
125+
126+
if (wc->byte_len < sizeof(*llc))
127+
return; /* short message */
128+
if (llc->raw.hdr.length != sizeof(*llc))
129+
return; /* invalid message */
130+
if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
131+
smc_llc_rx_confirm_link(link, &llc->confirm_link);
132+
}
133+
134+
/***************************** init, exit, misc ******************************/
135+
136+
/* Receive handlers that smc_llc_init() registers with the smc_wr layer.
 * The only real entry handles SMC_LLC_CONFIRM_LINK messages.
 */
static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
137+
{
138+
.handler = smc_llc_rx_handler,
139+
.type = SMC_LLC_CONFIRM_LINK
140+
},
141+
/* terminator: smc_llc_init() stops at the first NULL handler */
{
142+
.handler = NULL,
143+
}
144+
};
145+
146+
/* Register every entry of smc_llc_rx_handlers, up to the NULL-handler
 * terminator, via smc_wr_rx_register_handler().
 *
 * Return: 0 if all handlers were registered, otherwise the first non-zero
 * result of smc_wr_rx_register_handler(); registration stops at that point.
 */
int __init smc_llc_init(void)
{
	struct smc_wr_rx_handler *entry = smc_llc_rx_handlers;
	int rc;

	while (entry->handler) {
		INIT_HLIST_NODE(&entry->list);
		rc = smc_wr_rx_register_handler(entry);
		if (rc)
			return rc;
		entry++;
	}
	return 0;
}

0 commit comments

Comments
 (0)