Skip to content

Commit f231cbf

Browse files
pcacjrheynemax
authored andcommitted
smb: client: fix potential deadlock when reconnecting channels
[ Upstream commit 711741f ] Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order and prevent the following deadlock from happening ====================================================== WARNING: possible circular locking dependency detected 6.16.0-rc3-build2+ #1301 Tainted: G S W ------------------------------------------------------ cifsd/6055 is trying to acquire lock: ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200 but task is already holding lock: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&ret_buf->chan_lock){+.+.}-{3:3}: validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_setup_session+0x81/0x4b0 cifs_get_smb_ses+0x771/0x900 cifs_mount_get_session+0x7e/0x170 cifs_mount+0x92/0x2d0 cifs_smb3_do_mount+0x161/0x460 smb3_get_tree+0x55/0x90 vfs_get_tree+0x46/0x180 do_new_mount+0x1b0/0x2e0 path_mount+0x6ee/0x740 do_mount+0x98/0xe0 __do_sys_mount+0x148/0x180 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&ret_buf->ses_lock){+.+.}-{3:3}: validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_match_super+0x101/0x320 sget+0xab/0x270 cifs_smb3_do_mount+0x1e0/0x460 smb3_get_tree+0x55/0x90 vfs_get_tree+0x46/0x180 do_new_mount+0x1b0/0x2e0 path_mount+0x6ee/0x740 do_mount+0x98/0xe0 __do_sys_mount+0x148/0x180 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}: check_noncircular+0x95/0xc0 check_prev_add+0x115/0x2f0 validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_signal_cifsd_for_reconnect+0x134/0x200 __cifs_reconnect+0x8f/0x500 cifs_handle_standard+0x112/0x280 cifs_demultiplex_thread+0x64d/0xbc0 kthread+0x2f7/0x310 ret_from_fork+0x2a/0x230 ret_from_fork_asm+0x1a/0x30 other info that might help us debug this: Chain exists of: &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ret_buf->chan_lock); lock(&ret_buf->ses_lock); lock(&ret_buf->chan_lock); lock(&tcp_ses->srv_lock); *** DEADLOCK *** 3 locks held by cifsd/6055: #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200 #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200 #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 Cc: [email protected] Reported-by: David Howells <[email protected]> Fixes: d7d7a66 ("cifs: avoid use of global locks for high contention data") Reviewed-by: David Howells <[email protected]> Tested-by: David Howells <[email protected]> Signed-off-by: Paulo Alcantara (Red Hat) <[email protected]> Signed-off-by: David Howells <[email protected]> Signed-off-by: Steve French <[email protected]> Signed-off-by: Sasha Levin <[email protected]> [v6.1: replaced SERVER_IS_CHAN with CIFS_SERVER_IS_CHAN; resolved contextual conflicts in cifs_signal_cifsd_for_reconnect()] Signed-off-by: Shaoying Xu <[email protected]>
1 parent f54f5db commit f231cbf

File tree

2 files changed

+38
-19
lines changed

2 files changed

+38
-19
lines changed

fs/smb/client/cifsglob.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ inc_rfc1001_len(void *buf, int count)
614614
struct TCP_Server_Info {
615615
struct list_head tcp_ses_list;
616616
struct list_head smb_ses_list;
617+
struct list_head rlist; /* reconnect list */
617618
spinlock_t srv_lock; /* protect anything here that is not protected */
618619
__u64 conn_id; /* connection identifier (useful for debugging) */
619620
int srv_count; /* reference counter */

fs/smb/client/connect.c

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,14 @@ static void cifs_resolve_server(struct work_struct *work)
185185
cifs_server_unlock(server);
186186
}
187187

188+
#define set_need_reco(server) \
189+
do { \
190+
spin_lock(&server->srv_lock); \
191+
if (server->tcpStatus != CifsExiting) \
192+
server->tcpStatus = CifsNeedReconnect; \
193+
spin_unlock(&server->srv_lock); \
194+
} while (0)
195+
188196
/*
189197
* Update the tcpStatus for the server.
190198
* This is used to signal the cifsd thread to call cifs_reconnect
@@ -198,35 +206,45 @@ void
198206
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
199207
bool all_channels)
200208
{
201-
struct TCP_Server_Info *pserver;
209+
struct TCP_Server_Info *nserver;
202210
struct cifs_ses *ses;
211+
LIST_HEAD(reco);
203212
int i;
204213

205-
/* If server is a channel, select the primary channel */
206-
pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
207-
208214
/* if we need to signal just this channel */
209215
if (!all_channels) {
210-
spin_lock(&server->srv_lock);
211-
if (server->tcpStatus != CifsExiting)
212-
server->tcpStatus = CifsNeedReconnect;
213-
spin_unlock(&server->srv_lock);
216+
set_need_reco(server);
214217
return;
215218
}
216219

217-
spin_lock(&cifs_tcp_ses_lock);
218-
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
219-
if (cifs_ses_exiting(ses))
220-
continue;
221-
spin_lock(&ses->chan_lock);
222-
for (i = 0; i < ses->chan_count; i++) {
223-
spin_lock(&ses->chans[i].server->srv_lock);
224-
ses->chans[i].server->tcpStatus = CifsNeedReconnect;
225-
spin_unlock(&ses->chans[i].server->srv_lock);
220+
if (CIFS_SERVER_IS_CHAN(server))
221+
server = server->primary_server;
222+
scoped_guard(spinlock, &cifs_tcp_ses_lock) {
223+
set_need_reco(server);
224+
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
225+
spin_lock(&ses->ses_lock);
226+
if (ses->ses_status == SES_EXITING) {
227+
spin_unlock(&ses->ses_lock);
228+
continue;
229+
}
230+
spin_lock(&ses->chan_lock);
231+
for (i = 1; i < ses->chan_count; i++) {
232+
nserver = ses->chans[i].server;
233+
if (!nserver)
234+
continue;
235+
nserver->srv_count++;
236+
list_add(&nserver->rlist, &reco);
237+
}
238+
spin_unlock(&ses->chan_lock);
239+
spin_unlock(&ses->ses_lock);
226240
}
227-
spin_unlock(&ses->chan_lock);
228241
}
229-
spin_unlock(&cifs_tcp_ses_lock);
242+
243+
list_for_each_entry_safe(server, nserver, &reco, rlist) {
244+
list_del_init(&server->rlist);
245+
set_need_reco(server);
246+
cifs_put_tcp_session(server, 0);
247+
}
230248
}
231249

232250
/*

0 commit comments

Comments
 (0)