|
| 1 | +smb: client: fix potential deadlock when reconnecting channels |
| 2 | + |
| 3 | +jira LE-4669 |
| 4 | +cve CVE-2025-38244 |
| 5 | +Rebuild_History Non-Buildable kernel-4.18.0-553.82.1.el8_10 |
| 6 | +commit-author Paulo Alcantara < [email protected]> |
| 7 | +commit 711741f94ac3cf9f4e3aa73aa171e76d188c0819 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-4.18.0-553.82.1.el8_10/711741f9.failed |
| 11 | + |
| 12 | +Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order |
| 13 | +and prevent the following deadlock from happening |
| 14 | + |
| 15 | +====================================================== |
| 16 | +WARNING: possible circular locking dependency detected |
| 17 | +6.16.0-rc3-build2+ #1301 Tainted: G S W |
| 18 | +------------------------------------------------------ |
| 19 | +cifsd/6055 is trying to acquire lock: |
| 20 | +ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200 |
| 21 | + |
| 22 | +but task is already holding lock: |
| 23 | +ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 |
| 24 | + |
| 25 | +which lock already depends on the new lock. |
| 26 | + |
| 27 | +the existing dependency chain (in reverse order) is: |
| 28 | + |
| 29 | +-> #2 (&ret_buf->chan_lock){+.+.}-{3:3}: |
| 30 | + validate_chain+0x1cf/0x270 |
| 31 | + __lock_acquire+0x60e/0x780 |
| 32 | + lock_acquire.part.0+0xb4/0x1f0 |
| 33 | + _raw_spin_lock+0x2f/0x40 |
| 34 | + cifs_setup_session+0x81/0x4b0 |
| 35 | + cifs_get_smb_ses+0x771/0x900 |
| 36 | + cifs_mount_get_session+0x7e/0x170 |
| 37 | + cifs_mount+0x92/0x2d0 |
| 38 | + cifs_smb3_do_mount+0x161/0x460 |
| 39 | + smb3_get_tree+0x55/0x90 |
| 40 | + vfs_get_tree+0x46/0x180 |
| 41 | + do_new_mount+0x1b0/0x2e0 |
| 42 | + path_mount+0x6ee/0x740 |
| 43 | + do_mount+0x98/0xe0 |
| 44 | + __do_sys_mount+0x148/0x180 |
| 45 | + do_syscall_64+0xa4/0x260 |
| 46 | + entry_SYSCALL_64_after_hwframe+0x76/0x7e |
| 47 | + |
| 48 | +-> #1 (&ret_buf->ses_lock){+.+.}-{3:3}: |
| 49 | + validate_chain+0x1cf/0x270 |
| 50 | + __lock_acquire+0x60e/0x780 |
| 51 | + lock_acquire.part.0+0xb4/0x1f0 |
| 52 | + _raw_spin_lock+0x2f/0x40 |
| 53 | + cifs_match_super+0x101/0x320 |
| 54 | + sget+0xab/0x270 |
| 55 | + cifs_smb3_do_mount+0x1e0/0x460 |
| 56 | + smb3_get_tree+0x55/0x90 |
| 57 | + vfs_get_tree+0x46/0x180 |
| 58 | + do_new_mount+0x1b0/0x2e0 |
| 59 | + path_mount+0x6ee/0x740 |
| 60 | + do_mount+0x98/0xe0 |
| 61 | + __do_sys_mount+0x148/0x180 |
| 62 | + do_syscall_64+0xa4/0x260 |
| 63 | + entry_SYSCALL_64_after_hwframe+0x76/0x7e |
| 64 | + |
| 65 | +-> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}: |
| 66 | + check_noncircular+0x95/0xc0 |
| 67 | + check_prev_add+0x115/0x2f0 |
| 68 | + validate_chain+0x1cf/0x270 |
| 69 | + __lock_acquire+0x60e/0x780 |
| 70 | + lock_acquire.part.0+0xb4/0x1f0 |
| 71 | + _raw_spin_lock+0x2f/0x40 |
| 72 | + cifs_signal_cifsd_for_reconnect+0x134/0x200 |
| 73 | + __cifs_reconnect+0x8f/0x500 |
| 74 | + cifs_handle_standard+0x112/0x280 |
| 75 | + cifs_demultiplex_thread+0x64d/0xbc0 |
| 76 | + kthread+0x2f7/0x310 |
| 77 | + ret_from_fork+0x2a/0x230 |
| 78 | + ret_from_fork_asm+0x1a/0x30 |
| 79 | + |
| 80 | +other info that might help us debug this: |
| 81 | + |
| 82 | +Chain exists of: |
| 83 | + &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock |
| 84 | + |
| 85 | + Possible unsafe locking scenario: |
| 86 | + |
| 87 | + CPU0 CPU1 |
| 88 | + ---- ---- |
| 89 | + lock(&ret_buf->chan_lock); |
| 90 | + lock(&ret_buf->ses_lock); |
| 91 | + lock(&ret_buf->chan_lock); |
| 92 | + lock(&tcp_ses->srv_lock); |
| 93 | + |
| 94 | + *** DEADLOCK *** |
| 95 | + |
| 96 | +3 locks held by cifsd/6055: |
| 97 | + #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200 |
| 98 | + #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200 |
| 99 | + #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 |
| 100 | + |
| 101 | + |
| 102 | + Reported-by: David Howells < [email protected]> |
| 103 | +Fixes: d7d7a66aacd6 ("cifs: avoid use of global locks for high contention data") |
| 104 | + Reviewed-by: David Howells < [email protected]> |
| 105 | + Tested-by: David Howells < [email protected]> |
| 106 | + Signed-off-by: Paulo Alcantara (Red Hat) < [email protected]> |
| 107 | + Signed-off-by: David Howells < [email protected]> |
| 108 | + Signed-off-by: Steve French < [email protected]> |
| 109 | +(cherry picked from commit 711741f94ac3cf9f4e3aa73aa171e76d188c0819) |
| 110 | + Signed-off-by: Jonathan Maple < [email protected]> |
| 111 | + |
| 112 | +# Conflicts: |
| 113 | +# fs/cifs/cifsglob.h |
| 114 | +# fs/cifs/connect.c |
| 115 | +diff --cc fs/cifs/cifsglob.h |
| 116 | +index 3c5d8dfc52dd,318a8405d475..000000000000 |
| 117 | +--- a/fs/cifs/cifsglob.h |
| 118 | ++++ b/fs/cifs/cifsglob.h |
| 119 | +@@@ -587,8 -709,12 +587,13 @@@ inc_rfc1001_len(void *buf, int count |
| 120 | + struct TCP_Server_Info { |
| 121 | + struct list_head tcp_ses_list; |
| 122 | + struct list_head smb_ses_list; |
| 123 | +++<<<<<<< HEAD:fs/cifs/cifsglob.h |
| 124 | +++======= |
| 125 | ++ struct list_head rlist; /* reconnect list */ |
| 126 | ++ spinlock_t srv_lock; /* protect anything here that is not protected */ |
| 127 | +++>>>>>>> 711741f94ac3 (smb: client: fix potential deadlock when reconnecting channels):fs/smb/client/cifsglob.h |
| 128 | + __u64 conn_id; /* connection identifier (useful for debugging) */ |
| 129 | + int srv_count; /* reference counter */ |
| 130 | + - int rfc1001_sessinit; /* whether to estasblish netbios session */ |
| 131 | + - bool with_rfc1001; /* if netbios session is used */ |
| 132 | + /* 15 character server name + 0x20 16th byte indicating type = srv */ |
| 133 | + char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; |
| 134 | + struct smb_version_operations *ops; |
| 135 | +diff --cc fs/cifs/connect.c |
| 136 | +index ca2926c7b59e,685c65dcb8c4..000000000000 |
| 137 | +--- a/fs/cifs/connect.c |
| 138 | ++++ b/fs/cifs/connect.c |
| 139 | +@@@ -164,45 -124,86 +164,108 @@@ static void smb2_query_server_interface |
| 140 | + (SMB_INTERFACE_POLL_INTERVAL * HZ)); |
| 141 | + } |
| 142 | + |
| 143 | +++<<<<<<< HEAD:fs/cifs/connect.c |
| 144 | + +static void cifs_resolve_server(struct work_struct *work) |
| 145 | + +{ |
| 146 | + + int rc; |
| 147 | + + struct TCP_Server_Info *server = container_of(work, |
| 148 | + + struct TCP_Server_Info, resolve.work); |
| 149 | + + |
| 150 | + + mutex_lock(&server->srv_mutex); |
| 151 | + + |
| 152 | + + /* |
| 153 | + + * Resolve the hostname again to make sure that IP address is up-to-date. |
| 154 | + + */ |
| 155 | + + rc = reconn_set_ipaddr_from_hostname(server); |
| 156 | + + if (rc) { |
| 157 | + + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", |
| 158 | + + __func__, rc); |
| 159 | + + } |
| 160 | + + |
| 161 | + + mutex_unlock(&server->srv_mutex); |
| 162 | +++======= |
| 163 | ++ #define set_need_reco(server) \ |
| 164 | ++ do { \ |
| 165 | ++ spin_lock(&server->srv_lock); \ |
| 166 | ++ if (server->tcpStatus != CifsExiting) \ |
| 167 | ++ server->tcpStatus = CifsNeedReconnect; \ |
| 168 | ++ spin_unlock(&server->srv_lock); \ |
| 169 | ++ } while (0) |
| 170 | ++ |
| 171 | ++ /* |
| 172 | ++ * Update the tcpStatus for the server. |
| 173 | ++ * This is used to signal the cifsd thread to call cifs_reconnect |
| 174 | ++ * ONLY cifsd thread should call cifs_reconnect. For any other |
| 175 | ++ * thread, use this function |
| 176 | ++ * |
| 177 | ++ * @server: the tcp ses for which reconnect is needed |
| 178 | ++ * @all_channels: if this needs to be done for all channels |
| 179 | ++ */ |
| 180 | ++ void |
| 181 | ++ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, |
| 182 | ++ bool all_channels) |
| 183 | ++ { |
| 184 | ++ struct TCP_Server_Info *nserver; |
| 185 | ++ struct cifs_ses *ses; |
| 186 | ++ LIST_HEAD(reco); |
| 187 | ++ int i; |
| 188 | ++ |
| 189 | ++ /* if we need to signal just this channel */ |
| 190 | ++ if (!all_channels) { |
| 191 | ++ set_need_reco(server); |
| 192 | ++ return; |
| 193 | ++ } |
| 194 | ++ |
| 195 | ++ if (SERVER_IS_CHAN(server)) |
| 196 | ++ server = server->primary_server; |
| 197 | ++ scoped_guard(spinlock, &cifs_tcp_ses_lock) { |
| 198 | ++ set_need_reco(server); |
| 199 | ++ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { |
| 200 | ++ spin_lock(&ses->ses_lock); |
| 201 | ++ if (ses->ses_status == SES_EXITING) { |
| 202 | ++ spin_unlock(&ses->ses_lock); |
| 203 | ++ continue; |
| 204 | ++ } |
| 205 | ++ spin_lock(&ses->chan_lock); |
| 206 | ++ for (i = 1; i < ses->chan_count; i++) { |
| 207 | ++ nserver = ses->chans[i].server; |
| 208 | ++ if (!nserver) |
| 209 | ++ continue; |
| 210 | ++ nserver->srv_count++; |
| 211 | ++ list_add(&nserver->rlist, &reco); |
| 212 | ++ } |
| 213 | ++ spin_unlock(&ses->chan_lock); |
| 214 | ++ spin_unlock(&ses->ses_lock); |
| 215 | ++ } |
| 216 | ++ } |
| 217 | ++ |
| 218 | ++ list_for_each_entry_safe(server, nserver, &reco, rlist) { |
| 219 | ++ list_del_init(&server->rlist); |
| 220 | ++ set_need_reco(server); |
| 221 | ++ cifs_put_tcp_session(server, 0); |
| 222 | ++ } |
| 223 | +++>>>>>>> 711741f94ac3 (smb: client: fix potential deadlock when reconnecting channels):fs/smb/client/connect.c |
| 224 | + } |
| 225 | + |
| 226 | + -/* |
| 227 | + +/** |
| 228 | + * Mark all sessions and tcons for reconnect. |
| 229 | + - * IMPORTANT: make sure that this gets called only from |
| 230 | + - * cifsd thread. For any other thread, use |
| 231 | + - * cifs_signal_cifsd_for_reconnect |
| 232 | + * |
| 233 | + - * @server: the tcp ses for which reconnect is needed |
| 234 | + * @server needs to be previously set to CifsNeedReconnect. |
| 235 | + - * @mark_smb_session: whether even sessions need to be marked |
| 236 | + */ |
| 237 | + -void |
| 238 | + -cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, |
| 239 | + - bool mark_smb_session) |
| 240 | + +static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server) |
| 241 | + { |
| 242 | + - struct TCP_Server_Info *pserver; |
| 243 | + - struct cifs_ses *ses, *nses; |
| 244 | + + unsigned int num_sessions = 0; |
| 245 | + + struct cifs_ses *ses; |
| 246 | + struct cifs_tcon *tcon; |
| 247 | + + struct mid_q_entry *mid, *nmid; |
| 248 | + + struct list_head retry_list; |
| 249 | + + struct TCP_Server_Info *pserver; |
| 250 | + |
| 251 | + + server->maxBuf = 0; |
| 252 | + + server->max_read = 0; |
| 253 | + + |
| 254 | + + cifs_dbg(FYI, "Mark tcp session as need reconnect\n"); |
| 255 | + + trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname); |
| 256 | + /* |
| 257 | + * before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they |
| 258 | + * are not used until reconnected. |
| 259 | +* Unmerged path fs/cifs/cifsglob.h |
| 260 | +* Unmerged path fs/cifs/connect.c |
0 commit comments