From ab08de335d157c3dcaeca15d88b77d11e070f152 Mon Sep 17 00:00:00 2001 From: qxy11 Date: Mon, 14 Jul 2025 19:56:20 -0700 Subject: [PATCH 1/6] Fix a deadlock in ModuleList when starting a standalone lldb client Summary: There was a deadlock was introduced by [PR #146441](https://github.com/llvm/llvm-project/pull/146441) which changed `CurrentThreadIsPrivateStateThread()` to `CurrentThreadPosesAsPrivateStateThread()`. This change caused the execution path in `ExecutionContextRef::SetTargetPtr()` to now enter a code block that was previously skipped, triggering `GetSelectedFrame()` which leads to a deadlock. In particular, one thread held `m_modules_mutex` and tried to acquire `m_language_runtimes_mutex` (via the notifier call chain), and another thread held `m_language_runtimes_mutex` and tried to acquire `m_modules_mutex` (via `ScanForGNUstepObjCLibraryCandidate`) This fixes the deadlock by adding a scoped block around the mutex lock before the call to the notifier, and moved the notifier call outside of the mutex-guarded section. Test Plan: Tested manually --- lldb/source/Core/ModuleList.cpp | 46 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index d5ddf6e846112..4ec093b5bc5b4 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -215,30 +215,34 @@ ModuleList::~ModuleList() = default; void ModuleList::AppendImpl(const ModuleSP &module_sp, bool use_notifier) { if (module_sp) { - std::lock_guard guard(m_modules_mutex); - // We are required to keep the first element of the Module List as the - // executable module. So check here and if the first module is NOT an - // but the new one is, we insert this module at the beginning, rather than - // at the end. - // We don't need to do any of this if the list is empty: - if (m_modules.empty()) { - m_modules.push_back(module_sp); - } else { - // Since producing the ObjectFile may take some work, first check the 0th - // element, and only if that's NOT an executable look at the incoming - // ObjectFile. That way in the normal case we only look at the element - // 0 ObjectFile. - const bool elem_zero_is_executable - = m_modules[0]->GetObjectFile()->GetType() - == ObjectFile::Type::eTypeExecutable; - lldb_private::ObjectFile *obj = module_sp->GetObjectFile(); - if (!elem_zero_is_executable && obj - && obj->GetType() == ObjectFile::Type::eTypeExecutable) { - m_modules.insert(m_modules.begin(), module_sp); - } else { + { + std::lock_guard guard(m_modules_mutex); + // We are required to keep the first element of the Module List as the + // executable module. So check here and if the first module is NOT an + // but the new one is, we insert this module at the beginning, rather than + // at the end. + // We don't need to do any of this if the list is empty: + if (m_modules.empty()) { m_modules.push_back(module_sp); + } else { + // Since producing the ObjectFile may take some work, first check the + // 0th element, and only if that's NOT an executable look at the + // incoming ObjectFile. That way in the normal case we only look at the + // element 0 ObjectFile. + const bool elem_zero_is_executable = + m_modules[0]->GetObjectFile()->GetType() == + ObjectFile::Type::eTypeExecutable; + lldb_private::ObjectFile *obj = module_sp->GetObjectFile(); + if (!elem_zero_is_executable && obj && + obj->GetType() == ObjectFile::Type::eTypeExecutable) { + m_modules.insert(m_modules.begin(), module_sp); + } else { + m_modules.push_back(module_sp); + } } } + // Release the mutex before calling the notifier to avoid deadlock + // NotifyModuleAdded should be thread-safe if (use_notifier && m_notifier) m_notifier->NotifyModuleAdded(*this, module_sp); } From a50d76a5852652fe61351a147d61c262d809839e Mon Sep 17 00:00:00 2001 From: qxy11 Date: Thu, 17 Jul 2025 15:27:52 -0700 Subject: [PATCH 2/6] Add a unit test Summary: Added a unit test that would've failed prior to the fix and passes now. The test launches lldb-server, and connects the client to it with the statusline enabled to trigger the deadlock. I was not able to reproduce the issue reliably with a mock gdb-server. Test Plan: ``` ninja lldb-dotest && bin/lldb-dotest -f TestStatusline.test_modulelist_deadlock ~/llvm/llvm-project/lldb/test/API/functionalities/statusline/ ``` --- .../statusline/TestStatusline.py | 75 ++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/functionalities/statusline/TestStatusline.py b/lldb/test/API/functionalities/statusline/TestStatusline.py index 087e62b387d77..90eda7d625d48 100644 --- a/lldb/test/API/functionalities/statusline/TestStatusline.py +++ b/lldb/test/API/functionalities/statusline/TestStatusline.py @@ -1,10 +1,14 @@ import lldb +import os import re +import socket +import time +from contextlib import closing from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test.lldbpexpect import PExpectTest - +from lldbgdbserverutils import get_lldb_server_exe # PExpect uses many timeouts internally and doesn't play well # under ASAN on a loaded machine.. @@ -115,3 +119,72 @@ def test_resize(self): # Check for the escape code to resize the scroll window. self.child.expect(re.escape("\x1b[1;19r")) self.child.expect("(lldb)") + + @skipIfRemote + @skipIfWindows + @skipIfDarwin + @add_test_categories(["lldb-server"]) + def test_modulelist_deadlock(self): + """Regression test for a deadlock that occurs when the status line is enabled before connecting + to a gdb-remote server. + """ + if get_lldb_server_exe() is None: + self.skipTest("lldb-server not found") + + MAX_RETRY_ATTEMPTS = 10 + DELAY = 0.25 + + def _find_free_port(host): + for attempt in range(MAX_RETRY_ATTEMPTS): + try: + family, type, proto, _, _ = socket.getaddrinfo( + host, 0, proto=socket.IPPROTO_TCP + )[0] + with closing(socket.socket(family, type, proto)) as sock: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind((host, 0)) + return sock.getsockname() + except OSError: + time.sleep(DELAY * 2 ** attempt) # Exponential backoff + raise RuntimeError("Could not find a free port on {} after {} attempts.".format(host, MAX_RETRY_ATTEMPTS)) + + def _wait_for_server_ready_in_log(log_file_path, ready_message): + """Check log file for server ready message with retry logic""" + for attempt in range(MAX_RETRY_ATTEMPTS): + if os.path.exists(log_file_path): + with open(log_file_path, 'r') as f: + if ready_message in f.read(): + return + time.sleep(pow(2, attempt) * DELAY) + raise RuntimeError("Server not ready after {} attempts.".format(MAX_RETRY_ATTEMPTS)) + + self.build() + exe_path = self.getBuildArtifact("a.out") + server_log_file = self.getLogBasenameForCurrentTest() + "-lldbserver.log" + self.addTearDownHook(lambda: os.path.exists(server_log_file) and os.remove(server_log_file)) + + # Find a free port for the server + addr = _find_free_port("localhost") + connect_address = "[{}]:{}".format(*addr) + commandline_args = [ + "gdbserver", + connect_address, + exe_path, + "--log-file={}".format(server_log_file), + "--log-channels=lldb conn" + ] + + server_proc = self.spawnSubprocess( + get_lldb_server_exe(), commandline_args, install_remote=False + ) + self.assertIsNotNone(server_proc) + + # Wait for server to be ready by checking log file. + server_ready_message = "Listen to {}".format(connect_address) + _wait_for_server_ready_in_log(server_log_file, server_ready_message) + + # Launch LLDB client and connect to lldb-server with statusline enabled + self.launch(timeout=self.TIMEOUT) + self.resize() + self.expect("settings set show-statusline true", ["no target"]) + self.expect(f"gdb-remote {connect_address}", substrs=["Process", "stopped"]) From 24d8a4a5dd81dc050b969e74da2c1d864db5601c Mon Sep 17 00:00:00 2001 From: qxy11 Date: Thu, 17 Jul 2025 16:04:34 -0700 Subject: [PATCH 3/6] Refactor w/ early return Summary: Address comment to return if !module_sp --- lldb/source/Core/ModuleList.cpp | 57 +++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index 4ec093b5bc5b4..1e23a5983d97e 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -214,38 +214,39 @@ const ModuleList &ModuleList::operator=(const ModuleList &rhs) { ModuleList::~ModuleList() = default; void ModuleList::AppendImpl(const ModuleSP &module_sp, bool use_notifier) { - if (module_sp) { - { - std::lock_guard guard(m_modules_mutex); - // We are required to keep the first element of the Module List as the - // executable module. So check here and if the first module is NOT an - // but the new one is, we insert this module at the beginning, rather than - // at the end. - // We don't need to do any of this if the list is empty: - if (m_modules.empty()) { - m_modules.push_back(module_sp); + if (!module_sp) + return; + { + std::lock_guard guard(m_modules_mutex); + // We are required to keep the first element of the Module List as the + // executable module. So check here and if the first module is NOT an + // but the new one is, we insert this module at the beginning, rather than + // at the end. + // We don't need to do any of this if the list is empty: + if (m_modules.empty()) { + m_modules.push_back(module_sp); + } else { + // Since producing the ObjectFile may take some work, first check the + // 0th element, and only if that's NOT an executable look at the + // incoming ObjectFile. That way in the normal case we only look at the + // element 0 ObjectFile. + const bool elem_zero_is_executable = + m_modules[0]->GetObjectFile()->GetType() == + ObjectFile::Type::eTypeExecutable; + lldb_private::ObjectFile *obj = module_sp->GetObjectFile(); + if (!elem_zero_is_executable && obj && + obj->GetType() == ObjectFile::Type::eTypeExecutable) { + m_modules.insert(m_modules.begin(), module_sp); } else { - // Since producing the ObjectFile may take some work, first check the - // 0th element, and only if that's NOT an executable look at the - // incoming ObjectFile. That way in the normal case we only look at the - // element 0 ObjectFile. - const bool elem_zero_is_executable = - m_modules[0]->GetObjectFile()->GetType() == - ObjectFile::Type::eTypeExecutable; - lldb_private::ObjectFile *obj = module_sp->GetObjectFile(); - if (!elem_zero_is_executable && obj && - obj->GetType() == ObjectFile::Type::eTypeExecutable) { - m_modules.insert(m_modules.begin(), module_sp); - } else { - m_modules.push_back(module_sp); - } + m_modules.push_back(module_sp); } } - // Release the mutex before calling the notifier to avoid deadlock - // NotifyModuleAdded should be thread-safe - if (use_notifier && m_notifier) - m_notifier->NotifyModuleAdded(*this, module_sp); } + // Release the mutex before calling the notifier to avoid deadlock + // NotifyModuleAdded should be thread-safe + if (use_notifier && m_notifier) + m_notifier->NotifyModuleAdded(*this, module_sp); + } void ModuleList::Append(const ModuleSP &module_sp, bool notify) { From a92442459512c1fff46905ea9136a1924530ad09 Mon Sep 17 00:00:00 2001 From: qxy11 Date: Thu, 17 Jul 2025 16:09:04 -0700 Subject: [PATCH 4/6] Fix lints on the test --- .../statusline/TestStatusline.py | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/lldb/test/API/functionalities/statusline/TestStatusline.py b/lldb/test/API/functionalities/statusline/TestStatusline.py index 90eda7d625d48..2f0097b5ea8ab 100644 --- a/lldb/test/API/functionalities/statusline/TestStatusline.py +++ b/lldb/test/API/functionalities/statusline/TestStatusline.py @@ -10,6 +10,7 @@ from lldbsuite.test.lldbpexpect import PExpectTest from lldbgdbserverutils import get_lldb_server_exe + # PExpect uses many timeouts internally and doesn't play well # under ASAN on a loaded machine.. @skipIfAsan @@ -145,23 +146,31 @@ def _find_free_port(host): sock.bind((host, 0)) return sock.getsockname() except OSError: - time.sleep(DELAY * 2 ** attempt) # Exponential backoff - raise RuntimeError("Could not find a free port on {} after {} attempts.".format(host, MAX_RETRY_ATTEMPTS)) - + time.sleep(DELAY * 2**attempt) # Exponential backoff + raise RuntimeError( + "Could not find a free port on {} after {} attempts.".format( + host, MAX_RETRY_ATTEMPTS + ) + ) + def _wait_for_server_ready_in_log(log_file_path, ready_message): """Check log file for server ready message with retry logic""" for attempt in range(MAX_RETRY_ATTEMPTS): if os.path.exists(log_file_path): - with open(log_file_path, 'r') as f: + with open(log_file_path, "r") as f: if ready_message in f.read(): return time.sleep(pow(2, attempt) * DELAY) - raise RuntimeError("Server not ready after {} attempts.".format(MAX_RETRY_ATTEMPTS)) - + raise RuntimeError( + "Server not ready after {} attempts.".format(MAX_RETRY_ATTEMPTS) + ) + self.build() exe_path = self.getBuildArtifact("a.out") server_log_file = self.getLogBasenameForCurrentTest() + "-lldbserver.log" - self.addTearDownHook(lambda: os.path.exists(server_log_file) and os.remove(server_log_file)) + self.addTearDownHook( + lambda: os.path.exists(server_log_file) and os.remove(server_log_file) + ) # Find a free port for the server addr = _find_free_port("localhost") @@ -171,18 +180,18 @@ def _wait_for_server_ready_in_log(log_file_path, ready_message): connect_address, exe_path, "--log-file={}".format(server_log_file), - "--log-channels=lldb conn" + "--log-channels=lldb conn", ] server_proc = self.spawnSubprocess( get_lldb_server_exe(), commandline_args, install_remote=False ) self.assertIsNotNone(server_proc) - + # Wait for server to be ready by checking log file. server_ready_message = "Listen to {}".format(connect_address) _wait_for_server_ready_in_log(server_log_file, server_ready_message) - + # Launch LLDB client and connect to lldb-server with statusline enabled self.launch(timeout=self.TIMEOUT) self.resize() From bca8f55df41111062a51334a56459bf29cf28b53 Mon Sep 17 00:00:00 2001 From: qxy11 Date: Thu, 17 Jul 2025 16:23:33 -0700 Subject: [PATCH 5/6] Fix formatting --- lldb/source/Core/ModuleList.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index 1e23a5983d97e..fd306bf16ca0f 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -214,7 +214,7 @@ const ModuleList &ModuleList::operator=(const ModuleList &rhs) { ModuleList::~ModuleList() = default; void ModuleList::AppendImpl(const ModuleSP &module_sp, bool use_notifier) { - if (!module_sp) + if (!module_sp) return; { std::lock_guard guard(m_modules_mutex); @@ -246,7 +246,6 @@ void ModuleList::AppendImpl(const ModuleSP &module_sp, bool use_notifier) { // NotifyModuleAdded should be thread-safe if (use_notifier && m_notifier) m_notifier->NotifyModuleAdded(*this, module_sp); - } void ModuleList::Append(const ModuleSP &module_sp, bool notify) { From 5cf94e96a1592a8941b1d1ad044242049ed9c892 Mon Sep 17 00:00:00 2001 From: qxy11 Date: Thu, 17 Jul 2025 20:03:06 -0700 Subject: [PATCH 6/6] Fix unit test expect str --- lldb/test/API/functionalities/statusline/TestStatusline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/functionalities/statusline/TestStatusline.py b/lldb/test/API/functionalities/statusline/TestStatusline.py index 2f0097b5ea8ab..33cd79736dc38 100644 --- a/lldb/test/API/functionalities/statusline/TestStatusline.py +++ b/lldb/test/API/functionalities/statusline/TestStatusline.py @@ -196,4 +196,6 @@ def _wait_for_server_ready_in_log(log_file_path, ready_message): self.launch(timeout=self.TIMEOUT) self.resize() self.expect("settings set show-statusline true", ["no target"]) - self.expect(f"gdb-remote {connect_address}", substrs=["Process", "stopped"]) + self.expect( + f"gdb-remote {connect_address}", [b"a.out \xe2\x94\x82 signal SIGSTOP"] + )