Skip to content

Parallelize module loading in POSIX dyld code #130912

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lldb/include/lldb/Target/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ class TargetProperties : public Properties {

llvm::StringRef GetLaunchWorkingDirectory() const;

bool GetParallelModuleLoad() const;

const char *GetDisassemblyFlavor() const;

const char *GetDisassemblyCPU() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "DynamicLoaderPOSIXDYLD.h"

#include "lldb/Breakpoint/BreakpointLocation.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Core/PluginManager.h"
Expand All @@ -25,6 +26,7 @@
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/ProcessInfo.h"
#include "llvm/Support/ThreadPool.h"

#include <memory>
#include <optional>
Expand Down Expand Up @@ -184,16 +186,37 @@ void DynamicLoaderPOSIXDYLD::DidLaunch() {

Status DynamicLoaderPOSIXDYLD::CanLoadImage() { return Status(); }

/// Record (or overwrite) the link-map address associated with \p module_sp.
///
/// The update is performed while holding the writer side of
/// `m_loaded_modules_rw_mutex`.
void DynamicLoaderPOSIXDYLD::SetLoadedModule(const ModuleSP &module_sp,
                                             addr_t link_map_addr) {
  llvm::sys::ScopedWriter write_guard(m_loaded_modules_rw_mutex);
  // Insert a new entry, or replace the address of an existing one.
  m_loaded_modules.insert_or_assign(module_sp, link_map_addr);
}

/// Drop the entry for \p module_sp from `m_loaded_modules`, if one exists.
///
/// The removal is performed while holding the writer side of
/// `m_loaded_modules_rw_mutex`.
void DynamicLoaderPOSIXDYLD::UnloadModule(const ModuleSP &module_sp) {
  llvm::sys::ScopedWriter write_guard(m_loaded_modules_rw_mutex);
  m_loaded_modules.erase(module_sp);
}

/// Look up the link-map address recorded for \p module_sp.
///
/// The lookup is performed while holding the reader side of
/// `m_loaded_modules_rw_mutex`.
///
/// \return The recorded address, or `std::nullopt` when \p module_sp has no
///         entry in `m_loaded_modules`.
std::optional<lldb::addr_t>
DynamicLoaderPOSIXDYLD::GetLoadedModuleLinkAddr(const ModuleSP &module_sp) {
  llvm::sys::ScopedReader read_guard(m_loaded_modules_rw_mutex);
  const auto entry = m_loaded_modules.find(module_sp);
  if (entry == m_loaded_modules.end())
    return std::nullopt;
  return entry->second;
}

/// Record the link-map address for \p module and update its loaded sections.
///
/// The link-map bookkeeping goes exclusively through SetLoadedModule(), which
/// takes the writer lock on `m_loaded_modules_rw_mutex`. `m_loaded_modules`
/// must not be written directly here: an unguarded write would bypass that
/// lock and could race with other accessors when modules are loaded from
/// several threads.
///
/// \param module              Module whose sections are being loaded.
/// \param link_map_addr       Link-map address to associate with \p module.
/// \param base_addr           Forwarded to UpdateLoadedSectionsCommon().
/// \param base_addr_is_offset Forwarded to UpdateLoadedSectionsCommon().
void DynamicLoaderPOSIXDYLD::UpdateLoadedSections(ModuleSP module,
                                                  addr_t link_map_addr,
                                                  addr_t base_addr,
                                                  bool base_addr_is_offset) {
  SetLoadedModule(module, link_map_addr);

  UpdateLoadedSectionsCommon(module, base_addr, base_addr_is_offset);
}

/// Forget the link-map address for \p module and unload its sections.
///
/// The bookkeeping entry is removed exclusively through UnloadModule(), which
/// takes the writer lock on `m_loaded_modules_rw_mutex`. `m_loaded_modules`
/// must not be erased from directly here, since that would bypass the lock.
///
/// \param module Module whose sections are being unloaded.
void DynamicLoaderPOSIXDYLD::UnloadSections(const ModuleSP module) {
  UnloadModule(module);

  UnloadSectionsCommon(module);
}
Expand Down Expand Up @@ -401,7 +424,7 @@ void DynamicLoaderPOSIXDYLD::RefreshModules() {
// The rendezvous class doesn't enumerate the main module, so track that
// ourselves here.
ModuleSP executable = GetTargetExecutable();
m_loaded_modules[executable] = m_rendezvous.GetLinkMapAddress();
SetLoadedModule(executable, m_rendezvous.GetLinkMapAddress());

DYLDRendezvous::iterator I;
DYLDRendezvous::iterator E;
Expand All @@ -423,34 +446,70 @@ void DynamicLoaderPOSIXDYLD::RefreshModules() {
E = m_rendezvous.end();
m_initial_modules_added = true;
}
for (; I != E; ++I) {
// Don't load a duplicate copy of ld.so if we have already loaded it
// earlier in LoadInterpreterModule. If we instead loaded then unloaded it
// later, the section information for ld.so would be removed. That
// information is required for placing breakpoints on Arm/Thumb systems.
if ((m_interpreter_module.lock() != nullptr) &&
(I->base_addr == m_interpreter_base))
continue;

ModuleSP module_sp =
LoadModuleAtAddress(I->file_spec, I->link_addr, I->base_addr, true);
if (!module_sp.get())
continue;

if (module_sp->GetObjectFile()->GetBaseAddress().GetLoadAddress(
&m_process->GetTarget()) == m_interpreter_base) {
ModuleSP interpreter_sp = m_interpreter_module.lock();
if (m_interpreter_module.lock() == nullptr) {
m_interpreter_module = module_sp;
} else if (module_sp == interpreter_sp) {
// Module already loaded.
continue;
}
}

loaded_modules.AppendIfNeeded(module_sp);
new_modules.Append(module_sp);
// Synchronize reading and writing of `m_interpreter_module`.
std::mutex interpreter_module_mutex;
// We should be able to take SOEntry as reference since the data
// exists for the duration of this call in `m_rendezvous`.
auto load_module_fn =
[this, &loaded_modules, &new_modules,
&interpreter_module_mutex](const DYLDRendezvous::SOEntry &so_entry) {
// Don't load a duplicate copy of ld.so if we have already loaded it
// earlier in LoadInterpreterModule. If we instead loaded then
// unloaded it later, the section information for ld.so would be
// removed. That information is required for placing breakpoints on
// Arm/Thumb systems.
{
// `m_interpreter_module` may be modified by another thread at the
// same time, so we guard the access here.
std::lock_guard<std::mutex> lock(interpreter_module_mutex);
if ((m_interpreter_module.lock() != nullptr) &&
(so_entry.base_addr == m_interpreter_base))
return;
}

ModuleSP module_sp = LoadModuleAtAddress(
so_entry.file_spec, so_entry.link_addr, so_entry.base_addr, true);
if (!module_sp.get())
return;

{
// `m_interpreter_module` may be modified by another thread at the
// same time, so we guard the access here.
std::lock_guard<std::mutex> lock(interpreter_module_mutex);
// Set the interpreter module, if this is the interpreter.
if (module_sp->GetObjectFile()->GetBaseAddress().GetLoadAddress(
&m_process->GetTarget()) == m_interpreter_base) {
ModuleSP interpreter_sp = m_interpreter_module.lock();
if (m_interpreter_module.lock() == nullptr) {
m_interpreter_module = module_sp;
} else if (module_sp == interpreter_sp) {
// Module already loaded.
return;
}
}
}

// Note: in a multi-threaded environment, these module lists may be
// appended to out-of-order. This is fine, since there's no
// expectation for `loaded_modules` or `new_modules` to be in any
// particular order, and appending to each module list is thread-safe.
// Also, `new_modules` is only used for the `ModulesDidLoad` call at
// the end of this function.
loaded_modules.AppendIfNeeded(module_sp);
new_modules.Append(module_sp);
Comment on lines +499 to +500
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we ever get into trouble because this operation isn't atomic? Both ModuleLists are thread-safe, but is there a situation where another thread could see a module in one list but not the other? Given that we only append to them in the lambda, I suspect it's fine. It may be worth adding a comment here, along the lines of: we don't need to synchronize the two appends, because each module list is thread-safe and no two threads look at the two lists at the same time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, so the threads don't read from new_modules during loading, and new_modules is only used after all modules have been loaded in parallel in ModulesDidLoad, which does some more initialization synchronously.

I added a comment to explain this -- hopefully the comment is clear enough.

};

if (m_process->GetTarget().GetParallelModuleLoad()) {
llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
for (; I != E; ++I)
task_group.async(load_module_fn, *I);
task_group.wait();
} else {
for (; I != E; ++I)
load_module_fn(*I);
}

m_process->GetTarget().ModulesDidLoad(new_modules);
}

Expand Down Expand Up @@ -636,27 +695,39 @@ void DynamicLoaderPOSIXDYLD::LoadAllCurrentModules() {
// The rendezvous class doesn't enumerate the main module, so track that
// ourselves here.
ModuleSP executable = GetTargetExecutable();
m_loaded_modules[executable] = m_rendezvous.GetLinkMapAddress();
SetLoadedModule(executable, m_rendezvous.GetLinkMapAddress());

std::vector<FileSpec> module_names;
for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I)
module_names.push_back(I->file_spec);
m_process->PrefetchModuleSpecs(
module_names, m_process->GetTarget().GetArchitecture().GetTriple());

for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I) {
ModuleSP module_sp =
LoadModuleAtAddress(I->file_spec, I->link_addr, I->base_addr, true);
auto load_module_fn = [this, &module_list,
&log](const DYLDRendezvous::SOEntry &so_entry) {
ModuleSP module_sp = LoadModuleAtAddress(
so_entry.file_spec, so_entry.link_addr, so_entry.base_addr, true);
if (module_sp.get()) {
LLDB_LOG(log, "LoadAllCurrentModules loading module: {0}",
I->file_spec.GetFilename());
so_entry.file_spec.GetFilename());
module_list.Append(module_sp);
} else {
Log *log = GetLog(LLDBLog::DynamicLoader);
LLDB_LOGF(
log,
"DynamicLoaderPOSIXDYLD::%s failed loading module %s at 0x%" PRIx64,
__FUNCTION__, I->file_spec.GetPath().c_str(), I->base_addr);
__FUNCTION__, so_entry.file_spec.GetPath().c_str(),
so_entry.base_addr);
}
};
if (m_process->GetTarget().GetParallelModuleLoad()) {
llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I)
task_group.async(load_module_fn, *I);
task_group.wait();
} else {
for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I) {
load_module_fn(*I);
}
}

Expand Down Expand Up @@ -728,15 +799,15 @@ DynamicLoaderPOSIXDYLD::GetThreadLocalData(const lldb::ModuleSP module_sp,
const lldb::ThreadSP thread,
lldb::addr_t tls_file_addr) {
Log *log = GetLog(LLDBLog::DynamicLoader);
auto it = m_loaded_modules.find(module_sp);
if (it == m_loaded_modules.end()) {
std::optional<addr_t> link_map_addr_opt = GetLoadedModuleLinkAddr(module_sp);
if (!link_map_addr_opt.has_value()) {
LLDB_LOGF(
log, "GetThreadLocalData error: module(%s) not found in loaded modules",
module_sp->GetObjectName().AsCString());
return LLDB_INVALID_ADDRESS;
}

addr_t link_map = it->second;
addr_t link_map = link_map_addr_opt.value();
if (link_map == LLDB_INVALID_ADDRESS || link_map == 0) {
LLDB_LOGF(log,
"GetThreadLocalData error: invalid link map address=0x%" PRIx64,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ class DynamicLoaderPOSIXDYLD : public lldb_private::DynamicLoader {
/// Contains the pointer to the interpret module, if loaded.
std::weak_ptr<lldb_private::Module> m_interpreter_module;

/// Loaded module list. (link map for each module)
std::map<lldb::ModuleWP, lldb::addr_t, std::owner_less<lldb::ModuleWP>>
m_loaded_modules;

/// Returns true if the process is for a core file.
bool IsCoreFile() const;

Expand Down Expand Up @@ -180,6 +176,19 @@ class DynamicLoaderPOSIXDYLD : public lldb_private::DynamicLoader {
DynamicLoaderPOSIXDYLD(const DynamicLoaderPOSIXDYLD &) = delete;
const DynamicLoaderPOSIXDYLD &
operator=(const DynamicLoaderPOSIXDYLD &) = delete;

/// Loaded module list. (link map for each module)
/// This may be accessed in a multi-threaded context. Use the accessor methods
/// to access `m_loaded_modules` safely.
std::map<lldb::ModuleWP, lldb::addr_t, std::owner_less<lldb::ModuleWP>>
m_loaded_modules;
llvm::sys::RWMutex m_loaded_modules_rw_mutex;

void SetLoadedModule(const lldb::ModuleSP &module_sp,
lldb::addr_t link_map_addr);
void UnloadModule(const lldb::ModuleSP &module_sp);
std::optional<lldb::addr_t>
GetLoadedModuleLinkAddr(const lldb::ModuleSP &module_sp);
};

#endif // LLDB_SOURCE_PLUGINS_DYNAMICLOADER_POSIX_DYLD_DYNAMICLOADERPOSIXDYLD_H
6 changes: 6 additions & 0 deletions lldb/source/Target/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4488,6 +4488,12 @@ llvm::StringRef TargetProperties::GetLaunchWorkingDirectory() const {
idx, g_target_properties[idx].default_cstr_value);
}

bool TargetProperties::GetParallelModuleLoad() const {
const uint32_t idx = ePropertyParallelModuleLoad;
return GetPropertyAtIndexAs<bool>(
idx, g_target_properties[idx].default_uint_value != 0);
}

const char *TargetProperties::GetDisassemblyFlavor() const {
const uint32_t idx = ePropertyDisassemblyFlavor;
const char *return_value;
Expand Down
3 changes: 3 additions & 0 deletions lldb/source/Target/TargetProperties.td
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ let Definition = "target" in {
"launched. If you change this setting, the new value will only apply to "
"subsequent launches. Commands that take an explicit working directory "
"will override this setting.">;
// Boolean target setting "parallel-module-load"; defaults to true.
def ParallelModuleLoad: Property<"parallel-module-load", "Boolean">,
DefaultTrue,
Desc<"Enable loading of modules in parallel for the dynamic loader.">;
}

let Definition = "process_experimental" in {
Expand Down
Loading