Skip to content

[Offload] Replace device info queue with a tree #144050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,

// Find the info if it exists under any of the given names
auto GetInfo = [&](std::vector<std::string> Names) {
InfoQueueTy DevInfo;
if (Device == HostDevice())
return std::string("Host");

if (!Device->Device)
return std::string("");

if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
auto Info = Device->Device->obtainInfoImpl();
if (auto Err = Info.takeError())
return std::string("");

for (auto Name : Names) {
auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
return Info.Key == Name;
};
auto Item = std::find_if(DevInfo.getQueue().begin(),
DevInfo.getQueue().end(), InfoKeyMatches);

if (Item != std::end(DevInfo.getQueue())) {
return Item->Value;
}
if (auto Entry = Info->get(Name))
return (*Entry)->Value;
}

return std::string("");
Expand Down
45 changes: 23 additions & 22 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}

/// Print information about the device.
Error obtainInfoImpl(InfoQueueTy &Info) override {
Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr = "Unknown";
uint16_t Major, Minor;
Expand All @@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
uint16_t WorkgrpMaxDim[3];
hsa_dim3_t GridMaxDim;
hsa_status_t Status, Status2;
InfoTreeNode Info;

Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
Expand Down Expand Up @@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// runtime.
Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
if (Status == HSA_STATUS_SUCCESS) {
Info.add("Cache");
auto &Cache = *Info.add("Cache");

for (int I = 0; I < 4; I++)
if (CacheSize[I])
Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
Cache.add("L" + std::to_string(I), CacheSize[I]);
}

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
Expand Down Expand Up @@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
Info.add("Workgroup Max Size per Dimension");
Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
MaxSize.add("x", WorkgrpMaxDim[0]);
MaxSize.add("y", WorkgrpMaxDim[1]);
MaxSize.add("z", WorkgrpMaxDim[2]);
}

Status = getDeviceAttrRaw(
Expand All @@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
Info.add("Grid Max Size per Dimension");
Info.add<InfoLevel2>("x", GridMaxDim.x);
Info.add<InfoLevel2>("y", GridMaxDim.y);
Info.add<InfoLevel2>("z", GridMaxDim.z);
auto &MaxDim = *Info.add("Grid Max Size per Dimension");
MaxDim.add("x", GridMaxDim.x);
MaxDim.add("y", GridMaxDim.y);
MaxDim.add("z", GridMaxDim.z);
}

Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max fbarriers/Workgrp", TmpUInt);

Info.add("Memory Pools");
auto &RootPool = *Info.add("Memory Pools");
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
std::string TmpStr, TmpStr2;

Expand All @@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
else
TmpStr = "Unknown";

Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr);

if (Pool->isGlobal()) {
if (Pool->isFineGrained())
Expand All @@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Pool->supportsKernelArgs())
TmpStr2 += "Kernarg ";

Info.add<InfoLevel3>("Flags", TmpStr2);
PoolNode.add("Flags", TmpStr2);
}

Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Size", TmpSt, "bytes");
PoolNode.add("Size", TmpSt, "bytes");

Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
TmpBool);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Allocatable", TmpBool);
PoolNode.add("Allocatable", TmpBool);

Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
TmpSt);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes");

Status = Pool->getAttrRaw(
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes");

Status =
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Accessible by all", TmpBool);
PoolNode.add("Accessible by all", TmpBool);
}

Info.add("ISAs");
auto &ISAs = *Info.add("ISAs");
auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel2>("Name", TmpChar);
ISAs.add("Name", TmpChar);

return Status;
});
Expand All @@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Err)
consumeError(std::move(Err));

return Plugin::success();
return Info;
}

/// Returns true if auto zero-copy is the best configuration for the current
Expand Down
139 changes: 81 additions & 58 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,77 +112,100 @@ struct AsyncInfoWrapperTy {
__tgt_async_info *AsyncInfoPtr;
};

/// The information level represents the level of a key-value property in the
/// info tree print (i.e. indentation). The first level should be the default.
enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };

/// Class for storing device information to be printed later. An object of this
/// type acts as a queue of key-value properties. Each property has a key, a
/// value, and an optional unit for the value. For printing purposes, the
/// information can be classified into several levels. These levels are useful
/// for defining sections and subsections. Thus, each key-value property also
/// has an additional field indicating which level it belongs to. Notice that
/// we use the level to determine the indentation of the key-value property at
/// printing time. See the enum InfoLevelKind for the list of accepted levels.
class InfoQueueTy {
public:
struct InfoQueueEntryTy {
std::string Key;
std::string Value;
std::string Units;
uint64_t Level;
};

private:
std::deque<InfoQueueEntryTy> Queue;

public:
/// Add a new info entry to the queue. The entry requires at least a key
/// string in \p Key. The value in \p Value is optional and can be any type
/// that is representable as a string. The units in \p Units is optional and
/// must be a string. The info level is a template parameter that defaults to
/// the first level (top level).
template <InfoLevelKind L = InfoLevel1, typename T = std::string>
void add(const std::string &Key, T Value = T(),
const std::string &Units = std::string()) {
/// Tree node for device information
///
/// This information is either printed or used by liboffload to extract certain
/// device queries. Each property has an optional key, an optional value
/// and optional children. The children can be used to store additional
/// information (such as x, y and z components of ranges).
struct InfoTreeNode {
static constexpr uint64_t IndentSize = 4;

std::string Key;
std::string Value;
std::string Units;
// Need to specify a default number of elements here as `InfoTreeNode`'s
// size is unknown. This is a vector (rather than a Key->Value map) since:
// * The keys need to be owned and thus `std::string`s
// * The order of keys is important
// * The same key can appear multiple times
std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;

InfoTreeNode() : InfoTreeNode("", "", "") {}
InfoTreeNode(std::string Key, std::string Value, std::string Units)
: Key(Key), Value(Value), Units(Units) {}

/// Add a new info entry as a child of this node. The entry requires at least
/// a key string in \p Key. The value in \p Value is optional and can be any
/// type that is representable as a string. The units in \p Units is optional
/// and must be a string.
template <typename T = std::string>
InfoTreeNode *add(std::string Key, T Value = T(),
const std::string &Units = std::string()) {
assert(!Key.empty() && "Invalid info key");

// Convert the value to a string depending on its type.
if (!Children)
Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>();

std::string ValueStr;
if constexpr (std::is_same_v<T, bool>)
Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
ValueStr = Value ? "Yes" : "No";
else if constexpr (std::is_arithmetic_v<T>)
Queue.push_back({Key, std::to_string(Value), Units, L});
ValueStr = std::to_string(Value);
else
Queue.push_back({Key, Value, Units, L});
ValueStr = Value;

return &Children->emplace_back(Key, ValueStr, Units);
}

const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; }
std::optional<InfoTreeNode *> get(StringRef Key) {
if (!Children)
return std::nullopt;

/// Print all info entries added to the queue.
void print() const {
// We print four spaces for each level.
constexpr uint64_t IndentSize = 4;
auto It = std::find_if(Children->begin(), Children->end(),
[&](auto &V) { return V.Key == Key; });
if (It == Children->end())
return std::nullopt;
return It;
}

// Find the maximum key length (level + key) to compute the individual
// indentation of each entry.
uint64_t MaxKeySize = 0;
for (const auto &Entry : Queue) {
uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
if (KeySize > MaxKeySize)
MaxKeySize = KeySize;
}
/// Print all info entries in the tree
void print() const {
// Fake an additional indent so that values are offset from the keys
doPrint(0, maxKeySize(1));
}

// Print all info entries.
for (const auto &Entry : Queue) {
private:
void doPrint(int Level, uint64_t MaxKeySize) const {
if (Key.size()) {
// Compute the indentations for the current entry.
uint64_t KeyIndentSize = Entry.Level * IndentSize;
uint64_t KeyIndentSize = Level * IndentSize;
uint64_t ValIndentSize =
MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize;

llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
<< std::string(ValIndentSize, ' ') << Entry.Value
<< (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
llvm::outs() << std::string(KeyIndentSize, ' ') << Key
<< std::string(ValIndentSize, ' ') << Value
<< (Units.empty() ? "" : " ") << Units << "\n";
}

// Print children
if (Children)
for (const auto &Entry : *Children)
Entry.doPrint(Level + 1, MaxKeySize);
}

// Recursively calculates the maximum width of each key, including indentation
uint64_t maxKeySize(int Level) const {
uint64_t MaxKeySize = 0;

if (Children)
for (const auto &Entry : *Children) {
uint64_t KeySize = Entry.Key.size() + Level * IndentSize;
MaxKeySize = std::max(MaxKeySize, KeySize);
MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1));
}

return MaxKeySize;
}
};

Expand Down Expand Up @@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {

/// Print information about the device.
Error printInfo();
virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;

/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
Expand Down
6 changes: 3 additions & 3 deletions offload/plugins-nextgen/common/src/PluginInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
}

Error GenericDeviceTy::printInfo() {
InfoQueueTy InfoQueue;
auto Info = obtainInfoImpl();

// Get the vendor-specific info entries describing the device properties.
if (auto Err = obtainInfoImpl(InfoQueue))
if (auto Err = Info.takeError())
return Err;

// Print all info entries.
InfoQueue.print();
Info->print();

return Plugin::success();
}
Expand Down
Loading
Loading