-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Offload] Replace device info queue with a tree #144050
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Previously, device info was returned as a queue with each element having a "Level" field indicating its nesting level. This replaces this queue with a more traditional tree-like structure. This should not result in a change to the output of `llvm-offload-device-info`.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-offload Author: Ross Brunton (RossBrunton) Changes: Previously, device info was returned as a queue with each element having a "Level" field indicating its nesting level. This replaces the queue with a more traditional tree-like structure. This should not result in a change to the output of `llvm-offload-device-info`. Full diff: https://github.com/llvm/llvm-project/pull/144050.diff — 6 Files Affected:
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 0a784cddeaecb..770c212d804d2 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
// Find the info if it exists under any of the given names
auto GetInfo = [&](std::vector<std::string> Names) {
- InfoQueueTy DevInfo;
if (Device == HostDevice())
return std::string("Host");
if (!Device->Device)
return std::string("");
- if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
+ auto Info = Device->Device->obtainInfoImpl();
+ if (auto Err = Info.takeError())
return std::string("");
for (auto Name : Names) {
- auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
- return Info.Key == Name;
- };
- auto Item = std::find_if(DevInfo.getQueue().begin(),
- DevInfo.getQueue().end(), InfoKeyMatches);
-
- if (Item != std::end(DevInfo.getQueue())) {
- return Item->Value;
- }
+ if (auto Entry = Info->get(Name))
+ return (*Entry)->Value;
}
return std::string("");
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e4c32713e2c15..73e1e66928fac 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Print information about the device.
- Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr = "Unknown";
uint16_t Major, Minor;
@@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
uint16_t WorkgrpMaxDim[3];
hsa_dim3_t GridMaxDim;
hsa_status_t Status, Status2;
+ InfoTreeNode Info;
Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
@@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// runtime.
Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
if (Status == HSA_STATUS_SUCCESS) {
- Info.add("Cache");
+ auto &Cache = *Info.add("Cache");
for (int I = 0; I < 4; I++)
if (CacheSize[I])
- Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
+ Cache.add("L" + std::to_string(I), CacheSize[I]);
}
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
@@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- Info.add("Workgroup Max Size per Dimension");
- Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
- Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
- Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
+ auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
+ MaxSize.add("x", WorkgrpMaxDim[0]);
+ MaxSize.add("y", WorkgrpMaxDim[1]);
+ MaxSize.add("z", WorkgrpMaxDim[2]);
}
Status = getDeviceAttrRaw(
@@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
- Info.add("Grid Max Size per Dimension");
- Info.add<InfoLevel2>("x", GridMaxDim.x);
- Info.add<InfoLevel2>("y", GridMaxDim.y);
- Info.add<InfoLevel2>("z", GridMaxDim.z);
+ auto &MaxDim = *Info.add("Grid Max Size per Dimension");
+ MaxDim.add("x", GridMaxDim.x);
+ MaxDim.add("y", GridMaxDim.y);
+ MaxDim.add("z", GridMaxDim.z);
}
Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max fbarriers/Workgrp", TmpUInt);
- Info.add("Memory Pools");
+ auto &RootPool = *Info.add("Memory Pools");
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
std::string TmpStr, TmpStr2;
@@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
else
TmpStr = "Unknown";
- Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
+ auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr);
if (Pool->isGlobal()) {
if (Pool->isFineGrained())
@@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Pool->supportsKernelArgs())
TmpStr2 += "Kernarg ";
- Info.add<InfoLevel3>("Flags", TmpStr2);
+ PoolNode.add("Flags", TmpStr2);
}
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Size", TmpSt, "bytes");
+ PoolNode.add("Size", TmpSt, "bytes");
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
TmpBool);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Allocatable", TmpBool);
+ PoolNode.add("Allocatable", TmpBool);
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
+ PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes");
Status = Pool->getAttrRaw(
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
+ PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes");
Status =
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel3>("Accessible by all", TmpBool);
+ PoolNode.add("Accessible by all", TmpBool);
}
- Info.add("ISAs");
+ auto &ISAs = *Info.add("ISAs");
auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
- Info.add<InfoLevel2>("Name", TmpChar);
+ ISAs.add("Name", TmpChar);
return Status;
});
@@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Err)
consumeError(std::move(Err));
- return Plugin::success();
+ return Info;
}
/// Returns true if auto zero-copy the best configuration for the current
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index d2437908a0a6f..f5d995532b7a5 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -112,77 +112,100 @@ struct AsyncInfoWrapperTy {
__tgt_async_info *AsyncInfoPtr;
};
-/// The information level represents the level of a key-value property in the
-/// info tree print (i.e. indentation). The first level should be the default.
-enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
-
-/// Class for storing device information and later be printed. An object of this
-/// type acts as a queue of key-value properties. Each property has a key, a
-/// a value, and an optional unit for the value. For printing purposes, the
-/// information can be classified into several levels. These levels are useful
-/// for defining sections and subsections. Thus, each key-value property also
-/// has an additional field indicating to which level belongs to. Notice that
-/// we use the level to determine the indentation of the key-value property at
-/// printing time. See the enum InfoLevelKind for the list of accepted levels.
-class InfoQueueTy {
-public:
- struct InfoQueueEntryTy {
- std::string Key;
- std::string Value;
- std::string Units;
- uint64_t Level;
- };
-
-private:
- std::deque<InfoQueueEntryTy> Queue;
-
-public:
- /// Add a new info entry to the queue. The entry requires at least a key
- /// string in \p Key. The value in \p Value is optional and can be any type
- /// that is representable as a string. The units in \p Units is optional and
- /// must be a string. The info level is a template parameter that defaults to
- /// the first level (top level).
- template <InfoLevelKind L = InfoLevel1, typename T = std::string>
- void add(const std::string &Key, T Value = T(),
- const std::string &Units = std::string()) {
+/// Tree node for device information
+///
+/// This information is either printed or used by liboffload to extract certain
+/// device queries. Each property has an optional key, an optional value
+/// and optional children. The children can be used to store additional
+/// information (such as x, y and z components of ranges).
+struct InfoTreeNode {
+ static constexpr uint64_t IndentSize = 4;
+
+ std::string Key;
+ std::string Value;
+ std::string Units;
+ // Need to specify a default value number of elements here as `InfoTreeNode`'s
+ // size is unknown. This is a vector (rather than a Key->Value map) since:
+ // * The keys need to be owned and thus `std::string`s
+ // * The order of keys is important
+ // * The same key can appear multiple times
+ std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
+
+ InfoTreeNode() : InfoTreeNode("", "", "") {}
+ InfoTreeNode(std::string Key, std::string Value, std::string Units)
+ : Key(Key), Value(Value), Units(Units) {}
+
+ /// Add a new info entry as a child of this node. The entry requires at least
+ /// a key string in \p Key. The value in \p Value is optional and can be any
+ /// type that is representable as a string. The units in \p Units is optional
+ /// and must be a string.
+ template <typename T = std::string>
+ InfoTreeNode *add(std::string Key, T Value = T(),
+ const std::string &Units = std::string()) {
assert(!Key.empty() && "Invalid info key");
- // Convert the value to a string depending on its type.
+ if (!Children)
+ Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>();
+
+ std::string ValueStr;
if constexpr (std::is_same_v<T, bool>)
- Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
+ ValueStr = Value ? "Yes" : "No";
else if constexpr (std::is_arithmetic_v<T>)
- Queue.push_back({Key, std::to_string(Value), Units, L});
+ ValueStr = std::to_string(Value);
else
- Queue.push_back({Key, Value, Units, L});
+ ValueStr = Value;
+
+ return &Children->emplace_back(Key, ValueStr, Units);
}
- const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; }
+ std::optional<InfoTreeNode *> get(StringRef Key) {
+ if (!Children)
+ return std::nullopt;
- /// Print all info entries added to the queue.
- void print() const {
- // We print four spances for each level.
- constexpr uint64_t IndentSize = 4;
+ auto It = std::find_if(Children->begin(), Children->end(),
+ [&](auto &V) { return V.Key == Key; });
+ if (It == Children->end())
+ return std::nullopt;
+ return It;
+ }
- // Find the maximum key length (level + key) to compute the individual
- // indentation of each entry.
- uint64_t MaxKeySize = 0;
- for (const auto &Entry : Queue) {
- uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
- if (KeySize > MaxKeySize)
- MaxKeySize = KeySize;
- }
+ /// Print all info entries in the tree
+ void print() const {
+ // Fake an additional indent so that values are offset from the keys
+ doPrint(0, maxKeySize(1));
+ }
- // Print all info entries.
- for (const auto &Entry : Queue) {
+private:
+ void doPrint(int Level, uint64_t MaxKeySize) const {
+ if (Key.size()) {
// Compute the indentations for the current entry.
- uint64_t KeyIndentSize = Entry.Level * IndentSize;
+ uint64_t KeyIndentSize = Level * IndentSize;
uint64_t ValIndentSize =
- MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
+ MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize;
- llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
- << std::string(ValIndentSize, ' ') << Entry.Value
- << (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
+ llvm::outs() << std::string(KeyIndentSize, ' ') << Key
+ << std::string(ValIndentSize, ' ') << Value
+ << (Units.empty() ? "" : " ") << Units << "\n";
}
+
+ // Print children
+ if (Children)
+ for (const auto &Entry : *Children)
+ Entry.doPrint(Level + 1, MaxKeySize);
+ }
+
+ // Recursively calculates the maximum width of each key, including indentation
+ uint64_t maxKeySize(int Level) const {
+ uint64_t MaxKeySize = 0;
+
+ if (Children)
+ for (const auto &Entry : *Children) {
+ uint64_t KeySize = Entry.Key.size() + Level * IndentSize;
+ MaxKeySize = std::max(MaxKeySize, KeySize);
+ MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1));
+ }
+
+ return MaxKeySize;
}
};
@@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Print information about the device.
Error printInfo();
- virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
+ virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index f9a6b3c1f4324..6fd3405d03afa 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
}
Error GenericDeviceTy::printInfo() {
- InfoQueueTy InfoQueue;
+ auto Info = obtainInfoImpl();
// Get the vendor-specific info entries describing the device properties.
- if (auto Err = obtainInfoImpl(InfoQueue))
+ if (auto Err = Info.takeError())
return Err;
// Print all info entries.
- InfoQueue.print();
+ Info->print();
return Plugin::success();
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 44ccfc47a21c9..9943f533ef5a8 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -922,11 +922,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Print information about the device.
- Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr;
size_t TmpSt;
int TmpInt;
+ InfoTreeNode Info;
CUresult Res = cuDriverGetVersion(&TmpInt);
if (Res == CUDA_SUCCESS)
@@ -971,27 +972,27 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Maximum Threads per Block", TmpInt);
- Info.add("Maximum Block Dimensions", "");
+ auto &MaxBlock = *Info.add("Maximum Block Dimensions", "");
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("x", TmpInt);
+ MaxBlock.add("x", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("y", TmpInt);
+ MaxBlock.add("y", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("z", TmpInt);
+ MaxBlock.add("z", TmpInt);
- Info.add("Maximum Grid Dimensions", "");
+ auto &MaxGrid = *Info.add("Maximum Grid Dimensions", "");
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("x", TmpInt);
+ MaxGrid.add("x", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("y", TmpInt);
+ MaxGrid.add("y", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, TmpInt);
if (Res == CUDA_SUCCESS)
- Info.add<InfoLevel2>("z", TmpInt);
+ MaxGrid.add("z", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_PITCH, TmpInt);
if (Res == CUDA_SUCCESS)
@@ -1087,7 +1088,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
Info.add("Compute Capabilities", ComputeCapability.str());
- return Plugin::success();
+ return Info;
}
virtual bool shouldSetupDeviceMemoryPool() const override {
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 9916f4d0ab250..ced9208acaedc 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -326,9 +326,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Print information about the device.
- Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Expected<InfoTreeNode> obtainInfoImpl() override {
+ InfoTreeNode Info;
Info.add("Device Type", "Generic-elf-64bit");
- return Plugin::success();
+ return Info;
}
/// This plugin should not setup the device environment or memory pool.
|
Previously, device info was returned as a queue with each element having a "Level" field indicating its nesting level. This replaces this queue with a more traditional tree-like structure. This should not result in a change to the output of `llvm-offload-device-info`.
Previously, device info was returned as a queue with each element having a "Level" field indicating its nesting level. This replaces this queue with a more traditional tree-like structure. This should not result in a change to the output of `llvm-offload-device-info`.
Previously, device info was returned as a queue with each element having
a "Level" field indicating its nesting level. This replaces this queue
with a more traditional tree-like structure.
This should not result in a change to the output of
llvm-offload-device-info
.