diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 0a784cddeaecb..770c212d804d2 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device, // Find the info if it exists under any of the given names auto GetInfo = [&](std::vector Names) { - InfoQueueTy DevInfo; if (Device == HostDevice()) return std::string("Host"); if (!Device->Device) return std::string(""); - if (auto Err = Device->Device->obtainInfoImpl(DevInfo)) + auto Info = Device->Device->obtainInfoImpl(); + if (auto Err = Info.takeError()) return std::string(""); for (auto Name : Names) { - auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) { - return Info.Key == Name; - }; - auto Item = std::find_if(DevInfo.getQueue().begin(), - DevInfo.getQueue().end(), InfoKeyMatches); - - if (Item != std::end(DevInfo.getQueue())) { - return Item->Value; - } + if (auto Entry = Info->get(Name)) + return (*Entry)->Value; } return std::string(""); diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index e4c32713e2c15..73e1e66928fac 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { } /// Print information about the device. 
- Error obtainInfoImpl(InfoQueueTy &Info) override { + Expected obtainInfoImpl() override { char TmpChar[1000]; const char *TmpCharPtr = "Unknown"; uint16_t Major, Minor; @@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { uint16_t WorkgrpMaxDim[3]; hsa_dim3_t GridMaxDim; hsa_status_t Status, Status2; + InfoTreeNode Info; Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major); Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor); @@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { // runtime. Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize); if (Status == HSA_STATUS_SUCCESS) { - Info.add("Cache"); + auto &Cache = *Info.add("Cache"); for (int I = 0; I < 4; I++) if (CacheSize[I]) - Info.add("L" + std::to_string(I), CacheSize[I]); + Cache.add("L" + std::to_string(I), CacheSize[I]); } Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt); @@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim); if (Status == HSA_STATUS_SUCCESS) { - Info.add("Workgroup Max Size per Dimension"); - Info.add("x", WorkgrpMaxDim[0]); - Info.add("y", WorkgrpMaxDim[1]); - Info.add("z", WorkgrpMaxDim[2]); + auto &MaxSize = *Info.add("Workgroup Max Size per Dimension"); + MaxSize.add("x", WorkgrpMaxDim[0]); + MaxSize.add("y", WorkgrpMaxDim[1]); + MaxSize.add("z", WorkgrpMaxDim[2]); } Status = getDeviceAttrRaw( @@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim); if (Status == HSA_STATUS_SUCCESS) { - Info.add("Grid Max Size per Dimension"); - Info.add("x", GridMaxDim.x); - Info.add("y", GridMaxDim.y); - Info.add("z", GridMaxDim.z); + auto &MaxDim = *Info.add("Grid Max Size per Dimension"); + MaxDim.add("x", GridMaxDim.x); + MaxDim.add("y", 
GridMaxDim.y); + MaxDim.add("z", GridMaxDim.z); } Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt); if (Status == HSA_STATUS_SUCCESS) Info.add("Max fbarriers/Workgrp", TmpUInt); - Info.add("Memory Pools"); + auto &RootPool = *Info.add("Memory Pools"); for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) { std::string TmpStr, TmpStr2; @@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { else TmpStr = "Unknown"; - Info.add(std::string("Pool ") + TmpStr); + auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr); if (Pool->isGlobal()) { if (Pool->isFineGrained()) @@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (Pool->supportsKernelArgs()) TmpStr2 += "Kernarg "; - Info.add("Flags", TmpStr2); + PoolNode.add("Flags", TmpStr2); } Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt); if (Status == HSA_STATUS_SUCCESS) - Info.add("Size", TmpSt, "bytes"); + PoolNode.add("Size", TmpSt, "bytes"); Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, TmpBool); if (Status == HSA_STATUS_SUCCESS) - Info.add("Allocatable", TmpBool); + PoolNode.add("Allocatable", TmpBool); Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, TmpSt); if (Status == HSA_STATUS_SUCCESS) - Info.add("Runtime Alloc Granule", TmpSt, "bytes"); + PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes"); Status = Pool->getAttrRaw( HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt); if (Status == HSA_STATUS_SUCCESS) - Info.add("Runtime Alloc Alignment", TmpSt, "bytes"); + PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes"); Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool); if (Status == HSA_STATUS_SUCCESS) - Info.add("Accessible by all", TmpBool); + PoolNode.add("Accessible by all", TmpBool); } - Info.add("ISAs"); + auto &ISAs = *Info.add("ISAs"); auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) { 
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar); if (Status == HSA_STATUS_SUCCESS) - Info.add("Name", TmpChar); + ISAs.add("Name", TmpChar); return Status; }); @@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (Err) consumeError(std::move(Err)); - return Plugin::success(); + return Info; } /// Returns true if auto zero-copy the best configuration for the current diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..f5d995532b7a5 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -112,77 +112,100 @@ struct AsyncInfoWrapperTy { __tgt_async_info *AsyncInfoPtr; }; -/// The information level represents the level of a key-value property in the -/// info tree print (i.e. indentation). The first level should be the default. -enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 }; - -/// Class for storing device information and later be printed. An object of this -/// type acts as a queue of key-value properties. Each property has a key, a -/// a value, and an optional unit for the value. For printing purposes, the -/// information can be classified into several levels. These levels are useful -/// for defining sections and subsections. Thus, each key-value property also -/// has an additional field indicating to which level belongs to. Notice that -/// we use the level to determine the indentation of the key-value property at -/// printing time. See the enum InfoLevelKind for the list of accepted levels. -class InfoQueueTy { -public: - struct InfoQueueEntryTy { - std::string Key; - std::string Value; - std::string Units; - uint64_t Level; - }; - -private: - std::deque Queue; - -public: - /// Add a new info entry to the queue. The entry requires at least a key - /// string in \p Key. 
The value in \p Value is optional and can be any type - /// that is representable as a string. The units in \p Units is optional and - /// must be a string. The info level is a template parameter that defaults to - /// the first level (top level). - template - void add(const std::string &Key, T Value = T(), - const std::string &Units = std::string()) { +/// Tree node for device information +/// +/// This information is either printed or used by liboffload to answer certain +/// device queries. Each node has a key (empty for a default-constructed root), +/// an optional value, optional units and optional children. Children can hold +/// nested information (such as the x, y and z components of ranges). +struct InfoTreeNode { + static constexpr uint64_t IndentSize = 4; + + std::string Key; + std::string Value; + std::string Units; + // The number of elements has to be given explicitly here as `InfoTreeNode`'s + // size is unknown. This is a vector (rather than a Key->Value map) since: + // * The keys need to be owned and thus `std::string`s + // * The order of keys is important + // * The same key can appear multiple times + std::unique_ptr> Children; + + InfoTreeNode() : InfoTreeNode("", "", "") {} + InfoTreeNode(std::string Key, std::string Value, std::string Units) + : Key(Key), Value(Value), Units(Units) {} + + /// Add a new info entry as a child of this node. The entry requires at least + /// a key string in \p Key. The value in \p Value is optional and can be any + /// type that is representable as a string. The unit string in \p Units is + /// optional. Returns a pointer to the newly added child. + template + InfoTreeNode *add(std::string Key, T Value = T(), + const std::string &Units = std::string()) { assert(!Key.empty() && "Invalid info key"); - // Convert the value to a string depending on its type. + if (!Children) + Children = std::make_unique>(); + + std::string ValueStr; if constexpr (std::is_same_v) - Queue.push_back({Key, Value ? "Yes" : "No", Units, L}); + ValueStr = Value ? 
"Yes" : "No"; else if constexpr (std::is_arithmetic_v) - Queue.push_back({Key, std::to_string(Value), Units, L}); + ValueStr = std::to_string(Value); else - Queue.push_back({Key, Value, Units, L}); + ValueStr = Value; + + return &Children->emplace_back(Key, ValueStr, Units); } - const std::deque &getQueue() const { return Queue; } + std::optional get(StringRef Key) { + if (!Children) + return std::nullopt; - /// Print all info entries added to the queue. - void print() const { - // We print four spances for each level. - constexpr uint64_t IndentSize = 4; + auto It = std::find_if(Children->begin(), Children->end(), + [&](auto &V) { return V.Key == Key; }); + if (It == Children->end()) + return std::nullopt; + return It; + } - // Find the maximum key length (level + key) to compute the individual - // indentation of each entry. - uint64_t MaxKeySize = 0; - for (const auto &Entry : Queue) { - uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize; - if (KeySize > MaxKeySize) - MaxKeySize = KeySize; - } + /// Print all info entries in the tree to llvm::outs() + void print() const { + // Fake an additional indent so that values are offset from the keys + doPrint(0, maxKeySize(1)); + } - // Print all info entries. - for (const auto &Entry : Queue) { +private: + void doPrint(int Level, uint64_t MaxKeySize) const { + if (Key.size()) { // Compute the indentations for the current entry. - uint64_t KeyIndentSize = Entry.Level * IndentSize; + uint64_t KeyIndentSize = Level * IndentSize; uint64_t ValIndentSize = - MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize; + MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize; - llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key - << std::string(ValIndentSize, ' ') << Entry.Value - << (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n"; + llvm::outs() << std::string(KeyIndentSize, ' ') << Key + << std::string(ValIndentSize, ' ') << Value + << (Units.empty() ? 
"" : " ") << Units << "\n"; } + + // Print the children one level deeper, aligned to the same value column + if (Children) + for (const auto &Entry : *Children) + Entry.doPrint(Level + 1, MaxKeySize); + } + + // Recursively calculates the maximum key width in the subtree, incl. indentation + uint64_t maxKeySize(int Level) const { + uint64_t MaxKeySize = 0; + + if (Children) + for (const auto &Entry : *Children) { + uint64_t KeySize = Entry.Key.size() + Level * IndentSize; + MaxKeySize = std::max(MaxKeySize, KeySize); + MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1)); + } + + return MaxKeySize; } }; @@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// Print information about the device. Error printInfo(); - virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0; + virtual Expected obtainInfoImpl() = 0; /// Getters of the grid values. uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; } diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index f9a6b3c1f4324..6fd3405d03afa 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) { } Error GenericDeviceTy::printInfo() { - InfoQueueTy InfoQueue; + auto Info = obtainInfoImpl(); // Get the vendor-specific info entries describing the device properties. - if (auto Err = obtainInfoImpl(InfoQueue)) + if (auto Err = Info.takeError()) return Err; // Print all info entries. - InfoQueue.print(); + Info->print(); return Plugin::success(); } diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 44ccfc47a21c9..9943f533ef5a8 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -922,11 +922,12 @@ struct CUDADeviceTy : public GenericDeviceTy { } /// Print information about the device. 
- Error obtainInfoImpl(InfoQueueTy &Info) override { + Expected obtainInfoImpl() override { char TmpChar[1000]; const char *TmpCharPtr; size_t TmpSt; int TmpInt; + InfoTreeNode Info; CUresult Res = cuDriverGetVersion(&TmpInt); if (Res == CUDA_SUCCESS) @@ -971,27 +972,27 @@ struct CUDADeviceTy : public GenericDeviceTy { if (Res == CUDA_SUCCESS) Info.add("Maximum Threads per Block", TmpInt); - Info.add("Maximum Block Dimensions", ""); + auto &MaxBlock = *Info.add("Maximum Block Dimensions", ""); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("x", TmpInt); + MaxBlock.add("x", TmpInt); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("y", TmpInt); + MaxBlock.add("y", TmpInt); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("z", TmpInt); + MaxBlock.add("z", TmpInt); - Info.add("Maximum Grid Dimensions", ""); + auto &MaxGrid = *Info.add("Maximum Grid Dimensions", ""); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("x", TmpInt); + MaxGrid.add("x", TmpInt); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("y", TmpInt); + MaxGrid.add("y", TmpInt); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, TmpInt); if (Res == CUDA_SUCCESS) - Info.add("z", TmpInt); + MaxGrid.add("z", TmpInt); Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_PITCH, TmpInt); if (Res == CUDA_SUCCESS) @@ -1087,7 +1088,7 @@ struct CUDADeviceTy : public GenericDeviceTy { Info.add("Compute Capabilities", ComputeCapability.str()); - return Plugin::success(); + return Info; } virtual bool shouldSetupDeviceMemoryPool() const override { diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index 9916f4d0ab250..ced9208acaedc 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ 
b/offload/plugins-nextgen/host/src/rtl.cpp @@ -326,9 +326,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy { Error syncEventImpl(void *EventPtr) override { return Plugin::success(); } /// Print information about the device. - Error obtainInfoImpl(InfoQueueTy &Info) override { + Expected obtainInfoImpl() override { + InfoTreeNode Info; Info.add("Device Type", "Generic-elf-64bit"); - return Plugin::success(); + return Info; } /// This plugin should not setup the device environment or memory pool.