1111#include  < unordered_set> 
1212
1313#include  " core/common/cpuid_info.h" 
14+ #include  " core/common/logging/logging.h" 
1415#include  " core/session/abi_devices.h" 
1516
16- // // UsingSetupApi 
17+ // // For SetupApi info 
1718#include  < Windows.h> 
1819#include  < SetupAPI.h> 
1920#include  < devguid.h> 
2021#include  < cfgmgr32.h> 
2122#pragma  comment(lib, "setupapi.lib")
2223
23- // // Using  D3D12
24+ // // For  D3D12 info 
2425//  #include <windows.h>
2526#include  < d3d12.h> 
2627#include  < dxgi1_6.h> 
2728#include  < iostream> 
29+ #include  < wrl/client.h> 
30+ using  Microsoft::WRL::ComPtr;
2831
2932#pragma  comment(lib, "d3d12.lib")
3033#pragma  comment(lib, "dxgi.lib")
3134
32- // // Using DXCore. Requires newer Windows SDK than what we target by default.
33- //  these values were added in 10.0.22621.0 as part of DirectXCore API
34- // 
35- //  In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied.
36- //  with the NTDII_VERSION value there...
37- // 
38- //  Defining a local GUID instead.
39- //  #if NTDDI_VERSION < NTDDI_WIN10_RS5
40- //   DEFINE_GUID(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML, 0xb71b0d41, 0x1088, 0x422f, 0xa2, 0x7c, 0x2, 0x50, 0xb7, 0xd3, 0xa9, 0x88);
41- //   DEFINE_GUID(DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU, 0xd46140c4, 0xadd7, 0x451b, 0x9e, 0x56, 0x6, 0xfe, 0x8c, 0x3b, 0x58, 0xed);
42- //  #endif
35+ // // For DXCore info.
4336#include  < initguid.h> 
4437#include  < dxcore.h> 
4538#include  < dxcore_interface.h> 
4639#include  < wil/com.h> 
4740
48- // 
49- //  In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied. Not sure what is happening
50- //  with the NTDII_VERSION value there...
51- // 
52- //  Defining a local GUID instead.
5341#include  " core/common/cpuid_info.h" 
5442#include  " core/session/abi_devices.h" 
5543
5644namespace  onnxruntime  {
57- #if  !defined(ORT_MINIMAL_BUILD)
45+ //  unsupported in minimal build. also needs xbox specific handling to be implemented.
46+ #if  !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
5847namespace  {
5948
6049//  device info we accumulate from various sources
@@ -64,7 +53,6 @@ struct DeviceInfo {
6453  uint32_t  device_id;
6554  std::wstring vendor;
6655  std::wstring description;
67-   std::vector<DWORD> bus_ids;  //  assuming could have multiple GPUs that are the same model
6856  std::unordered_map<std::wstring, std::wstring> metadata;
6957};
7058
@@ -97,14 +85,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
9785  for  (auto  guid : guids) {
9886    HDEVINFO devInfo = SetupDiGetClassDevs (&guid, nullptr , nullptr , DIGCF_PRESENT);
9987    if  (devInfo == INVALID_HANDLE_VALUE) {
100-       return  device_info ;
88+       continue ;
10189    }
10290
10391    SP_DEVINFO_DATA devData = {};
10492    devData.cbSize  = sizeof (SP_DEVINFO_DATA);
10593
106-     std::wstring buffer;
107-     buffer.resize (1024 );
94+     WCHAR buffer[1024 ];
10895
10996    for  (DWORD i = 0 ; SetupDiEnumDeviceInfo (devInfo, i, &devData); ++i) {
11097      DWORD size = 0 ;
@@ -114,13 +101,8 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
114101      DeviceInfo* entry = nullptr ;
115102
116103      // // Get hardware ID (contains VEN_xxxx&DEV_xxxx)
117-       if  (SetupDiGetDeviceRegistryPropertyW (devInfo,
118-                                             &devData,
119-                                             SPDRP_HARDWAREID,
120-                                             ®DataType,
121-                                             (PBYTE)buffer.data (),
122-                                             (DWORD)buffer.size (),
123-                                             &size)) {
104+       if  (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_HARDWAREID, ®DataType,
105+                                             (PBYTE)buffer, sizeof (buffer), &size)) {
124106        //  PCI\VEN_xxxx&DEV_yyyy&...
125107        //  ACPI\VEN_xxxx&DEV_yyyy&... if we're lucky.
126108        //  ACPI values seem to be very inconsistent, so we check fairly carefully and always require a device id.
@@ -148,23 +130,31 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
148130          device_info[key] = {};
149131        } else  {
150132          if  (guid == GUID_DEVCLASS_PROCESSOR) {
151-             //  skip duplicate processor entries as we don't need to accumulate bus numbers for them 
133+             //  skip duplicate processor entries
152134            continue ;
153135          }
154136        }
155137
156138        entry = &device_info[key];
157139        entry->vendor_id  = vendor_id;
158140        entry->device_id  = device_id;
141+         //  put the first hardware id string in the metadata. ignore the other lines.
142+         entry->metadata .emplace (L" SPDRP_HARDWAREID"  , std::wstring (buffer, wcslen (buffer)));
159143      } else  {
160144        //  need valid ids
161145        continue ;
162146      }
163147
164-       //  Get device description.
148+       //  Use the friendly name if available.
149+       if  (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_FRIENDLYNAME, nullptr ,
150+                                             (PBYTE)buffer, sizeof (buffer), &size)) {
151+         entry->description  = std::wstring{buffer};
152+       }
153+ 
154+       //  Set type using the device description to try and infer an NPU.
165155      if  (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_DEVICEDESC, nullptr ,
166-                                             (PBYTE)buffer. data (), (DWORD) buffer. size ( ), &size)) {
167-         entry-> description  =  buffer;
156+                                             (PBYTE)buffer,  sizeof ( buffer), &size)) {
157+         std::wstring desc{ buffer} ;
168158
169159        //  Should we require the NPU to be found by DXCore or do we want to allow this vague matching?
170160        //  Probably depends on whether we always attempt to run DXCore or not.
@@ -175,9 +165,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
175165                  desc.find (L" VPU"  ) != std::wstring::npos);
176166        };
177167
178-         //  not 100% accurate. is there a better way?
168+         //  use description if no friendly name
169+         if  (entry->description .empty ()) {
170+           entry->description  = desc;
171+         }
172+ 
179173        uint64_t  npu_key = GetDeviceKey (*entry);
180-         bool  is_npu = npus.count (npu_key) > 0  || possible_npu (entry-> description );
174+         bool  is_npu = npus.count (npu_key) > 0  || possible_npu (desc );
181175
182176        if  (guid == GUID_DEVCLASS_DISPLAY) {
183177          entry->type  = OrtHardwareDeviceType_GPU;
@@ -201,18 +195,21 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
201195      }
202196
203197      if  (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_MFG, nullptr ,
204-                                             (PBYTE)buffer. data (), (DWORD) buffer. size ( ), &size)) {
205-         entry->vendor  = buffer;
198+                                             (PBYTE)buffer,  sizeof ( buffer), &size)) {
199+         entry->vendor  = std::wstring ( buffer,  wcslen (buffer)) ;
206200      }
207201
208-       if  (guid != GUID_DEVCLASS_PROCESSOR) {
209-         DWORD busNumber = 0 ;
210-         size = 0 ;
211-         if  (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_BUSNUMBER, nullptr ,
212-                                               reinterpret_cast <PBYTE>(&busNumber), sizeof (busNumber), &size)) {
213-           //  push_back in case there are two identical devices. not sure how else to tell them apart
214-           entry->bus_ids .push_back (busNumber);
202+       //  Add the UI number if GPU. Helpful if user has integrated and discrete GPUs
203+       if  (entry->type  == OrtHardwareDeviceType_GPU) {
204+         DWORD ui_number = 0 ;
205+         if  (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_UI_NUMBER, nullptr ,
206+                                               (PBYTE)&ui_number, sizeof (ui_number), &size)) {
207+           //  use value read.
208+         } else  {
209+           //  infer it as 0 if not set.
215210        }
211+ 
212+         entry->metadata .emplace (L" SPDRP_UI_NUMBER"  , std::to_wstring (ui_number));
216213      }
217214    }
218215
@@ -226,50 +223,58 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
226223std::unordered_map<uint64_t , DeviceInfo> GetDeviceInfoD3D12 () {
227224  std::unordered_map<uint64_t , DeviceInfo> device_info;
228225
229-   IDXGIFactory6* factory = nullptr ;
230-   HRESULT hr = CreateDXGIFactory1 (IID_PPV_ARGS (&factory));
231-   if  (FAILED (hr)) {
226+   ComPtr<IDXGIFactory6> factory;
227+   if  (FAILED (CreateDXGIFactory2 (0 , IID_PPV_ARGS (&factory)))) {
232228    std::cerr << " Failed to create DXGI factory.\n "  ;
233229    return  device_info;
234230  }
235231
236-   IDXGIAdapter1* adapter = nullptr ;
237- 
238-   //  iterate by high-performance GPU preference first
239-   for  (UINT i = 0 ; factory->EnumAdapterByGpuPreference (i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
240-                                                        IID_PPV_ARGS (&adapter)) != DXGI_ERROR_NOT_FOUND;
241-        ++i) {
232+   ComPtr<IDXGIAdapter1> adapter;
233+   for  (UINT i = 0 ; factory->EnumAdapters1 (i, adapter.ReleaseAndGetAddressOf ()) != DXGI_ERROR_NOT_FOUND; ++i) {
242234    DXGI_ADAPTER_DESC1 desc;
243235    if  (FAILED (adapter->GetDesc1 (&desc))) {
244236      continue ;
245237    }
246238
247-     do  {
248-       if  ((desc.Flags  & DXGI_ADAPTER_FLAG_SOFTWARE) != 0  ||
249-           (desc.Flags  & DXGI_ADAPTER_FLAG_REMOTE) != 0 ) {
250-         //  software or remote. skip
251-         break ;
252-       }
239+     if  ((desc.Flags  & DXGI_ADAPTER_FLAG_SOFTWARE) != 0  ||
240+         (desc.Flags  & DXGI_ADAPTER_FLAG_REMOTE) != 0 ) {
241+       //  software or remote. skip
242+       continue ;
243+     }
253244
254-        static_assert (sizeof (LUID) == sizeof (uint64_t ), " LUID and uint64_t are not the same size"  );
255-        uint64_t  key = GetLuidKey (desc.AdapterLuid );
245+     static_assert (sizeof (LUID) == sizeof (uint64_t ), " LUID and uint64_t are not the same size"  );
246+     uint64_t  key = GetLuidKey (desc.AdapterLuid );
256247
257-       DeviceInfo& info = device_info[key];
258-       info.type  = OrtHardwareDeviceType_GPU;
259-       info.vendor_id  = desc.VendorId ;
260-       info.device_id  = desc.DeviceId ;
261-       info.description  = std::wstring (desc.Description );
262- 
263-       info.metadata [L" VideoMemory"  ] = std::to_wstring (desc.DedicatedVideoMemory  / (1024  * 1024 )) + L"  MB"  ;
264-       info.metadata [L" SystemMemory"  ] = std::to_wstring (desc.DedicatedSystemMemory  / (1024  * 1024 )) + L"  MB"  ;
265-       info.metadata [L" SharedSystemMemory"  ] = std::to_wstring (desc.DedicatedSystemMemory  / (1024  * 1024 )) + L"  MB"  ;
266-       info.metadata [L" HighPerformanceIndex"  ] = std::to_wstring (i);
267-     } while  (false );
248+     DeviceInfo& info = device_info[key];
249+     info.type  = OrtHardwareDeviceType_GPU;
250+     info.vendor_id  = desc.VendorId ;
251+     info.device_id  = desc.DeviceId ;
252+     info.description  = std::wstring (desc.Description );
268253
269-     adapter->Release ();
254+     info.metadata [L" DxgiAdapterNumber"  ] = std::to_wstring (i);
255+     info.metadata [L" VideoMemory"  ] = std::to_wstring (desc.DedicatedVideoMemory  / (1024  * 1024 )) + L"  MB"  ;
256+     info.metadata [L" SystemMemory"  ] = std::to_wstring (desc.DedicatedSystemMemory  / (1024  * 1024 )) + L"  MB"  ;
257+     info.metadata [L" SharedSystemMemory"  ] = std::to_wstring (desc.DedicatedSystemMemory  / (1024  * 1024 )) + L"  MB"  ;
270258  }
271259
272-   factory->Release ();
260+   //  iterate by high-performance GPU preference to add that info
261+   for  (UINT i = 0 ; factory->EnumAdapterByGpuPreference (
262+                        i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
263+                        IID_PPV_ARGS (adapter.ReleaseAndGetAddressOf ())) != DXGI_ERROR_NOT_FOUND;
264+        ++i) {
265+     DXGI_ADAPTER_DESC1 desc;
266+     if  (FAILED (adapter->GetDesc1 (&desc))) {
267+       continue ;
268+     }
269+ 
270+     uint64_t  key = GetLuidKey (desc.AdapterLuid );
271+ 
272+     auto  it = device_info.find (key);
273+     if  (it != device_info.end ()) {
274+       DeviceInfo& info = it->second ;
275+       info.metadata [L" HighPerformanceIndex"  ] = std::to_wstring (i);
276+     }
277+   }
273278
274279  return  device_info;
275280}
@@ -284,7 +289,9 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
284289    return  device_info;
285290  }
286291
287-   //  manually define for older Windows versions. will be no matches but means this code works on machines with dxcore.
292+   //  NOTE: These GUIDs require a newer Windows SDK than what we target by default.
293+   //  They were added in 10.0.22621.0 as part of the DirectXCore API.
294+   //  To work around this we define a local copy of the values. On an older Windows machine they won't match anything.
288295  static  const  GUID local_DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML = {0xb71b0d41 , 0x1088 , 0x422f , 0xa2 , 0x7c , 0x2 , 0x50 , 0xb7 , 0xd3 , 0xa9 , 0x88 };
289296  static  const  GUID local_DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU = {0xd46140c4 , 0xadd7 , 0x451b , 0x9e , 0x56 , 0x6 , 0xfe , 0x8c , 0x3b , 0x58 , 0xed };
290297
@@ -353,27 +360,17 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
353360                                         &is_integrated))) {
354361        info.metadata [L" Discrete"  ] = is_integrated ? L" 0"   : L" 1"  ;
355362      }
356- 
357-       //  this returns char_t on us-en Windows. assuming it returns wchar_t on other locales but not clear what it
358-       //  does when.
359-       //  The description from SetupApi is wchar_t so assuming we have that and don't need this one.
360-       // 
361-       //  hrId = HRESULT_FROM_WIN32(ERROR_NOT_FOUND);
362-       //  std::wstring driverDescription;
363-       //  driverDescription.resize(256);
364-       // // this doesn't seem to return wchar_t
365-       //  if (adapter->IsPropertySupported(DXCoreAdapterProperty::DriverDescription)) {
366-       //    hrId = adapter->GetProperty(DXCoreAdapterProperty::DriverDescription, sizeof(driverDescription),
367-       //                                &driverDescription);
368-       //    info.description = driverDescription;
369-       //  }
370363    }
371364  }
372365
373366  return  device_info;
374367}
375368}  //  namespace
376369
370+ //  Get devices from various sources and combine them into a single set of devices.
371+ //  For CPU we use setupapi data.
372+ //  For GPU we augment the d3d12 and dxcore data with the setupapi data.
373+ //  For NPU we augment the dxcore data with the setupapi data.
377374std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform () {
378375  //  dxcore info. key is luid
379376  std::unordered_map<uint64_t , DeviceInfo> luid_to_dxinfo = GetDeviceInfoDxcore ();
@@ -408,18 +405,12 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
408405    }
409406  }
410407
411-   std::wstring_convert<std::codecvt_utf8<wchar_t >> converter;  //  wstring to string
408+   std::wstring_convert<std::codecvt_utf8<wchar_t >  > converter;  //  wstring to string
412409  const  auto  device_to_ortdevice = [&converter](
413410                                       DeviceInfo& device,
414411                                       std::unordered_map<std::wstring, std::wstring>* extra_metadata = nullptr ) {
415412    OrtHardwareDevice ortdevice{device.type , device.vendor_id , device.device_id , converter.to_bytes (device.vendor )};
416413
417-     if  (device.bus_ids .size () > 0 ) {
418-       //  use the first bus number. not sure how to handle multiple
419-       ortdevice.metadata .Add (" BusNumber"  , std::to_string (device.bus_ids .back ()).c_str ());
420-       device.bus_ids .pop_back ();
421-     }
422- 
423414    if  (!device.description .empty ()) {
424415      ortdevice.metadata .Add (" Description"  , converter.to_bytes (device.description ));
425416    }
@@ -437,6 +428,18 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
437428      }
438429    }
439430
431+     std::ostringstream oss;
432+     oss << " Adding OrtHardwareDevice {vendor_id:0x"   << std::hex << ortdevice.vendor_id 
433+         << " , device_id:0x"   << ortdevice.device_id 
434+         << " , type:"   << std::dec << static_cast <int >(ortdevice.type )
435+         << " , metadata: ["  ;
436+     for  (auto & [key, value] : ortdevice.metadata .entries ) {
437+       oss << key << " ="   << value << " , "  ;
438+     }
439+ 
440+     oss << " ]}"   << std::endl;
441+     LOGS_DEFAULT (INFO) << oss.str ();
442+ 
440443    return  ortdevice;
441444  };
442445
@@ -459,14 +462,14 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
459462      //  use SetupApi info. merge metadata.
460463      devices.emplace (device_to_ortdevice (it->second , &device.metadata ));
461464    } else  {
462-       //  no matching entry in SetupApi. use the dxinfo. no  vendor. no BusNumber. 
465+       //  no matching entry in SetupApi. use the dxinfo. will be missing  vendor name and UI_NUMBER 
463466      devices.emplace (device_to_ortdevice (device));
464467    }
465468  }
466469
467470  return  devices;
468471}
469- #else   //  !defined(ORT_MINIMAL_BUILD)
472+ #else   //  !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX) 
470473std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform () {
471474  return  {};
472475}
0 commit comments