23
23
#include < cassert>
24
24
#include < cstring>
25
25
#include < iostream>
26
+ #include < list>
27
+ #include < map>
26
28
#include < memory>
27
29
#include < mutex>
28
30
#include < unordered_map>
@@ -76,18 +78,33 @@ struct _pi_platform {
76
78
// Cache pi_devices for reuse
77
79
std::vector<pi_device> PiDevicesCache;
78
80
std::mutex PiDevicesCacheMutex;
81
+ // Maximum Number of Command Lists that can be created.
82
+ // This Value is initialized to 20000, but can be changed by the user
83
+ // through the environment variable SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE,
84
+ // e.g. SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE=10000.
85
+ int ZeMaxCommandListCache = 0 ;
86
+
87
+ // Current number of L0 Command Lists created on this platform.
88
+ // This number must not exceed ZeMaxCommandListCache.
89
+ std::atomic<int > ZeGlobalCommandListCount{0 };
79
90
};
80
91
81
92
struct _pi_device : _pi_object {
82
93
_pi_device (ze_device_handle_t Device, pi_platform Plt,
83
94
bool isSubDevice = false )
84
- : ZeDevice{Device}, Platform{Plt}, ZeCommandListInit{nullptr },
85
- IsSubDevice{isSubDevice}, ZeDeviceProperties{},
86
- ZeDeviceComputeProperties{} {
95
+ : ZeDevice{Device}, Platform{Plt}, IsSubDevice{isSubDevice},
96
+ ZeDeviceProperties{}, ZeDeviceComputeProperties{} {
87
97
// NOTE: one must additionally call initialize() to complete
88
98
// PI device creation.
89
99
}
90
100
101
+ // Keep the ordinal of a "compute" commands group, where we send all
102
+ // commands currently.
103
+ // TODO[1.0]: discover "copy" command group as well to use for memory
104
+ // copying operations exclusively.
105
+ //
106
+ uint32_t ZeComputeQueueGroupIndex;
107
+
91
108
// Initialize the entire PI device.
92
109
pi_result initialize ();
93
110
@@ -97,23 +114,28 @@ struct _pi_device : _pi_object {
97
114
// PI platform to which this device belongs.
98
115
pi_platform Platform;
99
116
100
- // Immediate Level Zero command list for this device, to be used for
101
- // initializations. To be created as:
102
- // - Immediate command list: So any command appended to it is immediately
103
- // offloaded to the device.
104
- // - Synchronous: So implicit synchronization is made inside the level-zero
105
- // driver.
106
- ze_command_list_handle_t ZeCommandListInit;
117
+ // Mutex Lock for the Command List Cache
118
+ std::mutex ZeCommandListCacheMutex;
119
+ // Cache of all currently Available Command Lists for use by PI APIs
120
+ std::list<ze_command_list_handle_t > ZeCommandListCache;
107
121
108
122
// Indicates if this is a root-device or a sub-device.
109
123
// Technically this information can be queried from a device handle, but it
110
124
// seems better to just keep it here.
111
125
bool IsSubDevice;
112
126
113
- // Create a new command list for executing on this device.
114
- // It's caller's responsibility to remember and destroy the created
115
- // command list when no longer needed.
116
- pi_result createCommandList (ze_command_list_handle_t *ze_command_list);
127
+ // Retrieves a command list for executing on this device along with
128
+ // a fence to be used in tracking the execution of this command list.
129
+ // If a command list has been created on this device which has
130
+ // completed its commands, then that command list and its associated fence
131
+ // will be reused. Otherwise, a new command list and fence will be created for
132
+ // running on this device. L0 fences are created on an L0 command queue so the
133
+ // caller must pass a command queue to create a new fence for the new command
134
+ // list if a command list/fence pair is not available. All Command Lists &
135
+ // associated fences are destroyed at Device Release.
136
+ pi_result getAvailableCommandList (pi_queue Queue,
137
+ ze_command_list_handle_t *ZeCommandList,
138
+ ze_fence_handle_t *ZeFence);
117
139
118
140
// Cache of the immutable device properties.
119
141
ze_device_properties_t ZeDeviceProperties;
@@ -122,14 +144,27 @@ struct _pi_device : _pi_object {
122
144
123
145
struct _pi_context : _pi_object {
124
146
_pi_context (pi_device Device)
125
- : Device{Device}, ZeEventPool{nullptr }, NumEventsAvailableInEventPool{},
126
- NumEventsLiveInEventPool{} {}
147
+ : Device{Device}, ZeCommandListInit{nullptr }, ZeEventPool{nullptr },
148
+ NumEventsAvailableInEventPool{}, NumEventsLiveInEventPool{} {}
149
+
150
+ // An L0 context handle is primarily used during creation and management of
151
+ // resources that may be used by multiple devices.
152
+ ze_context_handle_t ZeContext;
127
153
128
- // Level Zero does not have notion of contexts.
129
154
// Keep the device here (must be exactly one) to return it when PI context
130
155
// is queried for devices.
131
156
pi_device Device;
132
157
158
+ // Immediate Level Zero command list for the device in this context, to be
159
+ // used for initializations. To be created as:
160
+ // - Immediate command list: So any command appended to it is immediately
161
+ // offloaded to the device.
162
+ // - Synchronous: So implicit synchronization is made inside the level-zero
163
+ // driver.
164
+ // There will be a list of immediate command lists (one for each device) once
165
+ // support for multiple devices per context is added.
166
+ ze_command_list_handle_t ZeCommandListInit;
167
+
133
168
// Get index of the free slot in the available pool. If there is no available
134
169
// pool then create a new one.
135
170
ze_result_t getFreeSlotInExistingOrNewPool (ze_event_pool_handle_t &,
@@ -169,25 +204,46 @@ struct _pi_context : _pi_object {
169
204
};
170
205
171
206
struct _pi_queue : _pi_object {
172
- _pi_queue (ze_command_queue_handle_t Queue, pi_context Context)
173
- : ZeCommandQueue{Queue}, Context{Context} {}
207
+ _pi_queue (ze_command_queue_handle_t Queue, pi_context Context,
208
+ pi_device Device)
209
+ : ZeCommandQueue{Queue}, Context{Context}, Device{Device} {}
174
210
175
211
// Level Zero command queue handle wrapped by this PI queue.
176
212
ze_command_queue_handle_t ZeCommandQueue;
177
213
178
214
// PI context to which this queue belongs.
179
215
pi_context Context;
180
216
217
+ // Mutex protecting ZeCommandListFenceMap.
218
+ std::mutex ZeCommandListFenceMapMutex;
219
+ // Maps each created command list to its associated fence, which is used to
220
+ // track when that command list is available for use again.
221
+ std::map<ze_command_list_handle_t , ze_fence_handle_t > ZeCommandListFenceMap;
222
+
223
+ // Resets a command list and its associated fence in ZeCommandListFenceMap.
224
+ // Set MakeAvailable to true if the reset command list should be made
225
+ // available. The caller must first verify that this command list and its
226
+ // fence have been signalled, and must call this while holding
227
+ // ZeCommandListFenceMapMutex.
228
+ pi_result resetCommandListFenceEntry (ze_command_list_handle_t ZeCommandList,
229
+ bool MakeAvailable);
230
+
231
+ // PI device to which this queue belongs.
232
+ pi_device Device;
233
+
181
234
// Attaches a command list to this queue, closes it, and executes it.
182
235
// Note that the command list cannot be appended to after this call.
183
236
// "is_blocking" tells whether a wait for completion is requested.
237
+ // "ZeFence" is used to track when the passed command list has completed
238
+ // execution on the device and can be reused.
184
239
pi_result executeCommandList (ze_command_list_handle_t ZeCommandList,
240
+ ze_fence_handle_t ZeFence,
185
241
bool is_blocking = false );
186
242
};
187
243
188
244
struct _pi_mem : _pi_object {
189
- // Keeps the PI platform of this memory handle.
190
- pi_platform Platform ;
245
+ // Keeps the PI context of this memory handle.
246
+ pi_context Context ;
191
247
192
248
// Keeps the host pointer where the buffer will be mapped to,
193
249
// if created with PI_MEM_FLAGS_HOST_PTR_USE (see
@@ -221,8 +277,8 @@ struct _pi_mem : _pi_object {
221
277
pi_result removeMapping (void *MappedTo, Mapping &MapInfo);
222
278
223
279
protected:
224
- _pi_mem (pi_platform Plt , char *HostPtr)
225
- : Platform{Plt }, MapHostPtr{HostPtr}, Mappings{} {}
280
+ _pi_mem (pi_context Ctx , char *HostPtr)
281
+ : Context{Ctx }, MapHostPtr{HostPtr}, Mappings{} {}
226
282
227
283
private:
228
284
// The key is the host pointer representing an active mapping.
@@ -237,9 +293,9 @@ struct _pi_mem : _pi_object {
237
293
238
294
struct _pi_buffer final : _pi_mem {
239
295
// Buffer/Sub-buffer constructor
240
- _pi_buffer (pi_platform Plt , char *Mem, char *HostPtr,
296
+ _pi_buffer (pi_context Ctx , char *Mem, char *HostPtr,
241
297
_pi_mem *Parent = nullptr , size_t Origin = 0 , size_t Size = 0 )
242
- : _pi_mem(Plt , HostPtr), ZeMem{Mem}, SubBuffer{Parent, Origin, Size} {}
298
+ : _pi_mem(Ctx , HostPtr), ZeMem{Mem}, SubBuffer{Parent, Origin, Size} {}
243
299
244
300
void *getZeHandle () override { return ZeMem; }
245
301
@@ -262,8 +318,8 @@ struct _pi_buffer final : _pi_mem {
262
318
263
319
struct _pi_image final : _pi_mem {
264
320
// Image constructor
265
- _pi_image (pi_platform Plt , ze_image_handle_t Image, char *HostPtr)
266
- : _pi_mem(Plt , HostPtr), ZeImage{Image} {}
321
+ _pi_image (pi_context Ctx , ze_image_handle_t Image, char *HostPtr)
322
+ : _pi_mem(Ctx , HostPtr), ZeImage{Image} {}
267
323
268
324
void *getZeHandle () override { return ZeImage; }
269
325
0 commit comments