@@ -80,7 +80,7 @@ Error PluginAdaptorTy::init() {
80
80
}
81
81
82
82
// No devices are supported by this RTL?
83
- NumberOfPluginDevices = number_of_devices ();
83
+ int32_t NumberOfPluginDevices = number_of_devices ();
84
84
if (!NumberOfPluginDevices) {
85
85
return createStringError (inconvertibleErrorCode (),
86
86
" No devices supported in this RTL\n " );
@@ -112,26 +112,27 @@ void PluginManager::init() {
112
112
DP (" RTLs loaded!\n " );
113
113
}
114
114
115
- void PluginAdaptorTy::initDevices (PluginManager &PM) {
116
- if (isUsed ())
115
+ void PluginManager::initDevices (PluginAdaptorTy &RTL) {
116
+ // Nothing to do if this RTL has already been initialized.
117
+ if (PM->DeviceOffsets .contains (&RTL))
117
118
return ;
118
119
TIMESCOPE ();
119
120
120
121
// If this RTL is not already in use, initialize it.
121
- assert (getNumberOfPluginDevices () > 0 &&
122
+ assert (RTL. number_of_devices () > 0 &&
122
123
" Tried to initialize useless plugin adaptor" );
123
124
124
125
// Initialize the device information for the RTL we are about to use.
125
- auto ExclusiveDevicesAccessor = PM. getExclusiveDevicesAccessor ();
126
+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor ();
126
127
127
128
// Initialize the index of this RTL and save it in the used RTLs.
128
- DeviceOffset = ExclusiveDevicesAccessor->size ();
129
+ int32_t DeviceOffset = ExclusiveDevicesAccessor->size ();
129
130
130
- // If possible, set the device identifier offset in the plugin.
131
- if (set_device_offset)
132
- set_device_offset (DeviceOffset);
131
+ // Set the device identifier offset in the plugin.
132
+ RTL.set_device_offset (DeviceOffset);
133
133
134
- int32_t NumPD = getNumberOfPluginDevices ();
134
+ int32_t NumberOfUserDevices = 0 ;
135
+ int32_t NumPD = RTL.number_of_devices ();
135
136
ExclusiveDevicesAccessor->reserve (DeviceOffset + NumPD);
136
137
// Auto zero-copy is a per-device property. We need to ensure
137
138
// that all devices are suggesting to use it.
@@ -140,7 +141,7 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
140
141
// They are surfaced per-device because the related properties
141
142
// are computed as such in the plugins.
142
143
for (int32_t PDevI = 0 , UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
143
- auto Device = std::make_unique<DeviceTy>(this , UserDevId, PDevI);
144
+ auto Device = std::make_unique<DeviceTy>(&RTL , UserDevId, PDevI);
144
145
if (auto Err = Device->init ()) {
145
146
DP (" Skip plugin known device %d: %s\n " , PDevI,
146
147
toString (std::move (Err)).c_str ());
@@ -158,11 +159,9 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
158
159
// We do not mix APUs with discrete GPUs. Eager maps is set by a host
159
160
// environment variable.
160
161
bool IsAPU = false ;
161
- bool SupportsUnifiedMemory = false ;
162
162
if (ExclusiveDevicesAccessor->size () > 0 ) {
163
163
auto &Device = *(*ExclusiveDevicesAccessor)[0 ];
164
164
IsAPU = Device.checkIfAPU ();
165
- SupportsUnifiedMemory = Device.supportsUnifiedMemory ();
166
165
}
167
166
bool EagerMapsRequested = BoolEnvar (" OMPX_EAGER_ZERO_COPY_MAPS" , false ).get ();
168
167
@@ -171,43 +170,46 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
171
170
// If all devices suggest to use it, change requirement flags to trigger
172
171
// zero-copy behavior when mapping memory.
173
172
if (UseAutoZeroCopy)
174
- PM. addRequirements (OMPX_REQ_AUTO_ZERO_COPY);
173
+ addRequirements (OMPX_REQ_AUTO_ZERO_COPY);
175
174
176
175
// Eager Zero-Copy Maps makes a "copy" execution turn into
177
176
// an automatic zero-copy. It also applies to unified_shared_memory.
178
177
// It is only available on APUs.
179
178
if (IsAPU && EagerMapsRequested) {
180
- PM. addRequirements (OMPX_REQ_EAGER_ZERO_COPY_MAPS);
181
- if (!(PM. getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY))
182
- PM. addRequirements (OMPX_REQ_AUTO_ZERO_COPY);
179
+ addRequirements (OMPX_REQ_EAGER_ZERO_COPY_MAPS);
180
+ if (!(getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY))
181
+ addRequirements (OMPX_REQ_AUTO_ZERO_COPY);
183
182
}
184
183
185
184
// sanity checks for zero-copy depend on specific devices: request it here
186
185
if ((ExclusiveDevicesAccessor->size () > 0 ) &&
187
- ((PM. getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
188
- (PM. getRequirements () & OMPX_REQ_AUTO_ZERO_COPY))) {
186
+ ((getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
187
+ (getRequirements () & OMPX_REQ_AUTO_ZERO_COPY))) {
189
188
// APUs are assumed to be a homogeneous set of GPUs: ask
190
189
// the first device in the system to run a sanity check.
191
190
auto &Device = *(*ExclusiveDevicesAccessor)[0 ];
192
191
// just skip checks if no devices are found in the system
193
192
Device.zeroCopySanityChecksAndDiag (
194
- (PM. getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY),
195
- (PM. getRequirements () & OMPX_REQ_AUTO_ZERO_COPY),
196
- (PM. getRequirements () & OMPX_REQ_EAGER_ZERO_COPY_MAPS));
193
+ (getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY),
194
+ (getRequirements () & OMPX_REQ_AUTO_ZERO_COPY),
195
+ (getRequirements () & OMPX_REQ_EAGER_ZERO_COPY_MAPS));
197
196
}
198
197
198
+ DeviceOffsets[&RTL] = DeviceOffset;
199
+ DeviceUsed[&RTL] = NumberOfUserDevices;
199
200
DP (" Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n " ,
200
- DPxPTR (LibraryHandler.get ()), DeviceOffset, NumberOfUserDevices,
201
- NumberOfPluginDevices );
201
+ DPxPTR (RTL. LibraryHandler .get ()), DeviceOffset, NumberOfUserDevices,
202
+ RTL. number_of_devices () );
202
203
}
203
204
204
205
void PluginManager::initAllPlugins () {
205
206
for (auto &R : PluginAdaptors)
206
- R-> initDevices (*this );
207
+ initDevices (*R );
207
208
}
208
209
209
210
static void registerImageIntoTranslationTable (TranslationTable &TT,
210
- PluginAdaptorTy &RTL,
211
+ int32_t DeviceOffset,
212
+ int32_t NumberOfUserDevices,
211
213
__tgt_device_image *Image) {
212
214
213
215
// same size, as when we increase one, we also increase the other.
@@ -216,8 +218,7 @@ static void registerImageIntoTranslationTable(TranslationTable &TT,
216
218
217
219
// Resize the Targets Table and Images to accommodate the new targets if
218
220
// required
219
- unsigned TargetsTableMinimumSize =
220
- RTL.DeviceOffset + RTL.getNumberOfUserDevices ();
221
+ unsigned TargetsTableMinimumSize = DeviceOffset + NumberOfUserDevices;
221
222
222
223
if (TT.TargetsTable .size () < TargetsTableMinimumSize) {
223
224
TT.DeviceTables .resize (TargetsTableMinimumSize, {});
@@ -227,11 +228,11 @@ static void registerImageIntoTranslationTable(TranslationTable &TT,
227
228
}
228
229
229
230
// Register the image in all devices for this target type.
230
- for (int32_t I = 0 ; I < RTL. getNumberOfUserDevices () ; ++I) {
231
+ for (int32_t I = 0 ; I < NumberOfUserDevices ; ++I) {
231
232
// If we are changing the image we are also invalidating the target table.
232
- if (TT.TargetsImages [RTL. DeviceOffset + I] != Image) {
233
- TT.TargetsImages [RTL. DeviceOffset + I] = Image;
234
- TT.TargetsTable [RTL. DeviceOffset + I] =
233
+ if (TT.TargetsImages [DeviceOffset + I] != Image) {
234
+ TT.TargetsImages [DeviceOffset + I] = Image;
235
+ TT.TargetsTable [DeviceOffset + I] =
235
236
0 ; // lazy initialization of target table.
236
237
}
237
238
}
@@ -270,7 +271,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
270
271
DP (" Image " DPxMOD " is compatible with RTL %s!\n " ,
271
272
DPxPTR (Img->ImageStart ), R.Name .c_str ());
272
273
273
- R. initDevices (* this );
274
+ PM-> initDevices (R );
274
275
275
276
// Initialize (if necessary) translation table for this library.
276
277
PM->TrlTblMtx .lock ();
@@ -288,8 +289,10 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
288
289
289
290
DP (" Registering image " DPxMOD " with RTL %s!\n " , DPxPTR (Img->ImageStart ),
290
291
R.Name .c_str ());
291
- registerImageIntoTranslationTable (TransTable, R, Img);
292
- R.UsedImages .insert (Img);
292
+
293
+ registerImageIntoTranslationTable (TransTable, PM->DeviceOffsets [&R],
294
+ PM->DeviceUsed [&R], Img);
295
+ PM->UsedImages .insert (Img);
293
296
294
297
PM->TrlTblMtx .unlock ();
295
298
FoundRTL = &R;
@@ -344,11 +347,11 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
344
347
// Scan the RTLs that have associated images until we find one that supports
345
348
// the current image. We only need to scan RTLs that are already being used.
346
349
for (auto &R : PM->pluginAdaptors ()) {
347
- if (!R. isUsed ( ))
350
+ if (!DeviceOffsets. contains (&R ))
348
351
continue ;
349
352
350
353
// Ensure that we do not use any unused images associated with this RTL.
351
- if (!R. UsedImages .contains (Img))
354
+ if (!UsedImages.contains (Img))
352
355
continue ;
353
356
354
357
FoundRTL = &R;
0 commit comments