20
20
#include < unistd.h>
21
21
#include < unordered_map>
22
22
23
+ #include " Shared/APITypes.h"
23
24
#include " Shared/Debug.h"
24
25
#include " Shared/Environment.h"
25
26
#include " Shared/Utils.h"
@@ -558,7 +559,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
558
559
559
560
/// Launch the AMDGPU kernel function.
560
561
Error launchImpl (GenericDeviceTy &GenericDevice, uint32_t NumThreads,
561
- uint64_t NumBlocks, KernelArgsTy &KernelArgs, void *Args,
562
+ uint64_t NumBlocks, KernelArgsTy &KernelArgs,
563
+ KernelLaunchParamsTy LaunchParams,
562
564
AsyncInfoWrapperTy &AsyncInfoWrapper) const override ;
563
565
564
566
/// Print more elaborate kernel launch info for AMDGPU
@@ -2802,9 +2804,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2802
2804
AsyncInfoWrapperTy AsyncInfoWrapper (*this , nullptr );
2803
2805
2804
2806
KernelArgsTy KernelArgs = {};
2805
- if (auto Err = AMDGPUKernel.launchImpl (*this , /* NumThread=*/ 1u ,
2806
- /* NumBlocks=*/ 1ul , KernelArgs,
2807
- /* Args=*/ nullptr , AsyncInfoWrapper))
2807
+ if (auto Err =
2808
+ AMDGPUKernel.launchImpl (*this , /* NumThread=*/ 1u ,
2809
+ /* NumBlocks=*/ 1ul , KernelArgs,
2810
+ KernelLaunchParamsTy{}, AsyncInfoWrapper))
2808
2811
return Err;
2809
2812
2810
2813
Error Err = Plugin::success ();
@@ -3265,12 +3268,11 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3265
3268
3266
3269
Error AMDGPUKernelTy::launchImpl (GenericDeviceTy &GenericDevice,
3267
3270
uint32_t NumThreads, uint64_t NumBlocks,
3268
- KernelArgsTy &KernelArgs, void *Args,
3271
+ KernelArgsTy &KernelArgs,
3272
+ KernelLaunchParamsTy LaunchParams,
3269
3273
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
3270
- const uint32_t LaunchParamsSize = KernelArgs.NumArgs * sizeof (void *);
3271
-
3272
- if (ArgsSize != LaunchParamsSize &&
3273
- ArgsSize != LaunchParamsSize + getImplicitArgsSize ())
3274
+ if (ArgsSize != LaunchParams.Size &&
3275
+ ArgsSize != LaunchParams.Size + getImplicitArgsSize ())
3274
3276
return Plugin::error (" Mismatch of kernel arguments size" );
3275
3277
3276
3278
AMDGPUPluginTy &AMDGPUPlugin =
@@ -3294,10 +3296,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
3294
3296
return Err;
3295
3297
3296
3298
utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr ;
3297
- if (ArgsSize == LaunchParamsSize + getImplicitArgsSize ()) {
3299
+ if (ArgsSize == LaunchParams. Size + getImplicitArgsSize ()) {
3298
3300
// Initialize implicit arguments.
3299
3301
ImplArgs = reinterpret_cast <utils::AMDGPUImplicitArgsTy *>(
3300
- advanceVoidPtr (AllArgs, LaunchParamsSize ));
3302
+ advanceVoidPtr (AllArgs, LaunchParams. Size ));
3301
3303
3302
3304
// Initialize the implicit arguments to zero.
3303
3305
std::memset (ImplArgs, 0 , getImplicitArgsSize ());
@@ -3306,8 +3308,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
3306
3308
// Copy the explicit arguments.
3307
3309
// TODO: We should expose the args memory manager alloc to the common part as
3308
3310
// an alternative to copying them twice.
3309
- if (LaunchParamsSize )
3310
- std::memcpy (AllArgs, * static_cast < void **>(Args), LaunchParamsSize );
3311
+ if (LaunchParams. Size )
3312
+ std::memcpy (AllArgs, LaunchParams. Data , LaunchParams. Size );
3311
3313
3312
3314
AMDGPUDeviceTy &AMDGPUDevice = static_cast <AMDGPUDeviceTy &>(GenericDevice);
3313
3315
0 commit comments