diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e83d38a14f77f..b2ede888b542d 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -576,8 +576,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
   /// Get the HSA kernel object representing the kernel function.
   uint64_t getKernelObject() const { return KernelObject; }
 
-  /// Get the size of implicitargs based on the code object version
-  /// @return 56 for cov4 and 256 for cov5
+  /// Get the size of implicitargs based on the code object version.
   uint32_t getImplicitArgsSize() const { return ImplicitArgsSize; }
 
   /// Indicates whether or not we need to set up our own private segment size.
@@ -3386,20 +3385,17 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
   if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
     return Err;
 
-  // Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
-  if (ImplArgs &&
-      getImplicitArgsSize() == sizeof(hsa_utils::AMDGPUImplicitArgsTy)) {
-    ImplArgs->BlockCountX = NumBlocks[0];
-    ImplArgs->BlockCountY = NumBlocks[1];
-    ImplArgs->BlockCountZ = NumBlocks[2];
-    ImplArgs->GroupSizeX = NumThreads[0];
-    ImplArgs->GroupSizeY = NumThreads[1];
-    ImplArgs->GroupSizeZ = NumThreads[2];
-    ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
-                             ? 3
-                             : 1 + (NumBlocks[1] * NumThreads[1] != 1);
-    ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
-  }
+  // Set the COV5+ implicit arguments to the appropriate values.
+  ImplArgs->BlockCountX = NumBlocks[0];
+  ImplArgs->BlockCountY = NumBlocks[1];
+  ImplArgs->BlockCountZ = NumBlocks[2];
+  ImplArgs->GroupSizeX = NumThreads[0];
+  ImplArgs->GroupSizeY = NumThreads[1];
+  ImplArgs->GroupSizeZ = NumThreads[2];
+  ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
+                           ? 3
+                           : 1 + (NumBlocks[1] * NumThreads[1] != 1);
+  ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
 
   // Push the kernel launch into the stream.
   return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 43be4e8edeba4..609ead942dbb3 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -40,17 +40,10 @@ struct AMDGPUImplicitArgsTy {
   uint8_t Unused2[132]; // 132 byte offset.
 };
 
-// Dummy struct for COV4 implicitargs.
-struct AMDGPUImplicitArgsTyCOV4 {
-  uint8_t Unused[56];
-};
-
 /// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
 /// `Version` is the ELF ABI version, e.g. COV5.
 inline uint32_t getImplicitArgsSize(uint16_t Version) {
-  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
-             ? sizeof(AMDGPUImplicitArgsTyCOV4)
-             : sizeof(AMDGPUImplicitArgsTy);
+  return sizeof(AMDGPUImplicitArgsTy);
 }
 
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index 44d1c737e2efb..b33101b99aa10 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -65,10 +65,9 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) {
   if (Header.e_machine == EM_AMDGPU) {
     if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
       return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
-    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
-        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
+    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
         Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
-      return createError("Invalid AMD ABI version, must be version 4 or above");
+      return createError("Invalid AMD ABI version, must be version 5 or above");
     if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
         (Header.e_flags & EF_AMDGPU_MACH) >
             EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)