Skip to content

Commit 0b12f77

Browse files
jhuber6 authored and tru committed
[Libomptarget][CUDA] Check CUDA compatibility correctly
We recently added support for multi-architecture binaries in libomptarget. This is done by extracting the architecture from the embedded image and comparing it with the major and minor version supported by the current CUDA installation. Previously we just compared these directly for equality, which was not correct for binary compatibility. The CUDA documentation states that an image is compatible with a device when their major versions are equal and the device's minor version is greater than or equal to the image's minor version. Change the check to use this new logic in the CUDA plugin. Fixes #57049. Reviewed By: jdoerfert, ye-luo. Differential Revision: https://reviews.llvm.org/D131567 (cherry picked from commit fdbb153)
1 parent a8d4cf1 commit 0b12f77

File tree

1 file changed

+11
-3
lines changed
  • openmp/libomptarget/plugins/cuda/src

1 file changed

+11
-3
lines changed

openmp/libomptarget/plugins/cuda/src/rtl.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "llvm/ADT/StringRef.h"
14+
1315
#include <algorithm>
1416
#include <cassert>
1517
#include <cstddef>
@@ -33,6 +35,8 @@
3335

3436
#include "llvm/Frontend/OpenMP/OMPConstants.h"
3537

38+
using namespace llvm;
39+
3640
// Utility for retrieving and printing CUDA error string.
3741
#ifdef OMPTARGET_DEBUG
3842
#define CUDA_ERR_STRING(err) \
@@ -1529,13 +1533,14 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
15291533
return false;
15301534

15311535
// A subarchitecture was not specified. Assume it is compatible.
1532-
if (!info->Arch)
1536+
if (!info || !info->Arch)
15331537
return true;
15341538

15351539
int32_t NumberOfDevices = 0;
15361540
if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS)
15371541
return false;
15381542

1543+
StringRef ArchStr = StringRef(info->Arch).drop_front(sizeof("sm_") - 1);
15391544
for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) {
15401545
CUdevice Device;
15411546
if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS)
@@ -1551,8 +1556,11 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
15511556
Device) != CUDA_SUCCESS)
15521557
return false;
15531558

1554-
std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor);
1555-
if (ArchStr != info->Arch)
1559+
// A cubin generated for a certain compute capability is supported to run on
1560+
// any GPU with the same major revision and same or higher minor revision.
1561+
int32_t ImageMajor = ArchStr[0] - '0';
1562+
int32_t ImageMinor = ArchStr[1] - '0';
1563+
if (Major != ImageMajor || Minor < ImageMinor)
15561564
return false;
15571565
}
15581566

0 commit comments

Comments
 (0)