Skip to content

Commit f6685f7

Browse files
committed
[OpenMP][CUDA] Refine the logic to determine grid size
This patch refines the logic that determines the grid size, because the previous method could bypass the check of whether `CudaBlocksPerGrid` is greater than the actual hardware limit.

Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D119311
1 parent 547a667 commit f6685f7

File tree

1 file changed

+6
-4
lines changed
  • openmp/libomptarget/plugins/cuda/src

1 file changed

+6
-4
lines changed

openmp/libomptarget/plugins/cuda/src/rtl.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,15 +1170,17 @@ class DeviceRTLTy {
11701170
DP("Using default number of teams %d\n", DeviceData[DeviceId].NumTeams);
11711171
CudaBlocksPerGrid = DeviceData[DeviceId].NumTeams;
11721172
}
1173-
} else if (TeamNum > DeviceData[DeviceId].BlocksPerGrid) {
1174-
DP("Capping number of teams to team limit %d\n",
1175-
DeviceData[DeviceId].BlocksPerGrid);
1176-
CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
11771173
} else {
11781174
DP("Using requested number of teams %d\n", TeamNum);
11791175
CudaBlocksPerGrid = TeamNum;
11801176
}
11811177

1178+
if (CudaBlocksPerGrid > DeviceData[DeviceId].BlocksPerGrid) {
1179+
DP("Capping number of teams to team limit %d\n",
1180+
DeviceData[DeviceId].BlocksPerGrid);
1181+
CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid;
1182+
}
1183+
11821184
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
11831185
"Launching kernel %s with %d blocks and %d threads in %s mode\n",
11841186
(getOffloadEntry(DeviceId, TgtEntryPtr))

0 commit comments

Comments
 (0)