Skip to content

Commit 90abdf8

Browse files
authored
[CUDA][HIP][NFC] add CodeGenModule::shouldEmitCUDAGlobalVar (#98543)
Extract the logic of whether to emit a global variable, based on its CUDA/HIP host/device-related attributes, into CodeGenModule::shouldEmitCUDAGlobalVar so that it can be reused elsewhere.
1 parent f52a467 commit 90abdf8

File tree

2 files changed

+27
-20
lines changed

2 files changed

+27
-20
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
37023702
return D->isImplicit();
37033703
}
37043704

3705+
bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const {
3706+
assert(LangOpts.CUDA && "Should not be called by non-CUDA languages");
3707+
// We need to emit host-side 'shadows' for all global
3708+
// device-side variables because the CUDA runtime needs their
3709+
// size and host-side address in order to provide access to
3710+
// their device-side incarnations.
3711+
return !LangOpts.CUDAIsDevice || Global->hasAttr<CUDADeviceAttr>() ||
3712+
Global->hasAttr<CUDAConstantAttr>() ||
3713+
Global->hasAttr<CUDASharedAttr>() ||
3714+
Global->getType()->isCUDADeviceBuiltinSurfaceType() ||
3715+
Global->getType()->isCUDADeviceBuiltinTextureType();
3716+
}
3717+
37053718
void CodeGenModule::EmitGlobal(GlobalDecl GD) {
37063719
const auto *Global = cast<ValueDecl>(GD.getDecl());
37073720

@@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
37263739
// Non-constexpr non-lambda implicit host device functions are not emitted
37273740
// unless they are used on device side.
37283741
if (LangOpts.CUDA) {
3729-
if (LangOpts.CUDAIsDevice) {
3742+
assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3743+
"Expected Variable or Function");
3744+
if (const auto *VD = dyn_cast<VarDecl>(Global)) {
3745+
if (!shouldEmitCUDAGlobalVar(VD))
3746+
return;
3747+
} else if (LangOpts.CUDAIsDevice) {
37303748
const auto *FD = dyn_cast<FunctionDecl>(Global);
37313749
if ((!Global->hasAttr<CUDADeviceAttr>() ||
3732-
(LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
3750+
(LangOpts.OffloadImplicitHostDeviceTemplates &&
37333751
hasImplicitAttr<CUDAHostAttr>(FD) &&
37343752
hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() &&
37353753
!isLambdaCallOperator(FD) &&
37363754
!getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) &&
37373755
!Global->hasAttr<CUDAGlobalAttr>() &&
3738-
!Global->hasAttr<CUDAConstantAttr>() &&
3739-
!Global->hasAttr<CUDASharedAttr>() &&
3740-
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
3741-
!Global->getType()->isCUDADeviceBuiltinTextureType() &&
37423756
!(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
37433757
!Global->hasAttr<CUDAHostAttr>()))
37443758
return;
3745-
} else {
3746-
// We need to emit host-side 'shadows' for all global
3747-
// device-side variables because the CUDA runtime needs their
3748-
// size and host-side address in order to provide access to
3749-
// their device-side incarnations.
3750-
3751-
// So device-only functions are the only things we skip.
3752-
if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() &&
3753-
Global->hasAttr<CUDADeviceAttr>())
3754-
return;
3755-
3756-
assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3757-
"Expected Variable or Function");
3758-
}
3759+
// Device-only functions are the only things we skip.
3760+
} else if (!Global->hasAttr<CUDAHostAttr>() &&
3761+
Global->hasAttr<CUDADeviceAttr>())
3762+
return;
37593763
}
37603764

37613765
if (LangOpts.OpenMP) {

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,9 @@ class CodeGenModule : public CodeGenTypeCache {
563563

564564
bool isTriviallyRecursive(const FunctionDecl *F);
565565
bool shouldEmitFunction(GlobalDecl GD);
566+
// Whether a global variable should be emitted, based on its CUDA/HIP
567+
// host/device-related attributes.
568+
bool shouldEmitCUDAGlobalVar(const VarDecl *VD) const;
566569
bool shouldOpportunisticallyEmitVTables();
567570
/// Map used to be sure we don't emit the same CompoundLiteral twice.
568571
llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *>

0 commit comments

Comments
 (0)