@@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
3702
3702
return D->isImplicit ();
3703
3703
}
3704
3704
3705
+ bool CodeGenModule::shouldEmitCUDAGlobalVar (const VarDecl *Global) const {
3706
+ assert (LangOpts.CUDA && " Should not be called by non-CUDA languages" );
3707
+ // We need to emit host-side 'shadows' for all global
3708
+ // device-side variables because the CUDA runtime needs their
3709
+ // size and host-side address in order to provide access to
3710
+ // their device-side incarnations.
3711
+ return !LangOpts.CUDAIsDevice || Global->hasAttr <CUDADeviceAttr>() ||
3712
+ Global->hasAttr <CUDAConstantAttr>() ||
3713
+ Global->hasAttr <CUDASharedAttr>() ||
3714
+ Global->getType ()->isCUDADeviceBuiltinSurfaceType () ||
3715
+ Global->getType ()->isCUDADeviceBuiltinTextureType ();
3716
+ }
3717
+
3705
3718
void CodeGenModule::EmitGlobal (GlobalDecl GD) {
3706
3719
const auto *Global = cast<ValueDecl>(GD.getDecl ());
3707
3720
@@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
3726
3739
// Non-constexpr non-lambda implicit host device functions are not emitted
3727
3740
// unless they are used on device side.
3728
3741
if (LangOpts.CUDA ) {
3729
- if (LangOpts.CUDAIsDevice ) {
3742
+ assert ((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3743
+ " Expected Variable or Function" );
3744
+ if (const auto *VD = dyn_cast<VarDecl>(Global)) {
3745
+ if (!shouldEmitCUDAGlobalVar (VD))
3746
+ return ;
3747
+ } else if (LangOpts.CUDAIsDevice ) {
3730
3748
const auto *FD = dyn_cast<FunctionDecl>(Global);
3731
3749
if ((!Global->hasAttr <CUDADeviceAttr>() ||
3732
- (LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
3750
+ (LangOpts.OffloadImplicitHostDeviceTemplates &&
3733
3751
hasImplicitAttr<CUDAHostAttr>(FD) &&
3734
3752
hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr () &&
3735
3753
!isLambdaCallOperator (FD) &&
3736
3754
!getContext ().CUDAImplicitHostDeviceFunUsedByDevice .count (FD))) &&
3737
3755
!Global->hasAttr <CUDAGlobalAttr>() &&
3738
- !Global->hasAttr <CUDAConstantAttr>() &&
3739
- !Global->hasAttr <CUDASharedAttr>() &&
3740
- !Global->getType ()->isCUDADeviceBuiltinSurfaceType () &&
3741
- !Global->getType ()->isCUDADeviceBuiltinTextureType () &&
3742
3756
!(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
3743
3757
!Global->hasAttr <CUDAHostAttr>()))
3744
3758
return ;
3745
- } else {
3746
- // We need to emit host-side 'shadows' for all global
3747
- // device-side variables because the CUDA runtime needs their
3748
- // size and host-side address in order to provide access to
3749
- // their device-side incarnations.
3750
-
3751
- // So device-only functions are the only things we skip.
3752
- if (isa<FunctionDecl>(Global) && !Global->hasAttr <CUDAHostAttr>() &&
3753
- Global->hasAttr <CUDADeviceAttr>())
3754
- return ;
3755
-
3756
- assert ((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3757
- " Expected Variable or Function" );
3758
- }
3759
+ // Device-only functions are the only things we skip.
3760
+ } else if (!Global->hasAttr <CUDAHostAttr>() &&
3761
+ Global->hasAttr <CUDADeviceAttr>())
3762
+ return ;
3759
3763
}
3760
3764
3761
3765
if (LangOpts.OpenMP ) {
0 commit comments