Skip to content

Commit e203a67

Browse files
authored
[CUDA][HIP] `__constant__` should imply constant (#110182)
Currently, `__constant__` variables are not unconditionally marked as `constant` in IR, which seems odd given their definition. This is generally inconsequential for NVPTX/AMDGPU, since those backends emit such variables in the constant address space. However, it is potentially significant for cases such as HIP-on-SPIR-V: SPIR-V does not allow casts to/from the constant address space (`UniformConstant`), which forces `__constant__` variables to be emitted in the global address space, thus making IR constness meaningful.
1 parent 6558e56 commit e203a67

12 files changed

+44
-30
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5622,8 +5622,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
56225622
emitter->finalize(GV);
56235623

56245624
// If it is safe to mark the global 'constant', do so now.
5625-
GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
5626-
D->getType().isConstantStorage(getContext(), true, true));
5625+
GV->setConstant((D->hasAttr<CUDAConstantAttr>() && LangOpts.CUDAIsDevice) ||
5626+
(!NeedsGlobalCtor && !NeedsGlobalDtor &&
5627+
D->getType().isConstantStorage(getContext(), true, true)));
56275628

56285629
// If it is in a read-only section, mark it 'constant'.
56295630
if (const SectionAttr *SA = D->getAttr<SectionAttr>()) {

clang/test/CodeGenCUDA/address-spaces.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
// CHECK: @i ={{.*}} addrspace(1) externally_initialized global
1010
__device__ int i;
1111

12-
// CHECK: @j ={{.*}} addrspace(4) externally_initialized global
12+
// CHECK: @j ={{.*}} addrspace(4) externally_initialized constant
1313
__constant__ int j;
1414

1515
// CHECK: @k ={{.*}} addrspace(3) global

clang/test/CodeGenCUDA/amdgpu-visibility.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44

55
#include "Inputs/cuda.h"
66

7-
// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized global
7+
// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized constant
88
// CHECK-DEFAULT: @g ={{.*}} addrspace(1) externally_initialized global
9-
// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized global
9+
// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized constant
1010
// CHECK-PROTECTED: @g = protected addrspace(1) externally_initialized global
11-
// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized global
11+
// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized constant
1212
// CHECK-HIDDEN: @g = protected addrspace(1) externally_initialized global
1313
__constant__ int c;
1414
__device__ int g;

clang/test/CodeGenCUDA/anon-ns.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,13 @@
2828
// HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
2929
// HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
3030
// HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
31-
// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global
31+
// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized constant
3232
// HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
3333

3434
// CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
3535
// CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
3636
// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
37-
// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global
37+
// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized constant
3838
// CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
3939

4040
// COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global

clang/test/CodeGenCUDA/device-var-init.cu

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ __shared__ int s_v;
2626
// DEVICE: @s_v ={{.*}} addrspace(3) global i32 undef,
2727
// HOST: @s_v = internal global i32 undef,
2828
__constant__ int c_v;
29-
// DEVICE: addrspace(4) externally_initialized global i32 0,
29+
// DEVICE: addrspace(4) externally_initialized constant i32 0,
3030
// HOST: @c_v = internal global i32 undef,
3131

3232
__device__ int d_v_i = 1;
@@ -51,14 +51,14 @@ __shared__ T s_t;
5151
// DEVICE: @s_t ={{.*}} addrspace(3) global %struct.T undef,
5252
// HOST: @s_t = internal global %struct.T undef,
5353
__constant__ T c_t;
54-
// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized global %struct.T zeroinitializer,
54+
// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized constant %struct.T zeroinitializer,
5555
// HOST: @c_t = internal global %struct.T undef,
5656

5757
__device__ T d_t_i = {2};
5858
// DEVICE: @d_t_i ={{.*}} addrspace(1) externally_initialized global %struct.T { i32 2 },
5959
// HOST: @d_t_i = internal global %struct.T undef,
6060
__constant__ T c_t_i = {2};
61-
// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized global %struct.T { i32 2 },
61+
// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized constant %struct.T { i32 2 },
6262
// HOST: @c_t_i = internal global %struct.T undef,
6363

6464
// empty constructor
@@ -69,7 +69,7 @@ __shared__ EC s_ec;
6969
// DEVICE: @s_ec ={{.*}} addrspace(3) global %struct.EC undef,
7070
// HOST: @s_ec = internal global %struct.EC undef,
7171
__constant__ EC c_ec;
72-
// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC zeroinitializer,
72+
// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC zeroinitializer,
7373
// HOST: @c_ec = internal global %struct.EC undef
7474

7575
// empty destructor
@@ -80,7 +80,7 @@ __shared__ ED s_ed;
8080
// DEVICE: @s_ed ={{.*}} addrspace(3) global %struct.ED undef,
8181
// HOST: @s_ed = internal global %struct.ED undef,
8282
__constant__ ED c_ed;
83-
// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized global %struct.ED zeroinitializer,
83+
// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized constant %struct.ED zeroinitializer,
8484
// HOST: @c_ed = internal global %struct.ED undef,
8585

8686
__device__ ECD d_ecd;
@@ -90,7 +90,7 @@ __shared__ ECD s_ecd;
9090
// DEVICE: @s_ecd ={{.*}} addrspace(3) global %struct.ECD undef,
9191
// HOST: @s_ecd = internal global %struct.ECD undef,
9292
__constant__ ECD c_ecd;
93-
// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
93+
// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized constant %struct.ECD zeroinitializer,
9494
// HOST: @c_ecd = internal global %struct.ECD undef,
9595

9696
// empty templated constructor -- allowed with no arguments
@@ -101,14 +101,14 @@ __shared__ ETC s_etc;
101101
// DEVICE: @s_etc ={{.*}} addrspace(3) global %struct.ETC undef,
102102
// HOST: @s_etc = internal global %struct.ETC undef,
103103
__constant__ ETC c_etc;
104-
// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
104+
// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized constant %struct.ETC zeroinitializer,
105105
// HOST: @c_etc = internal global %struct.ETC undef,
106106

107107
__device__ NCFS d_ncfs;
108108
// DEVICE: @d_ncfs ={{.*}} addrspace(1) externally_initialized global %struct.NCFS { i32 3 }
109109
// HOST: @d_ncfs = internal global %struct.NCFS undef,
110110
__constant__ NCFS c_ncfs;
111-
// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
111+
// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized constant %struct.NCFS { i32 3 }
112112
// HOST: @c_ncfs = internal global %struct.NCFS undef,
113113

114114
// Regular base class -- allowed
@@ -119,7 +119,7 @@ __shared__ T_B_T s_t_b_t;
119119
// DEVICE: @s_t_b_t ={{.*}} addrspace(3) global %struct.T_B_T undef,
120120
// HOST: @s_t_b_t = internal global %struct.T_B_T undef,
121121
__constant__ T_B_T c_t_b_t;
122-
// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
122+
// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_B_T zeroinitializer,
123123
// HOST: @c_t_b_t = internal global %struct.T_B_T undef,
124124

125125
// Incapsulated object of allowed class -- allowed
@@ -130,7 +130,7 @@ __shared__ T_F_T s_t_f_t;
130130
// DEVICE: @s_t_f_t ={{.*}} addrspace(3) global %struct.T_F_T undef,
131131
// HOST: @s_t_f_t = internal global %struct.T_F_T undef,
132132
__constant__ T_F_T c_t_f_t;
133-
// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
133+
// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_F_T zeroinitializer,
134134
// HOST: @c_t_f_t = internal global %struct.T_F_T undef,
135135

136136
// array of allowed objects -- allowed
@@ -141,7 +141,7 @@ __shared__ T_FA_T s_t_fa_t;
141141
// DEVICE: @s_t_fa_t ={{.*}} addrspace(3) global %struct.T_FA_T undef,
142142
// HOST: @s_t_fa_t = internal global %struct.T_FA_T undef,
143143
__constant__ T_FA_T c_t_fa_t;
144-
// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
144+
// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_FA_T zeroinitializer,
145145
// HOST: @c_t_fa_t = internal global %struct.T_FA_T undef,
146146

147147

@@ -153,7 +153,7 @@ __shared__ EC_I_EC s_ec_i_ec;
153153
// DEVICE: @s_ec_i_ec ={{.*}} addrspace(3) global %struct.EC_I_EC undef,
154154
// HOST: @s_ec_i_ec = internal global %struct.EC_I_EC undef,
155155
__constant__ EC_I_EC c_ec_i_ec;
156-
// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
156+
// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC_I_EC zeroinitializer,
157157
// HOST: @c_ec_i_ec = internal global %struct.EC_I_EC undef,
158158

159159
// DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef

clang/test/CodeGenCUDA/device-var-linkage.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
// NORDC-H-DAG: @v1 = internal global i32 undef
2121
// RDC-H-DAG: @v1 = global i32 undef
2222
__device__ int v1;
23-
// DEV-DAG: @v2 = addrspace(4) externally_initialized global i32 0
23+
// DEV-DAG: @v2 = addrspace(4) externally_initialized constant i32 0
2424
// NORDC-H-DAG: @v2 = internal global i32 undef
2525
// RDC-H-DAG: @v2 = global i32 undef
2626
__constant__ int v2;
@@ -48,10 +48,10 @@ extern __managed__ int ev3;
4848
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
4949
// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
5050
static __device__ int sv1;
51-
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
52-
// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
51+
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized constant i32 0
52+
// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
5353
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
54-
// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
54+
// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0
5555
static __constant__ int sv2;
5656
// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null
5757
// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null

clang/test/CodeGenCUDA/filter-decl.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
__asm__("file scope asm is host only");
1111

1212
// CHECK-HOST: constantdata = internal global
13-
// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized global
13+
// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized constant
1414
__constant__ char constantdata[256];
1515

1616
// CHECK-HOST: devicedata = internal global

clang/test/CodeGenCUDA/static-device-var-no-rdc.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ static __device__ int x5;
5050
}
5151

5252
// Check a static constant variable referenced by host is externalized.
53-
// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized global i32 0
53+
// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized constant i32 0
5454
// HOST-DAG: @_ZL1y = internal global i32 undef
5555
// HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
5656

clang/test/CodeGenCUDA/static-device-var-rdc.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,11 +81,11 @@ static __device__ int x;
8181
static __device__ int x2;
8282

8383
// Test normal static device variables
84-
// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized global i32 0
84+
// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized constant i32 0
8585
// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y[[FILEID:.*]]\00"
8686

8787
// Test externalized static device variables
88-
// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
88+
// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
8989
// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
9090

9191
static __constant__ int y;

clang/test/CodeGenCUDA/template-class-static-member.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ const int A<T>::const_member;
3838
template class A<int>;
3939

4040
//DEV-DAG: @_ZN1AIiE8d_memberE = internal addrspace(1) global i32 0, comdat, align 4
41-
//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) global i32 0, comdat, align 4
41+
//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) constant i32 0, comdat, align 4
4242
//DEV-DAG: @_ZN1AIiE8m_memberE = internal addrspace(1) externally_initialized global ptr addrspace(1) null
4343
//DEV-DAG: @_ZN1AIiE12const_memberE = internal addrspace(4) constant i32 0, comdat, align 4
4444
//DEV-NEG-NOT: @_ZN1AIiE8h_memberE

0 commit comments

Comments
 (0)