Skip to content

Commit a536022

Browse files
authored
[libclc] Change _CLC_DECL to _CLC_DEF for some PTX builtin definitions (#4580)
Change _CLC_DECL (meant for declarations) to _CLC_DEF on these builtin definitions. The _CLC_DEF macro adds the `always_inline` attribute, which is needed for optimizations. Signed-off-by: Victor Lomuller <[email protected]>
1 parent 51b450c commit a536022

File tree

3 files changed

+28
-27
lines changed

3 files changed

+28
-27
lines changed

libclc/ptx-nvidiacl/libspirv/SPV_EXT_shader_atomic_float_add/atomicfaddext.cl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -65,14 +65,14 @@ float __clc__atomic_fetch_add_float_local_seq_cst(__local float *, float) __asm(
6565
__CLC_ATOMICFADDEXT(float, global)
6666
__CLC_ATOMICFADDEXT(float, local)
6767

68-
_CLC_DECL float
68+
_CLC_DEF float
6969
_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(
7070
__global float *pointer, unsigned int scope, unsigned int semantics,
7171
float value) {
7272
return __spirv_AtomicFAddEXT(pointer, scope, semantics, value);
7373
}
7474

75-
_CLC_DECL float
75+
_CLC_DEF float
7676
_Z21__spirv_AtomicFAddEXTPU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(
7777
__local float *pointer, unsigned int scope, unsigned int semantics,
7878
float value) {
@@ -116,15 +116,15 @@ double __clc__atomic_fetch_add_double_local_seq_cst(
116116
__CLC_ATOMICFADDEXT(double, global)
117117
__CLC_ATOMICFADDEXT(double, local)
118118

119-
_CLC_DECL double
119+
_CLC_DEF double
120120
_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(
121121
__global double *pointer, unsigned int scope, unsigned int semantics,
122122
double value) {
123123
// FIXME: Double-precision atomics must be emulated for __CUDA_ARCH <= sm_50
124124
return __spirv_AtomicFAddEXT(pointer, scope, semantics, value);
125125
}
126126

127-
_CLC_DECL double
127+
_CLC_DEF double
128128
_Z21__spirv_AtomicFAddEXTPU3AS3dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(
129129
__local double *pointer, unsigned int scope, unsigned int semantics,
130130
double value) {

libclc/ptx-nvidiacl/libspirv/group/collectives.cl

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -369,16 +369,16 @@ __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
369369
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)
370370

371371
// half requires additional mangled entry points
372-
_CLC_DECL _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
373-
half x) {
372+
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
373+
half x) {
374374
return __spirv_GroupFAdd(scope, op, x);
375375
}
376-
_CLC_DECL _CLC_CONVERGENT half _Z17__spirv_GroupFMinjjDF16_(uint scope, uint op,
377-
half x) {
376+
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFMinjjDF16_(uint scope, uint op,
377+
half x) {
378378
return __spirv_GroupFMin(scope, op, x);
379379
}
380-
_CLC_DECL _CLC_CONVERGENT half _Z17__spirv_GroupFMaxjjDF16_(uint scope, uint op,
381-
half x) {
380+
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFMaxjjDF16_(uint scope, uint op,
381+
half x) {
382382
return __spirv_GroupFMax(scope, op, x);
383383
}
384384

@@ -461,20 +461,21 @@ __CLC_GROUP_BROADCAST(float)
461461
__CLC_GROUP_BROADCAST(double)
462462

463463
// half requires additional mangled entry points
464-
_CLC_DECL _CLC_CONVERGENT half
464+
_CLC_DEF _CLC_CONVERGENT half
465465
_Z17__spirv_GroupBroadcastjDF16_m(uint scope, half x, ulong local_id) {
466466
return __spirv_GroupBroadcast(scope, x, local_id);
467467
}
468-
_CLC_DECL _CLC_CONVERGENT half
468+
_CLC_DEF _CLC_CONVERGENT half
469469
_Z17__spirv_GroupBroadcastjDF16_Dv2_m(uint scope, half x, ulong2 local_id) {
470470
return __spirv_GroupBroadcast(scope, x, local_id);
471471
}
472-
_CLC_DECL _CLC_CONVERGENT half
472+
_CLC_DEF _CLC_CONVERGENT half
473473
_Z17__spirv_GroupBroadcastjDF16_Dv3_m(uint scope, half x, ulong3 local_id) {
474474
return __spirv_GroupBroadcast(scope, x, local_id);
475475
}
476-
_CLC_DECL _CLC_CONVERGENT half
477-
_Z22__spirv_GroupBroadcastjDF16_j(uint scope, half x, uint local_id) {
476+
_CLC_DEF _CLC_CONVERGENT half _Z22__spirv_GroupBroadcastjDF16_j(uint scope,
477+
half x,
478+
uint local_id) {
478479
return __spirv_GroupBroadcast(scope, x, (ulong)local_id);
479480
}
480481

libclc/ptx-nvidiacl/libspirv/images/image.cl

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -319,15 +319,15 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
319319

320320
// Unsampled images
321321
#define _CLC_DEFINE_IMAGE1D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size) \
322-
_CLC_DECL \
322+
_CLC_DEF \
323323
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image1d_roiET_T0_T1_( \
324324
read_only image1d_t image, int x) { \
325325
return out_##elem_t( \
326326
__nvvm_suld_1d_v4i##elem_size##_clamp(image, x * sizeof(elem_t##4))); \
327327
}
328328

329329
#define _CLC_DEFINE_IMAGE2D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size) \
330-
_CLC_DECL \
330+
_CLC_DEF \
331331
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image2d_roDv2_iET_T0_T1_( \
332332
read_only image2d_t image, int2 coord) { \
333333
return out_##elem_t(__nvvm_suld_2d_v4i##elem_size##_clamp( \
@@ -336,7 +336,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
336336

337337
#define _CLC_DEFINE_IMAGE3D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size, \
338338
coord_mangled) \
339-
_CLC_DECL \
339+
_CLC_DEF \
340340
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image3d_ro##coord_mangled##ET_T0_T1_( \
341341
read_only image3d_t image, int4 coord) { \
342342
return out_##elem_t(__nvvm_suld_3d_v4i##elem_size##_clamp( \
@@ -345,7 +345,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
345345

346346
#define _CLC_DEFINE_IMAGE1D_WRITE_BUILTIN(elem_t, elem_t_mangled, elem_size, \
347347
int_rep) \
348-
_CLC_DECL void \
348+
_CLC_DEF void \
349349
_Z18__spirv_ImageWriteI14ocl_image1d_woiDv4_##elem_t_mangled##EvT_T0_T1_( \
350350
write_only image1d_t image, int x, elem_t##4 c) { \
351351
__nvvm_sust_1d_v4i##elem_size##_clamp( \
@@ -355,7 +355,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
355355

356356
#define _CLC_DEFINE_IMAGE2D_WRITE_BUILTIN(elem_t, elem_t_mangled, elem_size, \
357357
int_rep) \
358-
_CLC_DECL void \
358+
_CLC_DEF void \
359359
_Z18__spirv_ImageWriteI14ocl_image2d_woDv2_iDv4_##elem_t_mangled##EvT_T0_T1_( \
360360
write_only image2d_t image, int2 coord, elem_t##4 c) { \
361361
__nvvm_sust_2d_v4i##elem_size##_clamp( \
@@ -365,7 +365,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
365365

366366
#define _CLC_DEFINE_IMAGE3D_WRITE_BUILTIN(elem_t, elem_t_mangled, elem_size, \
367367
int_rep, val_mangled) \
368-
_CLC_DECL void \
368+
_CLC_DEF void \
369369
_Z18__spirv_ImageWriteI14ocl_image3d_woDv4_i##val_mangled##EvT_T0_T1_( \
370370
write_only image3d_t image, int4 coord, elem_t##4 c) { \
371371
__nvvm_sust_3d_v4i##elem_size##_clamp( \
@@ -414,7 +414,7 @@ _CLC_DEFINE_IMAGE3D_WRITE_BUILTIN(half, DF16_, 16, short, Dv4_DF16_)
414414

415415
// Sampled images
416416
#define _CLC_DEFINE_SAMPLED_IMAGE_BUILTIN(dims) \
417-
_CLC_DECL __ocl_sampled_image##dims##d_ro_t \
417+
_CLC_DEF __ocl_sampled_image##dims##d_ro_t \
418418
_Z20__spirv_SampledImageI14ocl_image##dims##d_ro32__spirv_SampledImage__image##dims##d_roET0_T_11ocl_sampler( \
419419
read_only image##dims##d_t image, sampler_t sampler) { \
420420
return __clc__sampled_image##dims##d_pack(image, sampler); \
@@ -899,7 +899,7 @@ _DEFINE_SAMPLED_LOADS(half, 16)
899899
#define _CLC_DEFINE_IMAGE_SAMPLED_READ_BUILTIN( \
900900
elem_t, elem_t_mangled, dims, input_coord_t, input_coord_t_mangled, \
901901
sampling_coord_t) \
902-
_CLC_DECL \
902+
_CLC_DEF \
903903
elem_t##4 _Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image##dims##d_roDv4_##elem_t_mangled##input_coord_t_mangled##ET0_T_T1_if( \
904904
__ocl_sampled_image##dims##d_ro_t sampled_image, \
905905
input_coord_t input_coord, int operands, float lod) { \
@@ -973,22 +973,22 @@ _CLC_DEFINE_IMAGE_SAMPLED_READ_BUILTIN(half, DF16_, 3, int4, Dv4_i, float4)
973973
#undef _CLC_DEFINE_IMAGE_SAMPLED_READ_BUILTIN
974974

975975
// Size Queries
976-
_CLC_DECL int _Z22__spirv_ImageQuerySizeIDv1_i14ocl_image1d_roET_T0_(
976+
_CLC_DEF int _Z22__spirv_ImageQuerySizeIDv1_i14ocl_image1d_roET_T0_(
977977
read_only image1d_t image) {
978978
return __nvvm_suq_width_1i(image);
979979
}
980980

981-
_CLC_DECL int2 _Z22__spirv_ImageQuerySizeIDv2_i14ocl_image2d_roET_T0_(
981+
_CLC_DEF int2 _Z22__spirv_ImageQuerySizeIDv2_i14ocl_image2d_roET_T0_(
982982
read_only image2d_t image) {
983983
int width = __nvvm_suq_width_2i(image);
984984
int height = __nvvm_suq_height_2i(image);
985985
return (int2)(width, height);
986986
}
987987

988-
_CLC_DECL int3 _Z22__spirv_ImageQuerySizeIDv3_i14ocl_image3d_roET_T0_(
988+
_CLC_DEF int3 _Z22__spirv_ImageQuerySizeIDv3_i14ocl_image3d_roET_T0_(
989989
read_only image3d_t image) {
990990
int width = __nvvm_suq_width_3i(image);
991991
int height = __nvvm_suq_height_3i(image);
992992
int depth = __nvvm_suq_depth_3i(image);
993993
return (int3)(width, height, depth);
994-
}
994+
}

0 commit comments

Comments
 (0)