Skip to content

Commit 7429b97

Browse files
author
anikelal
committed
Removed always-inline attribute from stubs and made required littest changes
1 parent 2cab779 commit 7429b97

18 files changed

+2124
-1962
lines changed

clang/lib/AST/MicrosoftMangle.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
11631163
->hasAttr<CUDAGlobalAttr>())) &&
11641164
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
11651165
bool IsOCLDeviceStub =
1166-
ND && (isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) &&
1166+
ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() &&
11671167
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
11681168
if (IsDeviceStub)
11691169
mangleSourceName(

clang/lib/CodeGen/CGCall.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2502,7 +2502,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
25022502
NumElemsParam);
25032503
}
25042504

2505-
if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
2505+
if (TargetDecl->hasAttr<OpenCLKernelAttr>() &&
2506+
CallingConv != CallingConv::CC_C &&
2507+
CallingConv !=
2508+
CallingConv::CC_SpirFunction) { // Check CallingConv to avoid adding
2509+
// uniform-work-group-size attribute
2510+
// to OpenCL Kernel Stub
25062511
if (getLangOpts().OpenCLVersion <= 120) {
25072512
// OpenCL v1.2 Work groups are always uniform
25082513
FuncAttrs.addAttribute("uniform-work-group-size", "true");

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6133,11 +6133,9 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
61336133

61346134
const auto *FnType = cast<FunctionType>(PointeeType);
61356135

6136-
if (auto FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
6137-
if (FD->hasAttr<OpenCLKernelAttr>()) {
6138-
CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType);
6139-
}
6140-
}
6136+
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
6137+
FD && FD->hasAttr<OpenCLKernelAttr>())
6138+
CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType);
61416139

61426140
// If we are checking indirect calls and this call is indirect, check that the
61436141
// function pointer is a member of the bit set for the function type.

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6151,16 +6151,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
61516151

61526152
setNonAliasAttributes(GD, Fn);
61536153

6154-
if (D->hasAttr<OpenCLKernelAttr>()) {
6155-
if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
6156-
if (Fn->hasFnAttribute(llvm::Attribute::NoInline))
6157-
Fn->removeFnAttr(llvm::Attribute::NoInline);
6158-
if (Fn->hasFnAttribute(llvm::Attribute::InlineHint))
6159-
Fn->removeFnAttr(llvm::Attribute::InlineHint);
6160-
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
6161-
}
6162-
}
6163-
61646154
SetLLVMFunctionAttributesForDefinition(D, Fn);
61656155

61666156
if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Lines changed: 1571 additions & 1120 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl

Lines changed: 218 additions & 138 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 102 additions & 104 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
kernel void ker() {};
77
// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]]
8+
// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]]
9+
10+
// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]]
811

912
void foo() {};
1013
// CHECK: define{{.*}}@foo() #[[ATTR1:[0-9]+]]
@@ -15,3 +18,6 @@ void foo() {};
1518

1619
// CHECK: attributes #[[ATTR1]]
1720
// CHECK-NOT: uniform-work-group-size
21+
22+
// CHECK: attributes #[[ATTR2]]
23+
// CHECK-NOT: uniform-work-group-size

clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl

Lines changed: 40 additions & 102 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
2-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
3-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
4-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
5-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
6-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
7-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
8-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
9-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES
1+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR
2+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR
3+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR
4+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR
5+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR
6+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR
7+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86
8+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86
9+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLEX86
1010

1111
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
1212

@@ -39,6 +39,12 @@ void callee(int id, __global int *out) {
3939
out[id] = id;
4040
}
4141

42+
// TRIPLESPIR: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
43+
// TRIPLESPIR: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})
44+
45+
// TRIPLEX86: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
46+
// TRIPLEX86: call void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})
47+
4248
// COMMON-LABEL: define{{.*}} void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
4349
kernel void device_side_enqueue(global int *a, global int *b, int i) {
4450
// SPIR: %default_queue = alloca target("spirv.Queue")

clang/test/CodeGenOpenCL/convergent.cl

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ void test_not_unroll() {
127127
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
128128

129129
// CHECK-LABEL: @assume_convergent_asm
130-
// CHECK: tail call void asm sideeffect "s_barrier", ""() #6
130+
// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
131131
kernel void assume_convergent_asm()
132132
{
133133
__asm__ volatile("s_barrier");
@@ -138,7 +138,6 @@ kernel void assume_convergent_asm()
138138
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
139139
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
140140
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
141-
// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} }
142-
// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} }
143-
// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} }
144-
// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }
141+
// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
142+
// CHECK: attributes #6 = { {{[^}]*}}nounwind{{[^}]*}} }
143+
// CHECK: attributes #7 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }

clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@
99
typedef struct {int a;} ndrange_t;
1010

1111
kernel void test(int i) {
12+
1213
// AMDGPU-LABEL: define {{.*}} amdgpu_kernel void @test
14+
// AMDGPU-LABEL: call void @__clang_ocl_kern_imp_test(i32 noundef %0)
15+
1316
// SPIR-LABEL: define {{.*}} spir_kernel void @test
17+
// SPIR-LABEL: call spir_func void @__clang_ocl_kern_imp_test(i32 noundef %0)
18+
19+
// AMDGPU-LABEL: define {{.*}} void @__clang_ocl_kern_imp_test
20+
// SPIR-LABEL: define {{.*}} spir_func void @__clang_ocl_kern_imp_test
1421

1522
// COMMON-LABEL: entry:
1623
// AMDGPU: %block_sizes = alloca [1 x i64]
@@ -37,5 +44,5 @@ kernel void test(int i) {
3744

3845
// CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "<stdin>"
3946
// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", linkageName: "__clang_ocl_kern_imp_test", {{.*}} file: ![[TESTFILE]]
40-
// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 26)
41-
// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 27, scope: ![[IFSCOPE]])
47+
// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 33)
48+
// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 34, scope: ![[IFSCOPE]])

clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ __kernel void use_of_local_var()
5757
// CHECK-LABEL: define dso_local amdgpu_kernel void @use_of_local_var(
5858
// CHECK-SAME: ) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
5959
// CHECK-NEXT: [[ENTRY:.*:]]
60-
// CHECK-NEXT: call void @__clang_ocl_kern_imp_use_of_local_var() #[[ATTR7:[0-9]+]]
60+
// CHECK-NEXT: call void @__clang_ocl_kern_imp_use_of_local_var() #[[ATTR6]]
6161
// CHECK-NEXT: ret void
6262
//
6363
//

0 commit comments

Comments
 (0)