[NFC][AMDGPU] Pre-commit clang and llvm tests for dynamic allocas #120063
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-globalisel

Author: Aaditya (easyonaadit)

Changes: For #119822

Patch is 39.13 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/120063.diff

4 Files Affected:
diff --git a/clang/test/CodeGenHIP/dynamic-alloca.cpp b/clang/test/CodeGenHIP/dynamic-alloca.cpp
new file mode 100644
index 00000000000000..4bbc6b2e69917f
--- /dev/null
+++ b/clang/test/CodeGenHIP/dynamic-alloca.cpp
@@ -0,0 +1,532 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+
+#define __global__ __attribute__((global))
+#define __device__ __attribute__((device))
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z34kernel_function_builtin_alloca_immv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 40, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_imm(){
+ volatile int *alloca = static_cast<volatile int*>(__builtin_alloca(10*sizeof(int)));
+ static_cast<volatile int*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z50kernel_function_non_entry_block_builtin_alloca_immPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[ALLOCA2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA2]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 40, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: [[TMP5:%.*]] = alloca i8, i64 80, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(5) [[TMP5]] to ptr
+// CHECK-NEXT: store ptr [[TMP6]], ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0
+// CHECK-NEXT: store volatile i32 20, ptr [[ARRAYIDX3]], align 4
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_non_entry_block_builtin_alloca_imm(int* a){
+ if(*a < 10){
+ volatile void *alloca = __builtin_alloca(10*sizeof(int));
+ static_cast<volatile int*>(alloca)[0] = 10;
+ }
+ else {
+ volatile void *alloca = __builtin_alloca(20*sizeof(int));
+ static_cast<volatile int*>(alloca)[0] = 20;
+ }
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z30kernel_function_builtin_allocaPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca(int* a){
+ volatile void *alloca = __builtin_alloca((*a)*sizeof(int));
+ static_cast<volatile int*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z44kernel_function_builtin_alloca_uninitializedPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile float 1.000000e+01, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_uninitialized(int* a){
+ volatile void *alloca = __builtin_alloca_uninitialized((*a)*sizeof(float));
+ static_cast<volatile float*>(alloca)[0] = 10.0;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z44kernel_function_builtin_alloca_default_alignPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 8
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i64 10, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_default_align(int* a){
+ volatile void *alloca = __builtin_alloca_with_align((*a)*sizeof(long), 64);
+ static_cast<volatile long*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z48kernel_function_builtin_alloca_non_default_alignPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 32, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_non_default_align(int* a){
+ volatile void *alloca = __builtin_alloca_with_align((*a)*sizeof(unsigned), 256);
+ static_cast<volatile unsigned*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z62kernel_function_builtin_alloca_non_default_align_uninitializedPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[MUL]], align 32, addrspace(5)
+// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_builtin_alloca_non_default_align_uninitialized(int* a){
+ volatile void *alloca = __builtin_alloca_with_align_uninitialized((*a)*sizeof(unsigned), 256);
+ static_cast<volatile unsigned*>(alloca)[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z35kernel_function_variable_size_arrayPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[SAVED_STACK:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[SAVED_STACK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SAVED_STACK]] to ptr
+// CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(5) @llvm.stacksave.p5()
+// CHECK-NEXT: store ptr addrspace(5) [[TMP3]], ptr [[SAVED_STACK_ASCAST]], align 4
+// CHECK-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 4, addrspace(5)
+// CHECK-NEXT: [[VLA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA]] to ptr
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA_ASCAST]], i64 2
+// CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(5), ptr [[SAVED_STACK_ASCAST]], align 4
+// CHECK-NEXT: call void @llvm.stackrestore.p5(ptr addrspace(5) [[TMP4]])
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_variable_size_array(int* a){
+ int arr[*a];
+ arr[2] = 10;
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z51kernel_function_non_entry_block_static_sized_allocaPi(
+// CHECK-SAME: ptr addrspace(1) noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[ALLOCA2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA2]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: [[A1:%.*]] = load ptr, ptr [[A_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A1]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+// CHECK: [[IF_THEN]]:
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP3]] to i64
+// CHECK-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
+// CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(5) [[TMP4]] to ptr
+// CHECK-NEXT: store ptr [[TMP5]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0
+// CHECK-NEXT: store volatile i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: br label %[[IF_END:.*]]
+// CHECK: [[IF_ELSE]]:
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK-NEXT: [[MUL3:%.*]] = mul nsw i32 2, [[TMP8]]
+// CHECK-NEXT: [[CONV4:%.*]] = sext i32 [[MUL3]] to i64
+// CHECK-NEXT: [[MUL5:%.*]] = mul i64 [[CONV4]], 4
+// CHECK-NEXT: [[TMP9:%.*]] = alloca i8, i64 [[MUL5]], align 8, addrspace(5)
+// CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(5) [[TMP9]] to ptr
+// CHECK-NEXT: store ptr [[TMP10]], ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ALLOCA2_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0
+// CHECK-NEXT: store volatile i32 20, ptr [[ARRAYIDX6]], align 4
+// CHECK-NEXT: br label %[[IF_END]]
+// CHECK: [[IF_END]]:
+// CHECK-NEXT: ret void
+//
+__global__ void kernel_function_non_entry_block_static_sized_alloca(int* a){
+ if(*a < 10){
+ volatile void *alloca = __builtin_alloca((*a)*sizeof(int));
+ static_cast<volatile int*>(alloca)[0] = 10;
+ }
+ else {
+ volatile void *alloca = __builtin_alloca(2*(*a)*sizeof(int));
+ static_cast<volatile int*>(alloca)[0] = 20;
+ }
+}
+
+// CHECK-LABEL: define dso_local void @_Z50device_function_non_entry_block_builtin_alloca_immv(
+// CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 10, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ALLOCA_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
+// CHECK-NEXT: store i32 10, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+__device__ void device_function_non_entry_block_builtin_alloca_imm(){
+ int *alloca = static_cast<int *>(__builtin_alloca(10));
+ alloca[0] = 10;
+}
+
+// CHECK-LABEL: define dso_local void @_Z30device_function_builtin_allocaPi(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA:...
[truncated]
@@ -0,0 +1,532 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
I think there are already some clang tests for this, not sure what the point of this part is. We do need unit tests which execute the code
> We do need unit tests which execute the code

Are you referring to hip-tests here?
> I think there are already some clang tests for this

Yes, there are some OpenCL tests similar to this.

> not sure what the point of this part is.

I was looking to test these builtins for HIP. Is this needed?

> We do need unit tests which execute the code

We have plans to add hip-tests, or do you want the code to be exercised in the llvm-test-suite?
I guess 51a014c didn't add any hip tests, so I guess we need some.

> We have plans to add hip-tests, or do you want the code to be exercised in the llvm-test-suite?

llvm-test-suite would be better.
The hip-tests and the llvm-test-suite will be separate patches. For now, I have removed this file.
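As a concrete illustration of that follow-up work, here is a minimal sketch of what an executing test (for hip-tests or the llvm-test-suite) could look like, assuming the standard HIP runtime API; the kernel name, launch configuration, and checked values are illustrative assumptions, not part of this patch:

```cpp
// Illustrative sketch only: exercises a device-side __builtin_alloca and
// verifies the result on the host. Kernel name, launch configuration, and
// expected values are assumptions for this example.
#include <hip/hip_runtime.h>
#include <cstdio>

__global__ void dynamic_alloca_kernel(int *out, int n) {
  // Dynamically sized stack allocation; the size depends on a kernel argument.
  volatile int *buf =
      static_cast<volatile int *>(__builtin_alloca(n * sizeof(int)));
  for (int i = 0; i < n; ++i)
    buf[i] = i + 1;
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += buf[i];
  out[threadIdx.x] = sum;
}

int main() {
  constexpr int Threads = 64;
  constexpr int N = 10;
  int *dOut = nullptr;
  if (hipMalloc(&dOut, Threads * sizeof(int)) != hipSuccess)
    return 1;
  hipLaunchKernelGGL(dynamic_alloca_kernel, dim3(1), dim3(Threads), 0, 0,
                     dOut, N);
  if (hipDeviceSynchronize() != hipSuccess)
    return 1;
  int hOut[Threads];
  if (hipMemcpy(hOut, dOut, sizeof(hOut), hipMemcpyDeviceToHost) != hipSuccess)
    return 1;
  hipFree(dOut);
  const int expected = N * (N + 1) / 2; // 1 + 2 + ... + N
  for (int i = 0; i < Threads; ++i) {
    if (hOut[i] != expected) {
      printf("FAILED: thread %d got %d, expected %d\n", i, hOut[i], expected);
      return 1;
    }
  }
  printf("PASSED\n");
  return 0;
}
```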
LLVM Buildbot has detected new failures on several builders after this change landed. Full details are available at:

https://lab.llvm.org/buildbot/#/builders/185/builds/10388
https://lab.llvm.org/buildbot/#/builders/30/builds/12501
https://lab.llvm.org/buildbot/#/builders/175/builds/10388
https://lab.llvm.org/buildbot/#/builders/137/builds/10519
https://lab.llvm.org/buildbot/#/builders/56/builds/14775
https://lab.llvm.org/buildbot/#/builders/140/builds/13297
https://lab.llvm.org/buildbot/#/builders/60/builds/15483
https://lab.llvm.org/buildbot/#/builders/16/builds/10896
https://lab.llvm.org/buildbot/#/builders/108/builds/7269
https://lab.llvm.org/buildbot/#/builders/33/builds/8484
https://lab.llvm.org/buildbot/#/builders/153/builds/17908
…120393) Machine-Verifier crashes in kernel functions, but fails gracefully in device functions. This is due to the buffer resource descriptor selected during G-ISEL, before the fallback path. Device functions use `$sgpr0_sgpr1_sgpr2_sgpr3`, while kernel functions select `$private_rsrc_reg`, where the machine verifier complains: `$private_rsrc_reg is not a SReg_128 register.` The test case is modified to capture both behaviors; this is related to #120063
…dynamic allocas" (#120369) Reverts llvm/llvm-project#120063 due to build-bot failures
…ifier bug (#120393) Machine-Verifier crashes in kernel functions, but fails gracefully in device functions. This is due to the buffer resource descriptor selected during G-ISEL, before the fallback path. Device functions use `$sgpr0_sgpr1_sgpr2_sgpr3`, while kernel functions select `$private_rsrc_reg`, where the machine verifier complains: `$private_rsrc_reg is not a SReg_128 register.` The test case is modified to capture both behaviors; this is related to llvm/llvm-project#120063
… dynamic allocas" (#120410) This reapplies commit llvm/llvm-project#120063. A machine-verifier bug was causing a crash in the previous commit. This has been addressed in llvm/llvm-project#120393.
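For context, the kernel-vs-device-function distinction in the machine-verifier commit message above can be illustrated with a minimal HIP-style sketch in the spirit of the pre-committed tests; the function names and stored values are illustrative, not the exact tests from this patch:

```cpp
// Illustrative only: both functions lower to a dynamically sized alloca in
// addrspace(5). The kernel uses the amdgpu_kernel calling convention and the
// device function the ordinary one, which is where the differing
// buffer-resource-descriptor selection described above comes into play.
#define __global__ __attribute__((global))
#define __device__ __attribute__((device))

__global__ void kernel_dynamic_alloca(int *a) {
  volatile int *buf =
      static_cast<volatile int *>(__builtin_alloca(*a * sizeof(int)));
  buf[0] = 10;
}

__device__ void device_dynamic_alloca(int *a) {
  volatile int *buf =
      static_cast<volatile int *>(__builtin_alloca(*a * sizeof(int)));
  buf[0] = 20;
}
```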
…ocas" (llvm#120369) Reverts llvm#120063 due to build-bot failures
…lvm#120393) Machine-Verifier crashes in kernel functions, but fails gracefully in device functions. This is due to the buffer resource descriptor selected during G-ISEL, before the fallback path. Device functions use `$sgpr0_sgpr1_sgpr2_sgpr3`. while Kernel functions select `$private_rsrc_reg` where machine-verifier complains: `$private_rsrc_reg is not a SReg_128 register.` Modifying test case to capture both behaviors, this is related to llvm#120063
…locas" (llvm#120410) This reapplies commit llvm#120063. A machine-verifier bug was causing a crash in the previous commit. This has been addressed in llvm#120393.
For #119822