8
8
define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer (ptr addrspace (1 ) %out , i32 %a , i32 %b ) #0 {
9
9
; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
10
10
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
11
- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
12
- ; CHECK-NEXT: [[TMP4 :%.*]] = load i32, ptr addrspace(4) [[TMP3 ]], align 4, !invariant.load !0
13
- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
14
- ; CHECK-NEXT: [[TMP6 :%.*]] = load i32, ptr addrspace(4) [[TMP5 ]], align 4, !range [[RNG1:![0-9]+]], !invariant.load !0
15
- ; CHECK-NEXT: [[TMP7 :%.*]] = lshr i32 [[TMP4 ]], 16
16
- ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2:![0-9]+]]
17
- ; CHECK-NEXT: [[TMP9 :%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
18
- ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
19
- ; CHECK-NEXT: [[TMP11 :%.*]] = mul nuw nsw i32 [[TMP7 ]], [[TMP6 ]]
20
- ; CHECK-NEXT: [[TMP12 :%.*]] = mul i32 [[TMP11 ]], [[TMP8 ]]
21
- ; CHECK-NEXT: [[TMP13 :%.*]] = mul nuw nsw i32 [[TMP9 ]], [[TMP6 ]]
22
- ; CHECK-NEXT: [[TMP14 :%.*]] = add i32 [[TMP12 ]], [[TMP13 ]]
23
- ; CHECK-NEXT: [[TMP15 :%.*]] = add i32 [[TMP14 ]], [[TMP10 ]]
24
- ; CHECK-NEXT: [[TMP16 :%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP15 ]]
25
- ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16 ]], i32 0, i32 [[A:%.*]]
26
- ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16 ]], i32 0, i32 [[B:%.*]]
11
+ ; CHECK-NEXT: [[TMP2 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
12
+ ; CHECK-NEXT: [[TMP3 :%.*]] = load i32, ptr addrspace(4) [[TMP2 ]], align 4, !invariant.load [[META0:![0-9]+]]
13
+ ; CHECK-NEXT: [[TMP4 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
14
+ ; CHECK-NEXT: [[TMP5 :%.*]] = load i32, ptr addrspace(4) [[TMP4 ]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]]
15
+ ; CHECK-NEXT: [[TMP6 :%.*]] = lshr i32 [[TMP3 ]], 16
16
+ ; CHECK-NEXT: [[TMP7 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.x()
17
+ ; CHECK-NEXT: [[TMP8 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.y()
18
+ ; CHECK-NEXT: [[TMP9 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.z()
19
+ ; CHECK-NEXT: [[TMP10 :%.*]] = mul nuw nsw i32 [[TMP6 ]], [[TMP5 ]]
20
+ ; CHECK-NEXT: [[TMP11 :%.*]] = mul i32 [[TMP10 ]], [[TMP7 ]]
21
+ ; CHECK-NEXT: [[TMP12 :%.*]] = mul nuw nsw i32 [[TMP8 ]], [[TMP5 ]]
22
+ ; CHECK-NEXT: [[TMP13 :%.*]] = add i32 [[TMP11 ]], [[TMP12 ]]
23
+ ; CHECK-NEXT: [[TMP14 :%.*]] = add i32 [[TMP13 ]], [[TMP9 ]]
24
+ ; CHECK-NEXT: [[TMP15 :%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP14 ]]
25
+ ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15 ]], i32 0, i32 [[A:%.*]]
26
+ ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15 ]], i32 0, i32 [[B:%.*]]
27
27
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) [[PTR0]], [[PTR1]]
28
28
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
29
29
; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4
@@ -50,21 +50,21 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr add
50
50
define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs (ptr addrspace (1 ) %out , i32 %a , i32 %b ) #0 {
51
51
; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
52
52
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
53
- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
54
- ; CHECK-NEXT: [[TMP4 :%.*]] = load i32, ptr addrspace(4) [[TMP3 ]], align 4, !invariant.load !0
55
- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
56
- ; CHECK-NEXT: [[TMP6 :%.*]] = load i32, ptr addrspace(4) [[TMP5 ]], align 4, !range [[RNG1]], !invariant.load !0
57
- ; CHECK-NEXT: [[TMP7 :%.*]] = lshr i32 [[TMP4 ]], 16
58
- ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]]
59
- ; CHECK-NEXT: [[TMP9 :%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
60
- ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
61
- ; CHECK-NEXT: [[TMP11 :%.*]] = mul nuw nsw i32 [[TMP7 ]], [[TMP6 ]]
62
- ; CHECK-NEXT: [[TMP12 :%.*]] = mul i32 [[TMP11 ]], [[TMP8 ]]
63
- ; CHECK-NEXT: [[TMP13 :%.*]] = mul nuw nsw i32 [[TMP9 ]], [[TMP6 ]]
64
- ; CHECK-NEXT: [[TMP14 :%.*]] = add i32 [[TMP12 ]], [[TMP13 ]]
65
- ; CHECK-NEXT: [[TMP15 :%.*]] = add i32 [[TMP14 ]], [[TMP10 ]]
66
- ; CHECK-NEXT: [[TMP16 :%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP15 ]]
67
- ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16 ]], i32 0, i32 [[A:%.*]]
53
+ ; CHECK-NEXT: [[TMP2 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
54
+ ; CHECK-NEXT: [[TMP3 :%.*]] = load i32, ptr addrspace(4) [[TMP2 ]], align 4, !invariant.load [[META0]]
55
+ ; CHECK-NEXT: [[TMP4 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
56
+ ; CHECK-NEXT: [[TMP5 :%.*]] = load i32, ptr addrspace(4) [[TMP4 ]], align 4, !range [[RNG1]], !invariant.load [[META0]]
57
+ ; CHECK-NEXT: [[TMP6 :%.*]] = lshr i32 [[TMP3 ]], 16
58
+ ; CHECK-NEXT: [[TMP7 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.x()
59
+ ; CHECK-NEXT: [[TMP8 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.y()
60
+ ; CHECK-NEXT: [[TMP9 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.z()
61
+ ; CHECK-NEXT: [[TMP10 :%.*]] = mul nuw nsw i32 [[TMP6 ]], [[TMP5 ]]
62
+ ; CHECK-NEXT: [[TMP11 :%.*]] = mul i32 [[TMP10 ]], [[TMP7 ]]
63
+ ; CHECK-NEXT: [[TMP12 :%.*]] = mul nuw nsw i32 [[TMP8 ]], [[TMP5 ]]
64
+ ; CHECK-NEXT: [[TMP13 :%.*]] = add i32 [[TMP11 ]], [[TMP12 ]]
65
+ ; CHECK-NEXT: [[TMP14 :%.*]] = add i32 [[TMP13 ]], [[TMP9 ]]
66
+ ; CHECK-NEXT: [[TMP15 :%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP14 ]]
67
+ ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15 ]], i32 0, i32 [[A:%.*]]
68
68
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) [[PTR0]], null
69
69
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
70
70
; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4
@@ -89,21 +89,21 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %o
89
89
define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs (ptr addrspace (1 ) %out , i32 %a , i32 %b ) #0 {
90
90
; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
91
91
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
92
- ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
93
- ; CHECK-NEXT: [[TMP4 :%.*]] = load i32, ptr addrspace(4) [[TMP3 ]], align 4, !invariant.load !0
94
- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
95
- ; CHECK-NEXT: [[TMP6 :%.*]] = load i32, ptr addrspace(4) [[TMP5 ]], align 4, !range [[RNG1]], !invariant.load !0
96
- ; CHECK-NEXT: [[TMP7 :%.*]] = lshr i32 [[TMP4 ]], 16
97
- ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]]
98
- ; CHECK-NEXT: [[TMP9 :%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
99
- ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
100
- ; CHECK-NEXT: [[TMP11 :%.*]] = mul nuw nsw i32 [[TMP7 ]], [[TMP6 ]]
101
- ; CHECK-NEXT: [[TMP12 :%.*]] = mul i32 [[TMP11 ]], [[TMP8 ]]
102
- ; CHECK-NEXT: [[TMP13 :%.*]] = mul nuw nsw i32 [[TMP9 ]], [[TMP6 ]]
103
- ; CHECK-NEXT: [[TMP14 :%.*]] = add i32 [[TMP12 ]], [[TMP13 ]]
104
- ; CHECK-NEXT: [[TMP15 :%.*]] = add i32 [[TMP14 ]], [[TMP10 ]]
105
- ; CHECK-NEXT: [[TMP16 :%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP15 ]]
106
- ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16 ]], i32 0, i32 [[A:%.*]]
92
+ ; CHECK-NEXT: [[TMP2 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
93
+ ; CHECK-NEXT: [[TMP3 :%.*]] = load i32, ptr addrspace(4) [[TMP2 ]], align 4, !invariant.load [[META0]]
94
+ ; CHECK-NEXT: [[TMP4 :%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
95
+ ; CHECK-NEXT: [[TMP5 :%.*]] = load i32, ptr addrspace(4) [[TMP4 ]], align 4, !range [[RNG1]], !invariant.load [[META0]]
96
+ ; CHECK-NEXT: [[TMP6 :%.*]] = lshr i32 [[TMP3 ]], 16
97
+ ; CHECK-NEXT: [[TMP7 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.x()
98
+ ; CHECK-NEXT: [[TMP8 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.y()
99
+ ; CHECK-NEXT: [[TMP9 :%.*]] = call range( i32 0, 256) i32 @llvm.amdgcn.workitem.id.z()
100
+ ; CHECK-NEXT: [[TMP10 :%.*]] = mul nuw nsw i32 [[TMP6 ]], [[TMP5 ]]
101
+ ; CHECK-NEXT: [[TMP11 :%.*]] = mul i32 [[TMP10 ]], [[TMP7 ]]
102
+ ; CHECK-NEXT: [[TMP12 :%.*]] = mul nuw nsw i32 [[TMP8 ]], [[TMP5 ]]
103
+ ; CHECK-NEXT: [[TMP13 :%.*]] = add i32 [[TMP11 ]], [[TMP12 ]]
104
+ ; CHECK-NEXT: [[TMP14 :%.*]] = add i32 [[TMP13 ]], [[TMP9 ]]
105
+ ; CHECK-NEXT: [[TMP15 :%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP14 ]]
106
+ ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15 ]], i32 0, i32 [[A:%.*]]
107
107
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) null, [[PTR0]]
108
108
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
109
109
; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4
0 commit comments