+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse -earlycse-debug-hash | FileCheck %s
 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes='early-cse<memssa>' | FileCheck %s
 
 define <4 x i32> @test_cse(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_cse(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT:    [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT:    [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret <4 x i32> [[RES_0]]
+;
 entry:
 ; Check that @llvm.aarch64.neon.ld2 is optimized away by Early CSE.
-; CHECK-LABEL: @test_cse
-; CHECK-NOT: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
   %s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
   %s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
   br label %for.cond
@@ -34,11 +56,32 @@ for.end: ; preds = %for.cond
 }
 
 define <4 x i32> @test_cse2(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_cse2(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT:    [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT:    [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret <4 x i32> [[RES_0]]
+;
 entry:
 ; Check that the first @llvm.aarch64.neon.st2 is optimized away by Early CSE.
-; CHECK-LABEL: @test_cse2
-; CHECK-NOT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %3, <4 x i32> %3, ptr %0)
-; CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %s.coerce.fca.0.extract, <4 x i32> %s.coerce.fca.1.extract, ptr %a)
   %s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
   %s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
   br label %for.cond
@@ -68,11 +111,26 @@ for.end: ; preds = %for.cond
 }
 
 define <4 x i32> @test_cse3(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) #0 {
+; CHECK-LABEL: define <4 x i32> @test_cse3(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT:    [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+; CHECK-NEXT:    [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 0
+; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD2_FCA_0_EXTRACT]], <4 x i32> [[VLD2_FCA_0_EXTRACT]])
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret <4 x i32> [[RES_0]]
+;
 entry:
 ; Check that the first @llvm.aarch64.neon.ld2 is optimized away by Early CSE.
-; CHECK-LABEL: @test_cse3
-; CHECK: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
-; CHECK-NOT: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
   %s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
   %s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
   br label %for.cond
@@ -100,11 +158,33 @@ for.end: ; preds = %for.cond
 
 
 define <4 x i32> @test_nocse(ptr %a, ptr %b, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_nocse(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT:    [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT:    [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    store i32 0, ptr [[B]], align 4
+; CHECK-NEXT:    [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+; CHECK-NEXT:    [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 0
+; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD2_FCA_0_EXTRACT]], <4 x i32> [[VLD2_FCA_0_EXTRACT]])
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret <4 x i32> [[RES_0]]
+;
 entry:
 ; Check that the store prevents @llvm.aarch64.neon.ld2 from being optimized
 ; away by Early CSE.
-; CHECK-LABEL: @test_nocse
-; CHECK: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0
   %s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
   %s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
   br label %for.cond
@@ -134,11 +214,33 @@ for.end: ; preds = %for.cond
 }
 
 define <4 x i32> @test_nocse2(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_nocse2(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT:    [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT:    [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+; CHECK-NEXT:    [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 0
+; CHECK-NEXT:    [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 2
+; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD3_FCA_0_EXTRACT]], <4 x i32> [[VLD3_FCA_2_EXTRACT]])
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret <4 x i32> [[RES_0]]
+;
 entry:
 ; Check that @llvm.aarch64.neon.ld3 is not optimized away by Early CSE due
 ; to mismatch between st2 and ld3.
-; CHECK-LABEL: @test_nocse2
-; CHECK: call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0
   %s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
   %s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
   br label %for.cond
@@ -167,12 +269,33 @@ for.end: ; preds = %for.cond
 }
 
 define <4 x i32> @test_nocse3(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
+; CHECK-LABEL: define <4 x i32> @test_nocse3(
+; CHECK-SAME: ptr [[A:%.*]], [2 x <4 x i32>] [[S_COERCE:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[S_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 0
+; CHECK-NEXT:    [[S_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[S_COERCE]], 1
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CHECK-NEXT:    [[RES_0:%.*]] = phi <4 x i32> [ undef, %[[ENTRY]] ], [ [[CALL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[S_COERCE_FCA_1_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+; CHECK-NEXT:    [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 0
+; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[VLD3_FCA_0_EXTRACT]], <4 x i32> [[VLD3_FCA_0_EXTRACT]])
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret <4 x i32> [[RES_0]]
+;
 entry:
 ; Check that @llvm.aarch64.neon.st3 is not optimized away by Early CSE due to
 ; mismatch between st2 and st3.
-; CHECK-LABEL: @test_nocse3
-; CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0
-; CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0
   %s.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %s.coerce, 0
   %s.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %s.coerce, 1
   br label %for.cond
@@ -214,6 +337,12 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr)
 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr)
 
 define internal fastcc <4 x i32> @vaddq_s32(<4 x i32> %__p0, <4 x i32> %__p1) {
+; CHECK-LABEL: define internal fastcc <4 x i32> @vaddq_s32(
+; CHECK-SAME: <4 x i32> [[__P0:%.*]], <4 x i32> [[__P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[__P0]], [[__P1]]
+; CHECK-NEXT:    ret <4 x i32> [[ADD]]
+;
 entry:
   %add = add <4 x i32> %__p0, %__p1
   ret <4 x i32> %add
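
The pattern the positive tests rely on is EarlyCSE's store-to-load forwarding for these structured NEON intrinsics: an ld2 from a pointer that a matching st2 just wrote can be rebuilt from the stored operands, so the load folds away (the regenerated @test_cse body above shows the ld2 replaced by insertvalues). A minimal standalone sketch of that pattern, separate from the test itself; the function name and operands are illustrative:

declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr)

; Before EarlyCSE: the ld2 reloads exactly what the st2 just wrote.
define <4 x i32> @sketch(ptr %p, <4 x i32> %v0, <4 x i32> %v1) {
  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %v0, <4 x i32> %v1, ptr %p)
  %vld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %p)
  %lane0 = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
  ; After early-cse the ld2 is gone and %v0 is returned directly.
  ret <4 x i32> %lane0
}

Running either RUN line above over this sketch should leave no ld2 call, which is what the negative tests (intervening store, st2/ld3 mismatch) deliberately break.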
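
Since the assertions are autogenerated, future edits to this test should refresh the CHECK lines with the script named in the NOTE line rather than editing them by hand. A sketch of the invocation, assuming an LLVM checkout with opt built under build/bin; both paths are illustrative, not taken from this diff:

llvm/utils/update_test_checks.py --version 5 \
    --opt-binary=build/bin/opt \
    llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll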