@@ -28,13 +28,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
28
28
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
29
29
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
30
30
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
31
- ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1
32
31
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
33
- ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
34
- ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
35
32
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
36
33
; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32>
37
- ; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP6]] to i32
38
34
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
39
35
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
40
36
; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32>
@@ -50,7 +46,6 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
50
46
; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]]
51
47
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
52
48
; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
53
- ; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP7]] to i32
54
49
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
55
50
; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32>
56
51
; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]]
@@ -64,8 +59,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
64
59
; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]]
65
60
; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]]
66
61
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0
67
- ; CHECK-NEXT: [[TMP45 :%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
68
- ; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP45 ]], [[TMP43]]
62
+ ; CHECK-NEXT: [[CONV_2 :%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
63
+ ; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2 ]], [[TMP43]]
69
64
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0
70
65
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1
71
66
; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]]
@@ -120,15 +115,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
120
115
; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]]
121
116
; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]]
122
117
; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]]
123
- ; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP77]], 15
124
- ; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
125
- ; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
126
- ; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP45]], 15
127
- ; CHECK-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537
128
- ; CHECK-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535
129
- ; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]]
130
- ; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]]
131
- ; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV9_2]], 15
118
+ ; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15
132
119
; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
133
120
; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535
134
121
; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15
@@ -244,32 +231,53 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
244
231
; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]]
245
232
; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]]
246
233
; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]]
247
- ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I ]], [[ADD103]]
234
+ ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1 ]], [[ADD103]]
248
235
; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]]
249
- ; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51 ]], [[ADD105]]
250
- ; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP45 ]]
236
+ ; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1 ]], [[ADD105]]
237
+ ; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2 ]]
251
238
; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]]
252
239
; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]]
253
240
; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]]
254
241
; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]]
255
242
; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]]
256
243
; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]]
257
244
; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]]
258
- ; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD55_1]], [[ADD55]]
259
- ; CHECK-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]]
260
- ; CHECK-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]]
245
+ ; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
246
+ ; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32>
247
+ ; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0
248
+ ; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer
249
+ ; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0
250
+ ; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer
251
+ ; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]]
252
+ ; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]]
253
+ ; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> <i32 0, i32 3>
254
+ ; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15)
255
+ ; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537)
256
+ ; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535)
257
+ ; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0
258
+ ; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer
259
+ ; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0
260
+ ; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer
261
+ ; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]]
262
+ ; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]]
263
+ ; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> <i32 0, i32 3>
264
+ ; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1
265
+ ; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1
261
266
; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]]
262
- ; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]]
267
+ ; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]]
268
+ ; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0
269
+ ; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0
270
+ ; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> <i32 0, i32 2>
263
271
; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]]
264
- ; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]]
265
- ; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[CONV9_2]]
266
- ; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]]
267
- ; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[CONV_2]]
272
+ ; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]]
273
+ ; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]]
268
274
; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]]
269
275
; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]]
270
276
; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]]
271
277
; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]]
278
+ ; CHECK-NEXT: [[XOR_I53_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 0
272
279
; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]]
280
+ ; CHECK-NEXT: [[XOR_I_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 1
273
281
; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]]
274
282
; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]]
275
283
; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_5]], [[XOR_I63_1]]
0 commit comments