@@ -28,9 +28,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
28
28
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
29
29
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
30
30
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
31
+ ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1
31
32
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
33
+ ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
34
+ ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
32
35
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
33
36
; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32>
37
+ ; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP6]] to i32
34
38
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
35
39
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
36
40
; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32>
@@ -46,6 +50,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
46
50
; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]]
47
51
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
48
52
; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
53
+ ; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP7]] to i32
49
54
; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
50
55
; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32>
51
56
; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]]
@@ -59,8 +64,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
59
64
; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]]
60
65
; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]]
61
66
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0
62
- ; CHECK-NEXT: [[CONV_2 :%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
63
- ; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2 ]], [[TMP43]]
67
+ ; CHECK-NEXT: [[TMP45 :%.*]] = extractelement <2 x i32> [[TMP34]], i32 1
68
+ ; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP45 ]], [[TMP43]]
64
69
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0
65
70
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1
66
71
; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]]
@@ -115,7 +120,15 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
115
120
; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]]
116
121
; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]]
117
122
; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]]
118
- ; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15
123
+ ; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP77]], 15
124
+ ; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
125
+ ; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
126
+ ; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP45]], 15
127
+ ; CHECK-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537
128
+ ; CHECK-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535
129
+ ; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]]
130
+ ; CHECK-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]]
131
+ ; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV9_2]], 15
119
132
; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
120
133
; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535
121
134
; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15
@@ -231,53 +244,32 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
231
244
; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]]
232
245
; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]]
233
246
; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]]
234
- ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1 ]], [[ADD103]]
247
+ ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I ]], [[ADD103]]
235
248
; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]]
236
- ; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1 ]], [[ADD105]]
237
- ; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2 ]]
249
+ ; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51 ]], [[ADD105]]
250
+ ; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP45 ]]
238
251
; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]]
239
252
; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]]
240
253
; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]]
241
254
; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]]
242
255
; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]]
243
256
; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]]
244
257
; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]]
245
- ; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
246
- ; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32>
247
- ; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0
248
- ; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer
249
- ; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0
250
- ; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer
251
- ; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]]
252
- ; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]]
253
- ; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> <i32 0, i32 3>
254
- ; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15)
255
- ; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537)
256
- ; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535)
257
- ; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0
258
- ; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer
259
- ; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0
260
- ; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer
261
- ; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]]
262
- ; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]]
263
- ; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> <i32 0, i32 3>
264
- ; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1
265
- ; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1
258
+ ; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD55_1]], [[ADD55]]
259
+ ; CHECK-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]]
260
+ ; CHECK-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]]
266
261
; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]]
267
- ; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]]
268
- ; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0
269
- ; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0
270
- ; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> <i32 0, i32 2>
262
+ ; CHECK-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]]
271
263
; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]]
272
- ; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]]
273
- ; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]]
264
+ ; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]]
265
+ ; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[CONV9_2]]
266
+ ; CHECK-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]]
267
+ ; CHECK-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[CONV_2]]
274
268
; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]]
275
269
; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]]
276
270
; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]]
277
271
; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]]
278
- ; CHECK-NEXT: [[XOR_I53_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 0
279
272
; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]]
280
- ; CHECK-NEXT: [[XOR_I_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 1
281
273
; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]]
282
274
; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]]
283
275
; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_5]], [[XOR_I63_1]]
0 commit comments