You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
%a.wide = sext <vscale x 16 x i8> %ato <vscale x 16 x i32>
1203
+
%mult = mulnuwnsw <vscale x 16 x i32> %a.wide, splat(i32 -1)
1204
+
%partial.reduce = tailcall <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
1205
+
ret <vscale x 4 x i32> %partial.reduce
1206
+
}
1207
+
1208
+
; Sign-extends %a from i8 to i32 lanes, multiplies every lane by a splat of
; 256, and folds the nxv16i32 product into the nxv4i32 accumulator %acc with
; the partial.reduce.add intrinsic.
;
; The multiplier 256 is outside the signed 8-bit range [-128, 127]. The
; expected output below is identical for both check prefixes and contains no
; dot-product instruction: the input is unpacked to 32-bit lanes, each part
; is shifted left by 8 (a multiply by 256), and the parts are summed into z0.
; NOTE(review): the function name suggests this is the negative counterpart
; of an sdot-with-immediate test defined elsewhere in the file -- confirm.
define <vscale x 4 x i32> @sdot_imm_does_not_fit(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sdot_imm_does_not_fit:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sunpklo z2.h, z1.b
; CHECK-NEXT:    sunpkhi z1.h, z1.b
; CHECK-NEXT:    sunpklo z3.s, z2.h
; CHECK-NEXT:    sunpkhi z2.s, z2.h
; CHECK-NEXT:    sunpklo z4.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    lsl z4.s, z4.s, #8
; CHECK-NEXT:    lsl z2.s, z2.s, #8
; CHECK-NEXT:    lsl z3.s, z3.s, #8
; CHECK-NEXT:    lsl z1.s, z1.s, #8
; CHECK-NEXT:    add z0.s, z0.s, z3.s
; CHECK-NEXT:    add z2.s, z2.s, z4.s
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: sdot_imm_does_not_fit:
; CHECK-NEWLOWERING:       // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT:    sunpklo z2.h, z1.b
; CHECK-NEWLOWERING-NEXT:    sunpkhi z1.h, z1.b
; CHECK-NEWLOWERING-NEXT:    sunpklo z3.s, z2.h
; CHECK-NEWLOWERING-NEXT:    sunpkhi z2.s, z2.h
; CHECK-NEWLOWERING-NEXT:    sunpklo z4.s, z1.h
; CHECK-NEWLOWERING-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEWLOWERING-NEXT:    lsl z4.s, z4.s, #8
; CHECK-NEWLOWERING-NEXT:    lsl z2.s, z2.s, #8
; CHECK-NEWLOWERING-NEXT:    lsl z3.s, z3.s, #8
; CHECK-NEWLOWERING-NEXT:    lsl z1.s, z1.s, #8
; CHECK-NEWLOWERING-NEXT:    add z0.s, z0.s, z3.s
; CHECK-NEWLOWERING-NEXT:    add z2.s, z2.s, z4.s
; CHECK-NEWLOWERING-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEWLOWERING-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEWLOWERING-NEXT:    ret
entry:
  %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
  %mult = mul nuw nsw <vscale x 16 x i32> %a.wide, splat(i32 256)
  %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
  ret <vscale x 4 x i32> %partial.reduce
}
1250
+
1251
+
define <vscale x 4 x i32> @udot_imm(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
%a.wide = zext <vscale x 16 x i8> %ato <vscale x 16 x i32>
1275
+
%mult = mulnuwnsw <vscale x 16 x i32> %a.wide, splat(i32255)
1276
+
%partial.reduce = tailcall <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
1277
+
ret <vscale x 4 x i32> %partial.reduce
1278
+
}
1279
+
1280
+
define <vscale x 4 x i32> @udot_imm_does_not_fit(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
1281
+
; CHECK-LABEL: udot_imm_does_not_fit:
1282
+
; CHECK: // %bb.0: // %entry
1283
+
; CHECK-NEXT: uunpklo z2.h, z1.b
1284
+
; CHECK-NEXT: uunpkhi z1.h, z1.b
1285
+
; CHECK-NEXT: uunpklo z3.s, z2.h
1286
+
; CHECK-NEXT: uunpkhi z2.s, z2.h
1287
+
; CHECK-NEXT: uunpklo z4.s, z1.h
1288
+
; CHECK-NEXT: uunpkhi z1.s, z1.h
1289
+
; CHECK-NEXT: lsl z4.s, z4.s, #8
1290
+
; CHECK-NEXT: lsl z2.s, z2.s, #8
1291
+
; CHECK-NEXT: lsl z3.s, z3.s, #8
1292
+
; CHECK-NEXT: lsl z1.s, z1.s, #8
1293
+
; CHECK-NEXT: add z0.s, z0.s, z3.s
1294
+
; CHECK-NEXT: add z2.s, z2.s, z4.s
1295
+
; CHECK-NEXT: add z0.s, z0.s, z2.s
1296
+
; CHECK-NEXT: add z0.s, z0.s, z1.s
1297
+
; CHECK-NEXT: ret
1298
+
;
1299
+
; CHECK-NEWLOWERING-LABEL: udot_imm_does_not_fit:
1300
+
; CHECK-NEWLOWERING: // %bb.0: // %entry
1301
+
; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z1.b
1302
+
; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
1303
+
; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z2.h
1304
+
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
1305
+
; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z1.h
1306
+
; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
1307
+
; CHECK-NEWLOWERING-NEXT: lsl z4.s, z4.s, #8
1308
+
; CHECK-NEWLOWERING-NEXT: lsl z2.s, z2.s, #8
1309
+
; CHECK-NEWLOWERING-NEXT: lsl z3.s, z3.s, #8
1310
+
; CHECK-NEWLOWERING-NEXT: lsl z1.s, z1.s, #8
1311
+
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
1312
+
; CHECK-NEWLOWERING-NEXT: add z2.s, z2.s, z4.s
1313
+
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z2.s
1314
+
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z1.s
1315
+
; CHECK-NEWLOWERING-NEXT: ret
1316
+
entry:
1317
+
%a.wide = zext <vscale x 16 x i8> %ato <vscale x 16 x i32>
1318
+
%mult = mulnuwnsw <vscale x 16 x i32> %a.wide, splat(i32256)
1319
+
%partial.reduce = tailcall <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
0 commit comments