|
3 | 3 | ; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM
|
4 | 4 | ; RUN: llc -mtriple=aarch64 -mattr=+sve,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE
|
5 | 5 | ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE2
|
6 |
| -; RUN: llc -mtriple=aarch64 -mattr=+sme -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME |
| 6 | +; RUN: llc -mtriple=aarch64 -mattr=+sve,+sme,+i8mm -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME |
7 | 7 |
|
8 | 8 | define <vscale x 4 x i32> @udot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
9 | 9 | ; CHECK-LABEL: udot:
|
@@ -299,12 +299,43 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
|
299 | 299 | ;
|
300 | 300 | ; CHECK-NEWLOWERING-LABEL: usdot_8to64:
|
301 | 301 | ; CHECK-NEWLOWERING: // %bb.0: // %entry
|
302 |
| -; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0 |
303 |
| -; CHECK-NEWLOWERING-NEXT: usdot z4.s, z2.b, z3.b |
304 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s |
305 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s |
306 |
| -; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d |
307 |
| -; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d |
| 302 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b |
| 303 | +; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b |
| 304 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z5.h, z3.b |
| 305 | +; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z3.b |
| 306 | +; CHECK-NEWLOWERING-NEXT: ptrue p0.d |
| 307 | +; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h |
| 308 | +; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h |
| 309 | +; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h |
| 310 | +; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h |
| 311 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h |
| 312 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h |
| 313 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h |
| 314 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h |
| 315 | +; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s |
| 316 | +; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s |
| 317 | +; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s |
| 318 | +; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s |
| 319 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s |
| 320 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s |
| 321 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s |
| 322 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s |
| 323 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
| 324 | +; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z4.s |
| 325 | +; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z5.s |
| 326 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
| 327 | +; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z2.s |
| 328 | +; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z3.s |
| 329 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s |
| 330 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s |
| 331 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s |
| 332 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s |
| 333 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d |
| 334 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d |
| 335 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
| 336 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
| 337 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d |
| 338 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d |
308 | 339 | ; CHECK-NEWLOWERING-NEXT: ret
|
309 | 340 | entry:
|
310 | 341 | %a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
|
@@ -369,12 +400,43 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
|
369 | 400 | ;
|
370 | 401 | ; CHECK-NEWLOWERING-LABEL: sudot_8to64:
|
371 | 402 | ; CHECK-NEWLOWERING: // %bb.0: // %entry
|
372 |
| -; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0 |
373 |
| -; CHECK-NEWLOWERING-NEXT: usdot z4.s, z3.b, z2.b |
374 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s |
375 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s |
376 |
| -; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d |
377 |
| -; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d |
| 403 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b |
| 404 | +; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b |
| 405 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z5.h, z3.b |
| 406 | +; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z3.b |
| 407 | +; CHECK-NEWLOWERING-NEXT: ptrue p0.d |
| 408 | +; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h |
| 409 | +; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h |
| 410 | +; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h |
| 411 | +; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h |
| 412 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h |
| 413 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h |
| 414 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h |
| 415 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h |
| 416 | +; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s |
| 417 | +; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s |
| 418 | +; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s |
| 419 | +; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s |
| 420 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s |
| 421 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s |
| 422 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s |
| 423 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s |
| 424 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
| 425 | +; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z4.s |
| 426 | +; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z5.s |
| 427 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
| 428 | +; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z2.s |
| 429 | +; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z3.s |
| 430 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s |
| 431 | +; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s |
| 432 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s |
| 433 | +; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s |
| 434 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d |
| 435 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d |
| 436 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
| 437 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
| 438 | +; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d |
| 439 | +; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d |
378 | 440 | ; CHECK-NEWLOWERING-NEXT: ret
|
379 | 441 | entry:
|
380 | 442 | %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
|
|
0 commit comments