|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM |
3 |
| -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M |
4 |
| -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM |
5 |
| -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M |
| 2 | +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM |
| 3 | +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M |
| 4 | +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM |
| 5 | +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M |
| 6 | + |
| 7 | +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M |
| 8 | +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M |
6 | 9 |
|
7 | 10 | define i8 @extractelt_v16i8(ptr %x) nounwind {
|
8 | 11 | ; CHECK-LABEL: extractelt_v16i8:
|
@@ -66,14 +69,37 @@ define i64 @extractelt_v2i64(ptr %x) nounwind {
|
66 | 69 | ret i64 %b
|
67 | 70 | }
|
68 | 71 |
|
69 |
| -define half @extractelt_v8f16(ptr %x) nounwind { |
70 |
| -; CHECK-LABEL: extractelt_v8f16: |
| 72 | +define bfloat @extractelt_v8bf16(ptr %x) nounwind { |
| 73 | +; CHECK-LABEL: extractelt_v8bf16: |
71 | 74 | ; CHECK: # %bb.0:
|
72 | 75 | ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
|
73 | 76 | ; CHECK-NEXT: vle16.v v8, (a0)
|
74 | 77 | ; CHECK-NEXT: vslidedown.vi v8, v8, 7
|
75 |
| -; CHECK-NEXT: vfmv.f.s fa0, v8 |
| 78 | +; CHECK-NEXT: vmv.x.s a0, v8 |
| 79 | +; CHECK-NEXT: fmv.h.x fa0, a0 |
76 | 80 | ; CHECK-NEXT: ret
|
| 81 | + %a = load <8 x bfloat>, ptr %x |
| 82 | + %b = extractelement <8 x bfloat> %a, i32 7 |
| 83 | + ret bfloat %b |
| 84 | +} |
| 85 | + |
| 86 | +define half @extractelt_v8f16(ptr %x) nounwind { |
| 87 | +; ZVFH-LABEL: extractelt_v8f16: |
| 88 | +; ZVFH: # %bb.0: |
| 89 | +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma |
| 90 | +; ZVFH-NEXT: vle16.v v8, (a0) |
| 91 | +; ZVFH-NEXT: vslidedown.vi v8, v8, 7 |
| 92 | +; ZVFH-NEXT: vfmv.f.s fa0, v8 |
| 93 | +; ZVFH-NEXT: ret |
| 94 | +; |
| 95 | +; ZVFHMIN-LABEL: extractelt_v8f16: |
| 96 | +; ZVFHMIN: # %bb.0: |
| 97 | +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma |
| 98 | +; ZVFHMIN-NEXT: vle16.v v8, (a0) |
| 99 | +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 |
| 100 | +; ZVFHMIN-NEXT: vmv.x.s a0, v8 |
| 101 | +; ZVFHMIN-NEXT: fmv.h.x fa0, a0 |
| 102 | +; ZVFHMIN-NEXT: ret |
77 | 103 | %a = load <8 x half>, ptr %x
|
78 | 104 | %b = extractelement <8 x half> %a, i32 7
|
79 | 105 | ret half %b
|
@@ -171,15 +197,40 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
|
171 | 197 | ret i64 %b
|
172 | 198 | }
|
173 | 199 |
|
174 |
| -define half @extractelt_v16f16(ptr %x) nounwind { |
175 |
| -; CHECK-LABEL: extractelt_v16f16: |
| 200 | +define bfloat @extractelt_v16bf16(ptr %x) nounwind { |
| 201 | +; CHECK-LABEL: extractelt_v16bf16: |
176 | 202 | ; CHECK: # %bb.0:
|
177 | 203 | ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
|
178 | 204 | ; CHECK-NEXT: vle16.v v8, (a0)
|
179 | 205 | ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
|
180 | 206 | ; CHECK-NEXT: vslidedown.vi v8, v8, 7
|
181 |
| -; CHECK-NEXT: vfmv.f.s fa0, v8 |
| 207 | +; CHECK-NEXT: vmv.x.s a0, v8 |
| 208 | +; CHECK-NEXT: fmv.h.x fa0, a0 |
182 | 209 | ; CHECK-NEXT: ret
|
| 210 | + %a = load <16 x bfloat>, ptr %x |
| 211 | + %b = extractelement <16 x bfloat> %a, i32 7 |
| 212 | + ret bfloat %b |
| 213 | +} |
| 214 | + |
| 215 | +define half @extractelt_v16f16(ptr %x) nounwind { |
| 216 | +; ZVFH-LABEL: extractelt_v16f16: |
| 217 | +; ZVFH: # %bb.0: |
| 218 | +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma |
| 219 | +; ZVFH-NEXT: vle16.v v8, (a0) |
| 220 | +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma |
| 221 | +; ZVFH-NEXT: vslidedown.vi v8, v8, 7 |
| 222 | +; ZVFH-NEXT: vfmv.f.s fa0, v8 |
| 223 | +; ZVFH-NEXT: ret |
| 224 | +; |
| 225 | +; ZVFHMIN-LABEL: extractelt_v16f16: |
| 226 | +; ZVFHMIN: # %bb.0: |
| 227 | +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma |
| 228 | +; ZVFHMIN-NEXT: vle16.v v8, (a0) |
| 229 | +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma |
| 230 | +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7 |
| 231 | +; ZVFHMIN-NEXT: vmv.x.s a0, v8 |
| 232 | +; ZVFHMIN-NEXT: fmv.h.x fa0, a0 |
| 233 | +; ZVFHMIN-NEXT: ret |
183 | 234 | %a = load <16 x half>, ptr %x
|
184 | 235 | %b = extractelement <16 x half> %a, i32 7
|
185 | 236 | ret half %b
|
@@ -398,15 +449,49 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
|
398 | 449 | ret i64 %c
|
399 | 450 | }
|
400 | 451 |
|
401 |
| -define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind { |
402 |
| -; CHECK-LABEL: extractelt_v8f16_idx: |
| 452 | +define bfloat @extractelt_v8bf16_idx(ptr %x, i32 zeroext %idx) nounwind { |
| 453 | +; CHECK-LABEL: extractelt_v8bf16_idx: |
403 | 454 | ; CHECK: # %bb.0:
|
404 | 455 | ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
|
405 | 456 | ; CHECK-NEXT: vle16.v v8, (a0)
|
406 |
| -; CHECK-NEXT: vfadd.vv v8, v8, v8 |
407 |
| -; CHECK-NEXT: vslidedown.vx v8, v8, a1 |
408 |
| -; CHECK-NEXT: vfmv.f.s fa0, v8 |
| 457 | +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 |
| 458 | +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| 459 | +; CHECK-NEXT: vfadd.vv v8, v10, v10 |
| 460 | +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| 461 | +; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 |
| 462 | +; CHECK-NEXT: vslidedown.vx v8, v10, a1 |
| 463 | +; CHECK-NEXT: vmv.x.s a0, v8 |
| 464 | +; CHECK-NEXT: fmv.h.x fa0, a0 |
409 | 465 | ; CHECK-NEXT: ret
|
| 466 | + %a = load <8 x bfloat>, ptr %x |
| 467 | + %b = fadd <8 x bfloat> %a, %a |
| 468 | + %c = extractelement <8 x bfloat> %b, i32 %idx |
| 469 | + ret bfloat %c |
| 470 | +} |
| 471 | + |
| 472 | +define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind { |
| 473 | +; ZVFH-LABEL: extractelt_v8f16_idx: |
| 474 | +; ZVFH: # %bb.0: |
| 475 | +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma |
| 476 | +; ZVFH-NEXT: vle16.v v8, (a0) |
| 477 | +; ZVFH-NEXT: vfadd.vv v8, v8, v8 |
| 478 | +; ZVFH-NEXT: vslidedown.vx v8, v8, a1 |
| 479 | +; ZVFH-NEXT: vfmv.f.s fa0, v8 |
| 480 | +; ZVFH-NEXT: ret |
| 481 | +; |
| 482 | +; ZVFHMIN-LABEL: extractelt_v8f16_idx: |
| 483 | +; ZVFHMIN: # %bb.0: |
| 484 | +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma |
| 485 | +; ZVFHMIN-NEXT: vle16.v v8, (a0) |
| 486 | +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 |
| 487 | +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| 488 | +; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 |
| 489 | +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| 490 | +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 |
| 491 | +; ZVFHMIN-NEXT: vslidedown.vx v8, v10, a1 |
| 492 | +; ZVFHMIN-NEXT: vmv.x.s a0, v8 |
| 493 | +; ZVFHMIN-NEXT: fmv.h.x fa0, a0 |
| 494 | +; ZVFHMIN-NEXT: ret |
410 | 495 | %a = load <8 x half>, ptr %x
|
411 | 496 | %b = fadd <8 x half> %a, %a
|
412 | 497 | %c = extractelement <8 x half> %b, i32 %idx
|
@@ -513,15 +598,49 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
|
513 | 598 | ret i64 %c
|
514 | 599 | }
|
515 | 600 |
|
516 |
| -define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind { |
517 |
| -; CHECK-LABEL: extractelt_v16f16_idx: |
| 601 | +define bfloat @extractelt_v16bf16_idx(ptr %x, i32 zeroext %idx) nounwind { |
| 602 | +; CHECK-LABEL: extractelt_v16bf16_idx: |
518 | 603 | ; CHECK: # %bb.0:
|
519 | 604 | ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
|
520 | 605 | ; CHECK-NEXT: vle16.v v8, (a0)
|
521 |
| -; CHECK-NEXT: vfadd.vv v8, v8, v8 |
522 |
| -; CHECK-NEXT: vslidedown.vx v8, v8, a1 |
523 |
| -; CHECK-NEXT: vfmv.f.s fa0, v8 |
| 606 | +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 |
| 607 | +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| 608 | +; CHECK-NEXT: vfadd.vv v8, v12, v12 |
| 609 | +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| 610 | +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 |
| 611 | +; CHECK-NEXT: vslidedown.vx v8, v12, a1 |
| 612 | +; CHECK-NEXT: vmv.x.s a0, v8 |
| 613 | +; CHECK-NEXT: fmv.h.x fa0, a0 |
524 | 614 | ; CHECK-NEXT: ret
|
| 615 | + %a = load <16 x bfloat>, ptr %x |
| 616 | + %b = fadd <16 x bfloat> %a, %a |
| 617 | + %c = extractelement <16 x bfloat> %b, i32 %idx |
| 618 | + ret bfloat %c |
| 619 | +} |
| 620 | + |
| 621 | +define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind { |
| 622 | +; ZVFH-LABEL: extractelt_v16f16_idx: |
| 623 | +; ZVFH: # %bb.0: |
| 624 | +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma |
| 625 | +; ZVFH-NEXT: vle16.v v8, (a0) |
| 626 | +; ZVFH-NEXT: vfadd.vv v8, v8, v8 |
| 627 | +; ZVFH-NEXT: vslidedown.vx v8, v8, a1 |
| 628 | +; ZVFH-NEXT: vfmv.f.s fa0, v8 |
| 629 | +; ZVFH-NEXT: ret |
| 630 | +; |
| 631 | +; ZVFHMIN-LABEL: extractelt_v16f16_idx: |
| 632 | +; ZVFHMIN: # %bb.0: |
| 633 | +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma |
| 634 | +; ZVFHMIN-NEXT: vle16.v v8, (a0) |
| 635 | +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 |
| 636 | +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| 637 | +; ZVFHMIN-NEXT: vfadd.vv v8, v12, v12 |
| 638 | +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| 639 | +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 |
| 640 | +; ZVFHMIN-NEXT: vslidedown.vx v8, v12, a1 |
| 641 | +; ZVFHMIN-NEXT: vmv.x.s a0, v8 |
| 642 | +; ZVFHMIN-NEXT: fmv.h.x fa0, a0 |
| 643 | +; ZVFHMIN-NEXT: ret |
525 | 644 | %a = load <16 x half>, ptr %x
|
526 | 645 | %b = fadd <16 x half> %a, %a
|
527 | 646 | %c = extractelement <16 x half> %b, i32 %idx
|
@@ -939,8 +1058,8 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
|
939 | 1058 | define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
|
940 | 1059 | ; RV32NOM-LABEL: extractelt_sdiv_v4i32:
|
941 | 1060 | ; RV32NOM: # %bb.0:
|
942 |
| -; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0) |
943 |
| -; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0) |
| 1061 | +; RV32NOM-NEXT: lui a0, %hi(.LCPI46_0) |
| 1062 | +; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI46_0) |
944 | 1063 | ; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
|
945 | 1064 | ; RV32NOM-NEXT: vle32.v v9, (a0)
|
946 | 1065 | ; RV32NOM-NEXT: vmulh.vv v9, v8, v9
|
@@ -975,8 +1094,8 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
|
975 | 1094 | ;
|
976 | 1095 | ; RV64NOM-LABEL: extractelt_sdiv_v4i32:
|
977 | 1096 | ; RV64NOM: # %bb.0:
|
978 |
| -; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0) |
979 |
| -; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0) |
| 1097 | +; RV64NOM-NEXT: lui a0, %hi(.LCPI46_0) |
| 1098 | +; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI46_0) |
980 | 1099 | ; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
|
981 | 1100 | ; RV64NOM-NEXT: vle32.v v9, (a0)
|
982 | 1101 | ; RV64NOM-NEXT: vmulh.vv v9, v8, v9
|
|
0 commit comments