
Commit c28b1a1

[LegalizeTypes][RISCV] Use SPLAT_VECTOR_PARTS to legalize splat BUILD_VECTOR (#107290)
If the element type needs to be expanded, we can use SPLAT_VECTOR_PARTS if the target supports it. There's already a DAGCombine to turn BUILD_VECTOR into SPLAT_VECTOR if the target makes SPLAT_VECTOR legal, but it doesn't fire for vectors that need to be split.
1 parent eb2929d · commit c28b1a1

15 files changed: +500 −1000 lines changed
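To illustrate the pattern this change targets, here is a minimal sketch (a hypothetical reduced example, not one of the tests in this commit; the function and value names are invented). On a 32-bit RISC-V target with vectors, building a fixed-length i64 vector from a runtime i64 forces each element to be expanded into two i32 halves; with this change the expanded Lo/Hi pair can feed a single SPLAT_VECTOR_PARTS node instead of the vector being reassembled element by element. Whether a given input reaches this exact legalization step still depends on earlier combines.

; Hypothetical reduced example; something like: llc -mtriple=riscv32 -mattr=+v
; The splat BUILD_VECTOR has i64 elements, which are illegal scalars on rv32,
; so the element is expanded into two i32 halves during type legalization.
declare i64 @get_value()

define <2 x i64> @splat_build_vector() {
  %s = call i64 @get_value()
  %v0 = insertelement <2 x i64> poison, i64 %s, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %s, i32 1
  ret <2 x i64> %v1
}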

llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

Lines changed: 9 additions & 0 deletions
@@ -376,6 +376,15 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
   assert(OldVT == VecVT.getVectorElementType() &&
          "BUILD_VECTOR operand type doesn't match vector element type!");
 
+  if (VecVT.isInteger() && TLI.isOperationLegal(ISD::SPLAT_VECTOR, VecVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR_PARTS, VecVT)) {
+    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+      SDValue Lo, Hi;
+      GetExpandedOp(V, Lo, Hi);
+      return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, dl, VecVT, Lo, Hi);
+    }
+  }
+
   // Build a vector of twice the length out of the expanded elements.
   // For example <3 x i64> -> <6 x i32>.
   SmallVector<SDValue, 16> NewElts;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll

Lines changed: 10 additions & 6 deletions
@@ -14,9 +14,11 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT: vfmv.f.s fa0, v8
 ; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -669,9 +671,11 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vfmv.f.s fa0, v8
 ; RV32-NEXT: call llrint
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll

Lines changed: 19 additions & 37 deletions
@@ -397,43 +397,22 @@ define void @masked_load_v32i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 declare <32 x i32> @llvm.masked.load.v32i32(ptr, i32, <32 x i1>, <32 x i32>)
 
 define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a3, a1, 128
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle64.v v0, (a1)
-; RV32-NEXT: vle64.v v24, (a3)
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vmseq.vv v8, v0, v16
-; RV32-NEXT: vmseq.vv v0, v24, v16
-; RV32-NEXT: addi a1, a0, 128
-; RV32-NEXT: vle64.v v16, (a1), v0.t
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vle64.v v8, (a0), v0.t
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: addi a0, a2, 128
-; RV32-NEXT: vse64.v v16, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: masked_load_v32i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi a3, a1, 128
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v16, (a1)
-; RV64-NEXT: vle64.v v24, (a3)
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmseq.vi v0, v24, 0
-; RV64-NEXT: addi a1, a0, 128
-; RV64-NEXT: vle64.v v16, (a1), v0.t
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vle64.v v8, (a0), v0.t
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: addi a0, a2, 128
-; RV64-NEXT: vse64.v v16, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: masked_load_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a3, a1, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: vle64.v v24, (a3)
+; CHECK-NEXT: vmseq.vi v8, v16, 0
+; CHECK-NEXT: vmseq.vi v0, v24, 0
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v16, (a1), v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: vse64.v v8, (a2)
+; CHECK-NEXT: addi a0, a2, 128
+; CHECK-NEXT: vse64.v v16, (a0)
+; CHECK-NEXT: ret
   %m = load <32 x i64>, ptr %m_ptr
   %mask = icmp eq <32 x i64> %m, zeroinitializer
   %load = call <32 x i64> @llvm.masked.load.v32i64(ptr %a, i32 8, <32 x i1> %mask, <32 x i64> undef)
@@ -547,3 +526,6 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
   ret void
 }
 declare <256 x i8> @llvm.masked.load.v256i8(ptr, i32, <256 x i1>, <256 x i8>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll

Lines changed: 41 additions & 81 deletions
@@ -397,87 +397,44 @@ define void @masked_store_v32i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
 declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32, <32 x i1>)
 
 define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: addi a3, a2, 128
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle64.v v24, (a2)
-; RV32-NEXT: vle64.v v8, (a3)
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vmseq.vv v7, v24, v8
-; RV32-NEXT: addi a2, a0, 128
-; RV32-NEXT: vle64.v v24, (a2)
-; RV32-NEXT: vle64.v v16, (a0)
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmseq.vv v0, v16, v8
-; RV32-NEXT: addi a0, a1, 128
-; RV32-NEXT: vse64.v v24, (a0), v0.t
-; RV32-NEXT: vmv1r.v v0, v7
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vse64.v v8, (a1), v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: masked_store_v32i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 4
-; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v8, (a2)
-; RV64-NEXT: addi a2, a2, 128
-; RV64-NEXT: vle64.v v16, (a2)
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 3
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vle64.v v24, (a0)
-; RV64-NEXT: addi a0, a0, 128
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vse64.v v24, (a1), v0.t
-; RV64-NEXT: addi a0, a1, 128
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vse64.v v8, (a0), v0.t
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: masked_store_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 4
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a2)
+; CHECK-NEXT: addi a2, a2, 128
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmseq.vi v0, v8, 0
+; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmseq.vi v8, v16, 0
+; CHECK-NEXT: vse64.v v24, (a1), v0.t
+; CHECK-NEXT: addi a0, a1, 128
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
   %m = load <32 x i64>, ptr %m_ptr
   %mask = icmp eq <32 x i64> %m, zeroinitializer
   %val = load <32 x i64>, ptr %val_ptr
@@ -683,3 +640,6 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
   ret void
 }
 declare void @llvm.masked.store.v256i8.p0(<256 x i8>, ptr, i32, <256 x i1>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
