Skip to content

Commit ac220b6

Browse files
author
Manish Kausik H
committed
[SelectionDAG] Use unaligned store to legalize EXTRACT_VECTOR_ELT type
This patch sets the alignment of the store instructions generated during type legalization of the extractelement instruction, clamping it to the stack alignment so that an over-aligned stack temporary is not required. Fixes #98044
1 parent 5c2bdc5 commit ac220b6

13 files changed

+258
-520
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/Analysis/MemoryLocation.h"
2525
#include "llvm/Analysis/VectorUtils.h"
2626
#include "llvm/CodeGen/ISDOpcodes.h"
27+
#include "llvm/CodeGen/MachineFrameInfo.h"
2728
#include "llvm/IR/DataLayout.h"
2829
#include "llvm/Support/ErrorHandling.h"
2930
#include "llvm/Support/TypeSize.h"
@@ -3531,7 +3532,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
35313532
// Store the vector to the stack.
35323533
// In cases where the vector is illegal it will be broken down into parts
35333534
// and stored in parts - we should use the alignment for the smallest part.
3534-
Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
3535+
Align SmallestAlign =
3536+
std::min(DAG.getSubtarget().getFrameLowering()->getStackAlign(),
3537+
DAG.getReducedAlign(VecVT, /*UseABI=*/false));
35353538
SDValue StackPtr =
35363539
DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
35373540
auto &MF = DAG.getMachineFunction();

llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll

Lines changed: 44 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -585,19 +585,13 @@ define double @extractelt_nxv16f64_0(<vscale x 16 x double> %v) {
585585
define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
586586
; RV32-LABEL: extractelt_nxv16f64_neg1:
587587
; RV32: # %bb.0:
588-
; RV32-NEXT: addi sp, sp, -80
589-
; RV32-NEXT: .cfi_def_cfa_offset 80
590-
; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
591-
; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
592-
; RV32-NEXT: .cfi_offset ra, -4
593-
; RV32-NEXT: .cfi_offset s0, -8
594-
; RV32-NEXT: addi s0, sp, 80
595-
; RV32-NEXT: .cfi_def_cfa s0, 0
588+
; RV32-NEXT: addi sp, sp, -16
589+
; RV32-NEXT: .cfi_def_cfa_offset 16
596590
; RV32-NEXT: csrr a0, vlenb
597591
; RV32-NEXT: slli a0, a0, 4
598592
; RV32-NEXT: sub sp, sp, a0
599-
; RV32-NEXT: andi sp, sp, -64
600-
; RV32-NEXT: addi a0, sp, 64
593+
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
594+
; RV32-NEXT: addi a0, sp, 16
601595
; RV32-NEXT: vs8r.v v8, (a0)
602596
; RV32-NEXT: csrr a1, vlenb
603597
; RV32-NEXT: slli a2, a1, 3
@@ -606,27 +600,21 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
606600
; RV32-NEXT: slli a1, a1, 4
607601
; RV32-NEXT: add a0, a1, a0
608602
; RV32-NEXT: fld fa0, -8(a0)
609-
; RV32-NEXT: addi sp, s0, -80
610-
; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
611-
; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
612-
; RV32-NEXT: addi sp, sp, 80
603+
; RV32-NEXT: csrr a0, vlenb
604+
; RV32-NEXT: slli a0, a0, 4
605+
; RV32-NEXT: add sp, sp, a0
606+
; RV32-NEXT: addi sp, sp, 16
613607
; RV32-NEXT: ret
614608
;
615609
; RV64-LABEL: extractelt_nxv16f64_neg1:
616610
; RV64: # %bb.0:
617-
; RV64-NEXT: addi sp, sp, -80
618-
; RV64-NEXT: .cfi_def_cfa_offset 80
619-
; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
620-
; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
621-
; RV64-NEXT: .cfi_offset ra, -8
622-
; RV64-NEXT: .cfi_offset s0, -16
623-
; RV64-NEXT: addi s0, sp, 80
624-
; RV64-NEXT: .cfi_def_cfa s0, 0
611+
; RV64-NEXT: addi sp, sp, -16
612+
; RV64-NEXT: .cfi_def_cfa_offset 16
625613
; RV64-NEXT: csrr a0, vlenb
626614
; RV64-NEXT: slli a0, a0, 4
627615
; RV64-NEXT: sub sp, sp, a0
628-
; RV64-NEXT: andi sp, sp, -64
629-
; RV64-NEXT: addi a0, sp, 64
616+
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
617+
; RV64-NEXT: addi a0, sp, 16
630618
; RV64-NEXT: vs8r.v v8, (a0)
631619
; RV64-NEXT: csrr a2, vlenb
632620
; RV64-NEXT: slli a1, a2, 3
@@ -643,10 +631,10 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
643631
; RV64-NEXT: slli a2, a2, 3
644632
; RV64-NEXT: add a0, a0, a2
645633
; RV64-NEXT: fld fa0, 0(a0)
646-
; RV64-NEXT: addi sp, s0, -80
647-
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
648-
; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
649-
; RV64-NEXT: addi sp, sp, 80
634+
; RV64-NEXT: csrr a0, vlenb
635+
; RV64-NEXT: slli a0, a0, 4
636+
; RV64-NEXT: add sp, sp, a0
637+
; RV64-NEXT: addi sp, sp, 16
650638
; RV64-NEXT: ret
651639
%r = extractelement <vscale x 16 x double> %v, i32 -1
652640
ret double %r
@@ -664,75 +652,34 @@ define double @extractelt_nxv16f64_imm(<vscale x 16 x double> %v) {
664652
}
665653

666654
define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %idx) {
667-
; RV32-LABEL: extractelt_nxv16f64_idx:
668-
; RV32: # %bb.0:
669-
; RV32-NEXT: csrr a1, vlenb
670-
; RV32-NEXT: slli a2, a1, 1
671-
; RV32-NEXT: addi a2, a2, -1
672-
; RV32-NEXT: bltu a0, a2, .LBB54_2
673-
; RV32-NEXT: # %bb.1:
674-
; RV32-NEXT: mv a0, a2
675-
; RV32-NEXT: .LBB54_2:
676-
; RV32-NEXT: addi sp, sp, -80
677-
; RV32-NEXT: .cfi_def_cfa_offset 80
678-
; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
679-
; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
680-
; RV32-NEXT: .cfi_offset ra, -4
681-
; RV32-NEXT: .cfi_offset s0, -8
682-
; RV32-NEXT: addi s0, sp, 80
683-
; RV32-NEXT: .cfi_def_cfa s0, 0
684-
; RV32-NEXT: csrr a2, vlenb
685-
; RV32-NEXT: slli a2, a2, 4
686-
; RV32-NEXT: sub sp, sp, a2
687-
; RV32-NEXT: andi sp, sp, -64
688-
; RV32-NEXT: slli a0, a0, 3
689-
; RV32-NEXT: addi a2, sp, 64
690-
; RV32-NEXT: add a0, a2, a0
691-
; RV32-NEXT: vs8r.v v8, (a2)
692-
; RV32-NEXT: slli a1, a1, 3
693-
; RV32-NEXT: add a1, a2, a1
694-
; RV32-NEXT: vs8r.v v16, (a1)
695-
; RV32-NEXT: fld fa0, 0(a0)
696-
; RV32-NEXT: addi sp, s0, -80
697-
; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
698-
; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
699-
; RV32-NEXT: addi sp, sp, 80
700-
; RV32-NEXT: ret
701-
;
702-
; RV64-LABEL: extractelt_nxv16f64_idx:
703-
; RV64: # %bb.0:
704-
; RV64-NEXT: csrr a1, vlenb
705-
; RV64-NEXT: slli a2, a1, 1
706-
; RV64-NEXT: addi a2, a2, -1
707-
; RV64-NEXT: bltu a0, a2, .LBB54_2
708-
; RV64-NEXT: # %bb.1:
709-
; RV64-NEXT: mv a0, a2
710-
; RV64-NEXT: .LBB54_2:
711-
; RV64-NEXT: addi sp, sp, -80
712-
; RV64-NEXT: .cfi_def_cfa_offset 80
713-
; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
714-
; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
715-
; RV64-NEXT: .cfi_offset ra, -8
716-
; RV64-NEXT: .cfi_offset s0, -16
717-
; RV64-NEXT: addi s0, sp, 80
718-
; RV64-NEXT: .cfi_def_cfa s0, 0
719-
; RV64-NEXT: csrr a2, vlenb
720-
; RV64-NEXT: slli a2, a2, 4
721-
; RV64-NEXT: sub sp, sp, a2
722-
; RV64-NEXT: andi sp, sp, -64
723-
; RV64-NEXT: slli a0, a0, 3
724-
; RV64-NEXT: addi a2, sp, 64
725-
; RV64-NEXT: add a0, a2, a0
726-
; RV64-NEXT: vs8r.v v8, (a2)
727-
; RV64-NEXT: slli a1, a1, 3
728-
; RV64-NEXT: add a1, a2, a1
729-
; RV64-NEXT: vs8r.v v16, (a1)
730-
; RV64-NEXT: fld fa0, 0(a0)
731-
; RV64-NEXT: addi sp, s0, -80
732-
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
733-
; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
734-
; RV64-NEXT: addi sp, sp, 80
735-
; RV64-NEXT: ret
655+
; CHECK-LABEL: extractelt_nxv16f64_idx:
656+
; CHECK: # %bb.0:
657+
; CHECK-NEXT: csrr a1, vlenb
658+
; CHECK-NEXT: slli a2, a1, 1
659+
; CHECK-NEXT: addi a2, a2, -1
660+
; CHECK-NEXT: bltu a0, a2, .LBB54_2
661+
; CHECK-NEXT: # %bb.1:
662+
; CHECK-NEXT: mv a0, a2
663+
; CHECK-NEXT: .LBB54_2:
664+
; CHECK-NEXT: addi sp, sp, -16
665+
; CHECK-NEXT: .cfi_def_cfa_offset 16
666+
; CHECK-NEXT: csrr a2, vlenb
667+
; CHECK-NEXT: slli a2, a2, 4
668+
; CHECK-NEXT: sub sp, sp, a2
669+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
670+
; CHECK-NEXT: slli a0, a0, 3
671+
; CHECK-NEXT: addi a2, sp, 16
672+
; CHECK-NEXT: add a0, a2, a0
673+
; CHECK-NEXT: vs8r.v v8, (a2)
674+
; CHECK-NEXT: slli a1, a1, 3
675+
; CHECK-NEXT: add a1, a2, a1
676+
; CHECK-NEXT: vs8r.v v16, (a1)
677+
; CHECK-NEXT: fld fa0, 0(a0)
678+
; CHECK-NEXT: csrr a0, vlenb
679+
; CHECK-NEXT: slli a0, a0, 4
680+
; CHECK-NEXT: add sp, sp, a0
681+
; CHECK-NEXT: addi sp, sp, 16
682+
; CHECK-NEXT: ret
736683
%r = extractelement <vscale x 16 x double> %v, i32 %idx
737684
ret double %r
738685
}

llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll

Lines changed: 38 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -122,85 +122,41 @@ define i1 @extractelt_nxv64i1(ptr %x, i64 %idx) nounwind {
122122
}
123123

124124
define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
125-
; RV32-LABEL: extractelt_nxv128i1:
126-
; RV32: # %bb.0:
127-
; RV32-NEXT: csrr a2, vlenb
128-
; RV32-NEXT: slli a3, a2, 4
129-
; RV32-NEXT: addi a3, a3, -1
130-
; RV32-NEXT: bltu a1, a3, .LBB7_2
131-
; RV32-NEXT: # %bb.1:
132-
; RV32-NEXT: mv a1, a3
133-
; RV32-NEXT: .LBB7_2:
134-
; RV32-NEXT: addi sp, sp, -80
135-
; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
136-
; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
137-
; RV32-NEXT: addi s0, sp, 80
138-
; RV32-NEXT: csrr a3, vlenb
139-
; RV32-NEXT: slli a3, a3, 4
140-
; RV32-NEXT: sub sp, sp, a3
141-
; RV32-NEXT: andi sp, sp, -64
142-
; RV32-NEXT: slli a2, a2, 3
143-
; RV32-NEXT: add a3, a0, a2
144-
; RV32-NEXT: vl8r.v v16, (a3)
145-
; RV32-NEXT: vl8r.v v24, (a0)
146-
; RV32-NEXT: addi a0, sp, 64
147-
; RV32-NEXT: add a1, a0, a1
148-
; RV32-NEXT: vsetvli a3, zero, e8, m8, ta, ma
149-
; RV32-NEXT: vmseq.vi v8, v16, 0
150-
; RV32-NEXT: vmseq.vi v0, v24, 0
151-
; RV32-NEXT: vmv.v.i v16, 0
152-
; RV32-NEXT: vmerge.vim v24, v16, 1, v0
153-
; RV32-NEXT: vs8r.v v24, (a0)
154-
; RV32-NEXT: add a0, a0, a2
155-
; RV32-NEXT: vmv1r.v v0, v8
156-
; RV32-NEXT: vmerge.vim v8, v16, 1, v0
157-
; RV32-NEXT: vs8r.v v8, (a0)
158-
; RV32-NEXT: lbu a0, 0(a1)
159-
; RV32-NEXT: addi sp, s0, -80
160-
; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
161-
; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
162-
; RV32-NEXT: addi sp, sp, 80
163-
; RV32-NEXT: ret
164-
;
165-
; RV64-LABEL: extractelt_nxv128i1:
166-
; RV64: # %bb.0:
167-
; RV64-NEXT: csrr a2, vlenb
168-
; RV64-NEXT: slli a3, a2, 4
169-
; RV64-NEXT: addi a3, a3, -1
170-
; RV64-NEXT: bltu a1, a3, .LBB7_2
171-
; RV64-NEXT: # %bb.1:
172-
; RV64-NEXT: mv a1, a3
173-
; RV64-NEXT: .LBB7_2:
174-
; RV64-NEXT: addi sp, sp, -80
175-
; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
176-
; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
177-
; RV64-NEXT: addi s0, sp, 80
178-
; RV64-NEXT: csrr a3, vlenb
179-
; RV64-NEXT: slli a3, a3, 4
180-
; RV64-NEXT: sub sp, sp, a3
181-
; RV64-NEXT: andi sp, sp, -64
182-
; RV64-NEXT: slli a2, a2, 3
183-
; RV64-NEXT: add a3, a0, a2
184-
; RV64-NEXT: vl8r.v v16, (a3)
185-
; RV64-NEXT: vl8r.v v24, (a0)
186-
; RV64-NEXT: addi a0, sp, 64
187-
; RV64-NEXT: add a1, a0, a1
188-
; RV64-NEXT: vsetvli a3, zero, e8, m8, ta, ma
189-
; RV64-NEXT: vmseq.vi v8, v16, 0
190-
; RV64-NEXT: vmseq.vi v0, v24, 0
191-
; RV64-NEXT: vmv.v.i v16, 0
192-
; RV64-NEXT: vmerge.vim v24, v16, 1, v0
193-
; RV64-NEXT: vs8r.v v24, (a0)
194-
; RV64-NEXT: add a0, a0, a2
195-
; RV64-NEXT: vmv1r.v v0, v8
196-
; RV64-NEXT: vmerge.vim v8, v16, 1, v0
197-
; RV64-NEXT: vs8r.v v8, (a0)
198-
; RV64-NEXT: lbu a0, 0(a1)
199-
; RV64-NEXT: addi sp, s0, -80
200-
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
201-
; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
202-
; RV64-NEXT: addi sp, sp, 80
203-
; RV64-NEXT: ret
125+
; CHECK-LABEL: extractelt_nxv128i1:
126+
; CHECK: # %bb.0:
127+
; CHECK-NEXT: csrr a2, vlenb
128+
; CHECK-NEXT: slli a3, a2, 4
129+
; CHECK-NEXT: addi a3, a3, -1
130+
; CHECK-NEXT: bltu a1, a3, .LBB7_2
131+
; CHECK-NEXT: # %bb.1:
132+
; CHECK-NEXT: mv a1, a3
133+
; CHECK-NEXT: .LBB7_2:
134+
; CHECK-NEXT: addi sp, sp, -16
135+
; CHECK-NEXT: csrr a3, vlenb
136+
; CHECK-NEXT: slli a3, a3, 4
137+
; CHECK-NEXT: sub sp, sp, a3
138+
; CHECK-NEXT: slli a2, a2, 3
139+
; CHECK-NEXT: add a3, a0, a2
140+
; CHECK-NEXT: vl8r.v v16, (a3)
141+
; CHECK-NEXT: vl8r.v v24, (a0)
142+
; CHECK-NEXT: addi a0, sp, 16
143+
; CHECK-NEXT: add a1, a0, a1
144+
; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
145+
; CHECK-NEXT: vmseq.vi v8, v16, 0
146+
; CHECK-NEXT: vmseq.vi v0, v24, 0
147+
; CHECK-NEXT: vmv.v.i v16, 0
148+
; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
149+
; CHECK-NEXT: vs8r.v v24, (a0)
150+
; CHECK-NEXT: add a0, a0, a2
151+
; CHECK-NEXT: vmv1r.v v0, v8
152+
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
153+
; CHECK-NEXT: vs8r.v v8, (a0)
154+
; CHECK-NEXT: lbu a0, 0(a1)
155+
; CHECK-NEXT: csrr a1, vlenb
156+
; CHECK-NEXT: slli a1, a1, 4
157+
; CHECK-NEXT: add sp, sp, a1
158+
; CHECK-NEXT: addi sp, sp, 16
159+
; CHECK-NEXT: ret
204160
%a = load <vscale x 128 x i8>, ptr %x
205161
%b = icmp eq <vscale x 128 x i8> %a, zeroinitializer
206162
%c = extractelement <vscale x 128 x i1> %b, i64 %idx
@@ -311,3 +267,6 @@ define i1 @extractelt_nxv64i1_idx0(ptr %x) nounwind {
311267
%c = extractelement <vscale x 64 x i1> %b, i64 0
312268
ret i1 %c
313269
}
270+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
271+
; RV32: {{.*}}
272+
; RV64: {{.*}}

0 commit comments

Comments
 (0)