Skip to content

[TwoAddressInstruction] Update LiveIntervals after INSERT_SUBREG with undef read #66211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1868,12 +1868,16 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// %reg.subidx.
LaneBitmask LaneMask =
TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
SlotIndex Idx = LIS->getInstructionIndex(*mi);
SlotIndex Idx = LIS->getInstructionIndex(*mi).getRegSlot();
for (auto &S : LI.subranges()) {
if ((S.LaneMask & LaneMask).none()) {
LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
LiveRange::iterator DefSeg = std::next(UseSeg);
S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
LiveRange::iterator DefSeg = S.FindSegmentContaining(Idx);
if (mi->getOperand(0).isUndef()) {
S.removeValNo(DefSeg->valno);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH I am pretty nervous about all this low-level fiddling around with live ranges. I would be much happier if there were some higher-level API I could use to do this kind of update.

} else {
LiveRange::iterator UseSeg = std::prev(DefSeg);
S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
}
}
}

Expand Down
52 changes: 35 additions & 17 deletions llvm/test/CodeGen/Thumb2/mve-fmath.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS

define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
; CHECK-LABEL: sqrt_float32_t:
Expand Down Expand Up @@ -1085,21 +1087,37 @@ entry:
}

define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: copysign_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov r0, lr, d2
; CHECK-NEXT: vmov r0, r3, d1
; CHECK-NEXT: vmov r12, r2, d0
; CHECK-NEXT: lsrs r1, r1, #31
; CHECK-NEXT: bfi r3, r1, #31, #1
; CHECK-NEXT: lsr.w r1, lr, #31
; CHECK-NEXT: bfi r2, r1, #31, #1
; CHECK-NEXT: vmov d1, r0, r3
; CHECK-NEXT: vmov d0, r12, r2
; CHECK-NEXT: pop {r7, pc}
; CHECK-LV-LABEL: copysign_float64_t:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: .save {r7, lr}
; CHECK-LV-NEXT: push {r7, lr}
; CHECK-LV-NEXT: vmov r0, r1, d3
; CHECK-LV-NEXT: vmov r0, lr, d2
; CHECK-LV-NEXT: vmov r0, r3, d1
; CHECK-LV-NEXT: vmov r12, r2, d0
; CHECK-LV-NEXT: lsrs r1, r1, #31
; CHECK-LV-NEXT: bfi r3, r1, #31, #1
; CHECK-LV-NEXT: lsr.w r1, lr, #31
; CHECK-LV-NEXT: bfi r2, r1, #31, #1
; CHECK-LV-NEXT: vmov d1, r0, r3
; CHECK-LV-NEXT: vmov d0, r12, r2
; CHECK-LV-NEXT: pop {r7, pc}
;
; CHECK-LIS-LABEL: copysign_float64_t:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: .save {r4, lr}
; CHECK-LIS-NEXT: push {r4, lr}
; CHECK-LIS-NEXT: vmov r0, r12, d3
; CHECK-LIS-NEXT: vmov r0, lr, d2
; CHECK-LIS-NEXT: vmov r4, r3, d1
; CHECK-LIS-NEXT: vmov r1, r2, d0
; CHECK-LIS-NEXT: lsr.w r0, r12, #31
; CHECK-LIS-NEXT: bfi r3, r0, #31, #1
; CHECK-LIS-NEXT: lsr.w r0, lr, #31
; CHECK-LIS-NEXT: bfi r2, r0, #31, #1
; CHECK-LIS-NEXT: vmov d1, r4, r3
; CHECK-LIS-NEXT: vmov d0, r1, r2
; CHECK-LIS-NEXT: pop {r4, pc}
entry:
%0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
ret <2 x double> %0
Expand Down
189 changes: 127 additions & 62 deletions llvm/test/CodeGen/Thumb2/mve-shuffle.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECKFP
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LV,CHECKFP
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LIS,CHECKFP

define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
; CHECK-LABEL: shuffle1_i32:
Expand Down Expand Up @@ -221,18 +223,31 @@ entry:
}

define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle3_i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmovx.f16 s2, s5
; CHECK-NEXT: vmovx.f16 s0, s4
; CHECK-NEXT: vins.f16 s5, s4
; CHECK-NEXT: vins.f16 s2, s0
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vmovx.f16 s1, s7
; CHECK-NEXT: vmov.f32 s0, s6
; CHECK-NEXT: vins.f16 s1, s7
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_i16:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmov q1, q0
; CHECK-LV-NEXT: vmovx.f16 s2, s5
; CHECK-LV-NEXT: vmovx.f16 s0, s4
; CHECK-LV-NEXT: vins.f16 s5, s4
; CHECK-LV-NEXT: vins.f16 s2, s0
; CHECK-LV-NEXT: vmov.f32 s3, s5
; CHECK-LV-NEXT: vmovx.f16 s1, s7
; CHECK-LV-NEXT: vmov.f32 s0, s6
; CHECK-LV-NEXT: vins.f16 s1, s7
; CHECK-LV-NEXT: bx lr
;
; CHECK-LIS-LABEL: shuffle3_i16:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmovx.f16 s5, s3
; CHECK-LIS-NEXT: vmovx.f16 s6, s1
; CHECK-LIS-NEXT: vmovx.f16 s4, s0
; CHECK-LIS-NEXT: vins.f16 s1, s0
; CHECK-LIS-NEXT: vins.f16 s6, s4
; CHECK-LIS-NEXT: vins.f16 s5, s3
; CHECK-LIS-NEXT: vmov.f32 s7, s1
; CHECK-LIS-NEXT: vmov.f32 s4, s2
; CHECK-LIS-NEXT: vmov q0, q1
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x i16> %out
Expand Down Expand Up @@ -476,42 +491,79 @@ entry:
}

define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
; CHECK-LABEL: shuffle3_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmov.u8 r0, q0[4]
; CHECK-NEXT: vmov.8 q0[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[5]
; CHECK-NEXT: vmov.8 q0[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[15]
; CHECK-NEXT: vmov.8 q0[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[7]
; CHECK-NEXT: vmov.8 q0[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[14]
; CHECK-NEXT: vmov.8 q0[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[9]
; CHECK-NEXT: vmov.8 q0[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[6]
; CHECK-NEXT: vmov.8 q0[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[3]
; CHECK-NEXT: vmov.8 q0[7], r0
; CHECK-NEXT: vmov.u8 r0, q1[10]
; CHECK-NEXT: vmov.8 q0[8], r0
; CHECK-NEXT: vmov.u8 r0, q1[12]
; CHECK-NEXT: vmov.8 q0[9], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.8 q0[10], r0
; CHECK-NEXT: vmov.u8 r0, q1[13]
; CHECK-NEXT: vmov.8 q0[11], r0
; CHECK-NEXT: vmov.u8 r0, q1[2]
; CHECK-NEXT: vmov.8 q0[12], r0
; CHECK-NEXT: vmov.u8 r0, q1[8]
; CHECK-NEXT: vmov.8 q0[13], r0
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmov.8 q0[14], r0
; CHECK-NEXT: vmov.u8 r0, q1[11]
; CHECK-NEXT: vmov.8 q0[15], r0
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_i8:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmov q1, q0
; CHECK-LV-NEXT: vmov.u8 r0, q0[4]
; CHECK-LV-NEXT: vmov.8 q0[0], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[5]
; CHECK-LV-NEXT: vmov.8 q0[1], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[15]
; CHECK-LV-NEXT: vmov.8 q0[2], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[7]
; CHECK-LV-NEXT: vmov.8 q0[3], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[14]
; CHECK-LV-NEXT: vmov.8 q0[4], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[9]
; CHECK-LV-NEXT: vmov.8 q0[5], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[6]
; CHECK-LV-NEXT: vmov.8 q0[6], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[3]
; CHECK-LV-NEXT: vmov.8 q0[7], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[10]
; CHECK-LV-NEXT: vmov.8 q0[8], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[12]
; CHECK-LV-NEXT: vmov.8 q0[9], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[1]
; CHECK-LV-NEXT: vmov.8 q0[10], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[13]
; CHECK-LV-NEXT: vmov.8 q0[11], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[2]
; CHECK-LV-NEXT: vmov.8 q0[12], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[8]
; CHECK-LV-NEXT: vmov.8 q0[13], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[0]
; CHECK-LV-NEXT: vmov.8 q0[14], r0
; CHECK-LV-NEXT: vmov.u8 r0, q1[11]
; CHECK-LV-NEXT: vmov.8 q0[15], r0
; CHECK-LV-NEXT: bx lr
;
; CHECK-LIS-LABEL: shuffle3_i8:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmov.u8 r0, q0[4]
; CHECK-LIS-NEXT: vmov.8 q1[0], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[5]
; CHECK-LIS-NEXT: vmov.8 q1[1], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[15]
; CHECK-LIS-NEXT: vmov.8 q1[2], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[7]
; CHECK-LIS-NEXT: vmov.8 q1[3], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[14]
; CHECK-LIS-NEXT: vmov.8 q1[4], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[9]
; CHECK-LIS-NEXT: vmov.8 q1[5], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[6]
; CHECK-LIS-NEXT: vmov.8 q1[6], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[3]
; CHECK-LIS-NEXT: vmov.8 q1[7], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[10]
; CHECK-LIS-NEXT: vmov.8 q1[8], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[12]
; CHECK-LIS-NEXT: vmov.8 q1[9], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[1]
; CHECK-LIS-NEXT: vmov.8 q1[10], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[13]
; CHECK-LIS-NEXT: vmov.8 q1[11], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[2]
; CHECK-LIS-NEXT: vmov.8 q1[12], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[8]
; CHECK-LIS-NEXT: vmov.8 q1[13], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[0]
; CHECK-LIS-NEXT: vmov.8 q1[14], r0
; CHECK-LIS-NEXT: vmov.u8 r0, q0[11]
; CHECK-LIS-NEXT: vmov.8 q1[15], r0
; CHECK-LIS-NEXT: vmov q0, q1
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
ret <16 x i8> %out
Expand Down Expand Up @@ -1143,18 +1195,31 @@ entry:
}

define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle3_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vmovx.f16 s2, s5
; CHECK-NEXT: vmovx.f16 s0, s4
; CHECK-NEXT: vins.f16 s5, s4
; CHECK-NEXT: vins.f16 s2, s0
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vmovx.f16 s1, s7
; CHECK-NEXT: vmov.f32 s0, s6
; CHECK-NEXT: vins.f16 s1, s7
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_f16:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmov q1, q0
; CHECK-LV-NEXT: vmovx.f16 s2, s5
; CHECK-LV-NEXT: vmovx.f16 s0, s4
; CHECK-LV-NEXT: vins.f16 s5, s4
; CHECK-LV-NEXT: vins.f16 s2, s0
; CHECK-LV-NEXT: vmov.f32 s3, s5
; CHECK-LV-NEXT: vmovx.f16 s1, s7
; CHECK-LV-NEXT: vmov.f32 s0, s6
; CHECK-LV-NEXT: vins.f16 s1, s7
; CHECK-LV-NEXT: bx lr
;
; CHECK-LIS-LABEL: shuffle3_f16:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmovx.f16 s5, s3
; CHECK-LIS-NEXT: vmovx.f16 s6, s1
; CHECK-LIS-NEXT: vmovx.f16 s4, s0
; CHECK-LIS-NEXT: vins.f16 s1, s0
; CHECK-LIS-NEXT: vins.f16 s6, s4
; CHECK-LIS-NEXT: vins.f16 s5, s3
; CHECK-LIS-NEXT: vmov.f32 s7, s1
; CHECK-LIS-NEXT: vmov.f32 s4, s2
; CHECK-LIS-NEXT: vmov q0, q1
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x half> %out
Expand Down
Loading