@@ -5553,9 +5553,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
   unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
   bool IsPre = ExtLd.isPre();
   bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
-  bool InsertIntoXReg = false;
+  unsigned InsertIntoSubReg = 0;
   bool IsDst64 = Ty.getSizeInBits() == 64;
 
+  // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
+  // long as they are scalar.
+  bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
+  if ((IsSExt && IsFPR) || Ty.isVector())
+    return false;
+
   unsigned Opc = 0;
   LLT NewLdDstTy;
   LLT s32 = LLT::scalar(32);
@@ -5568,9 +5574,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
       else
         Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
       NewLdDstTy = IsDst64 ? s64 : s32;
+    } else if (IsFPR) {
+      Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
+      InsertIntoSubReg = AArch64::bsub;
+      NewLdDstTy = LLT::scalar(MemSizeBits);
     } else {
       Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
-      InsertIntoXReg = IsDst64;
+      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
       NewLdDstTy = s32;
     }
   } else if (MemSizeBits == 16) {
@@ -5580,27 +5590,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
       else
         Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
       NewLdDstTy = IsDst64 ? s64 : s32;
+    } else if (IsFPR) {
+      Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
+      InsertIntoSubReg = AArch64::hsub;
+      NewLdDstTy = LLT::scalar(MemSizeBits);
     } else {
       Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
-      InsertIntoXReg = IsDst64;
+      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
       NewLdDstTy = s32;
     }
   } else if (MemSizeBits == 32) {
     if (IsSExt) {
       Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
       NewLdDstTy = s64;
+    } else if (IsFPR) {
+      Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
+      InsertIntoSubReg = AArch64::ssub;
+      NewLdDstTy = LLT::scalar(MemSizeBits);
     } else {
       Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
-      InsertIntoXReg = IsDst64;
+      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
       NewLdDstTy = s32;
     }
   } else {
     llvm_unreachable("Unexpected size for indexed load");
   }
 
-  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
-    return false; // We should be on gpr.
-
   auto Cst = getIConstantVRegVal(Offset, MRI);
   if (!Cst)
     return false; // Shouldn't happen, but just in case.
@@ -5610,15 +5625,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
   LdMI.cloneMemRefs(ExtLd);
   constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
   // Make sure to select the load with the MemTy as the dest type, and then
-  // insert into X reg if needed.
-  if (InsertIntoXReg) {
+  // insert into a larger reg if needed.
+  if (InsertIntoSubReg) {
     // Generate a SUBREG_TO_REG.
     auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                         .addImm(0)
                         .addUse(LdMI.getReg(1))
-                        .addImm(AArch64::sub_32);
-    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
-                                 MRI);
+                        .addImm(InsertIntoSubReg);
+    RBI.constrainGenericRegister(
+        SubToReg.getReg(0),
+        *getRegClassForTypeOnBank(MRI.getType(Dst),
+                                  *RBI.getRegBank(Dst, MRI, TRI)),
+        MRI);
   } else {
     auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
     selectCopy(*Copy, TII, MRI, TRI, RBI);
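
Note: the patch generalizes the old InsertIntoXReg boolean into a sub-register index, so the single SUBREG_TO_REG path can widen both GPR results (sub_32) and scalar FPR results (bsub/hsub/ssub) to the full destination register. Sign-extending FPR loads are rejected up front, presumably because AArch64 has no sign-extending FPR load instruction. Below is a minimal standalone sketch of the size-to-opcode/sub-register mapping the new IsFPR branches implement; it is illustrative only, and the enum and struct are made-up stand-ins rather than LLVM types.

// Standalone sketch (not LLVM code) of the selection table this patch adds
// for scalar FPR destinations. BSub/HSub/SSub stand in for the real
// AArch64::bsub/hsub/ssub sub-register indices.
#include <cassert>
#include <cstdio>

enum SubRegIdx { NoSubReg = 0, BSub, HSub, SSub };

struct Choice {
  const char *Opc;  // chosen pre/post-indexed FPR load
  SubRegIdx Insert; // sub-register the load writes; SUBREG_TO_REG widens it
};

// Mirrors the new IsFPR branches: the load writes the low b/h/s lane of the
// FPR, and a SUBREG_TO_REG (with its first operand 0, asserting the upper
// bits are zero) produces the full-width destination register.
static Choice pickFPRLoad(unsigned MemSizeBits, bool IsPre) {
  switch (MemSizeBits) {
  case 8:  return {IsPre ? "LDRBpre" : "LDRBpost", BSub};
  case 16: return {IsPre ? "LDRHpre" : "LDRHpost", HSub};
  case 32: return {IsPre ? "LDRSpre" : "LDRSpost", SSub};
  default: assert(false && "unexpected size for indexed load"); return {};
  }
}

int main() {
  Choice C = pickFPRLoad(16, /*IsPre=*/true);
  std::printf("%s, subreg=%d\n", C.Opc, C.Insert); // LDRHpre, subreg=2
}

The same structure explains why the GPR fall-through branches now assign IsDst64 ? AArch64::sub_32 : 0 instead of a boolean: a zero index means "no widening needed, just copy", while any nonzero index selects which sub-register the SUBREG_TO_REG inserts into.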