@@ -713,6 +713,26 @@ Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc(MachineIRBuilder &B,
713
713
return Dst;
714
714
}
715
715
716
+ // Create new vgpr destination register for MI then move it to current
717
+ // MI's sgpr destination using one or more V_READFIRSTLANE_B32 instructions.
718
+ void AMDGPURegisterBankInfo::buildReadFirstLaneDst (MachineIRBuilder &B,
719
+ MachineInstr &MI) const {
720
+ MachineRegisterInfo &MRI = *B.getMRI ();
721
+ Register Dst = MI.getOperand (0 ).getReg ();
722
+ const RegisterBank *DstBank = getRegBank (Dst, MRI, *TRI);
723
+ if (DstBank != &AMDGPU::SGPRRegBank)
724
+ return ;
725
+
726
+ Register VgprDst = MRI.createGenericVirtualRegister (MRI.getType (Dst));
727
+ MRI.setRegBank (VgprDst, AMDGPU::VGPRRegBank);
728
+
729
+ MI.getOperand (0 ).setReg (VgprDst);
730
+ MachineBasicBlock *MBB = MI.getParent ();
731
+ B.setInsertPt (*MBB, std::next (MI.getIterator ()));
732
+ // readFirstLane VgprDst into Dst after MI.
733
+ return buildReadFirstLaneForType (B, Dst, VgprDst);
734
+ }
735
+
716
736
void AMDGPURegisterBankInfo::buildReadFirstLaneB32 (MachineIRBuilder &B,
717
737
Register SgprDst,
718
738
Register VgprSrc) const {
@@ -736,32 +756,42 @@ void AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32(
736
756
}
737
757
738
758
B.buildUnmerge (VgprSrcParts, VgprSrc);
739
- for (unsigned i = 0 ; i < NumElts; ++i) {
759
+ for (unsigned i = 0 ; i < NumElts; ++i)
740
760
buildReadFirstLaneB32 (B, SgprDstParts[i], VgprSrcParts[i]);
741
- }
761
+
742
762
B.buildMergeLikeInstr (SgprDst, SgprDstParts);
743
763
}
744
764
745
765
void AMDGPURegisterBankInfo::buildReadFirstLaneForType (MachineIRBuilder &B,
746
766
Register SgprDst,
747
767
Register VgprSrc) const {
748
768
MachineRegisterInfo &MRI = *B.getMRI ();
769
+ LLT S16 = LLT::scalar (16 );
749
770
LLT S32 = LLT::scalar (32 );
750
771
LLT S64 = LLT::scalar (64 );
751
772
LLT Ty = MRI.getType (SgprDst);
752
773
753
- if (Ty == S32 || Ty == LLT::pointer (3 , 32 )) {
754
- return buildReadFirstLaneB32 (B, SgprDst, VgprSrc);
774
+ if (Ty == S16) {
775
+ Register VgprSrc32 = MRI.createGenericVirtualRegister (S32);
776
+ MRI.setRegBank (VgprSrc32, AMDGPU::VGPRRegBank);
777
+ Register SgprDst32 = MRI.createGenericVirtualRegister (S32);
778
+ MRI.setRegBank (SgprDst32, AMDGPU::SGPRRegBank);
779
+
780
+ B.buildAnyExt (VgprSrc32, VgprSrc);
781
+ buildReadFirstLaneB32 (B, SgprDst32, VgprSrc32);
782
+ B.buildTrunc (SgprDst, SgprDst32);
783
+ return ;
755
784
}
756
785
757
- if (Ty == S64 || Ty == LLT::pointer (0 , 64 ) || Ty == LLT::pointer (1 , 64 )) {
786
+ if (Ty == S32 || Ty == LLT::pointer (3 , 32 ))
787
+ return buildReadFirstLaneB32 (B, SgprDst, VgprSrc);
788
+
789
+ if (Ty == S64 || Ty == LLT::pointer (0 , 64 ) || Ty == LLT::pointer (1 , 64 ))
758
790
return buildReadFirstLaneSequenceOfB32 (B, SgprDst, VgprSrc, 2 );
759
- }
760
791
761
- if (Ty.isVector () && Ty.getElementType () == S32) {
792
+ if (Ty.isVector () && Ty.getElementType () == S32)
762
793
return buildReadFirstLaneSequenceOfB32 (B, SgprDst, VgprSrc,
763
794
Ty.getNumElements ());
764
- }
765
795
766
796
llvm_unreachable (" Type not supported" );
767
797
}
@@ -1036,6 +1066,17 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
1036
1066
MI.getOperand (OpIdx).setReg (Reg);
1037
1067
}
1038
1068
1069
+ // MI has uniform inputs and output but only available machine instruction has
1070
+ // vgpr dest. Make it uniform by moving dst to sgpr using readfirstlane.
1071
+ void AMDGPURegisterBankInfo::constrainVgprDstOpWithReadfirstlane (
1072
+ MachineIRBuilder &B, MachineInstr &MI,
1073
+ const OperandsMapper &OpdMapper) const {
1074
+ const RegisterBank *DstBank =
1075
+ OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1076
+ if (DstBank != &AMDGPU::VGPRRegBank)
1077
+ buildReadFirstLaneDst (B, MI);
1078
+ }
1079
+
1039
1080
// / Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the
1040
1081
// / rest will be in the remainder.
1041
1082
static std::pair<LLT, LLT> splitUnequalType (LLT Ty, unsigned FirstSize) {
@@ -2117,6 +2158,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
2117
2158
B.setInstrAndDebugLoc (MI);
2118
2159
unsigned Opc = MI.getOpcode ();
2119
2160
MachineRegisterInfo &MRI = OpdMapper.getMRI ();
2161
+
2162
+ // Switch for uniformity info based regbank selection.
2163
+ // Keep in sync with switches in AMDGPURegBankSelect and getInstrMapping.
2164
+ switch (Opc) {
2165
+ case AMDGPU::G_FADD: {
2166
+ applyDefaultMapping (OpdMapper);
2167
+ unsigned Size = MRI.getType (MI.getOperand (0 ).getReg ()).getSizeInBits ();
2168
+ if (!Subtarget.hasSALUFloatInsts () || (Size != 32 && Size != 16 ))
2169
+ constrainVgprDstOpWithReadfirstlane (B, MI, OpdMapper);
2170
+ return ;
2171
+ }
2172
+ default :
2173
+ break ;
2174
+ }
2175
+
2120
2176
switch (Opc) {
2121
2177
case AMDGPU::G_CONSTANT:
2122
2178
case AMDGPU::G_IMPLICIT_DEF: {
@@ -3372,6 +3428,28 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
3372
3428
MI.getNumOperands ());
3373
3429
}
3374
3430
3431
+ const RegisterBankInfo::InstructionMapping &
3432
+ AMDGPURegisterBankInfo::getDefaultMappingVOPWithPreassignedDef (
3433
+ const MachineInstr &MI) const {
3434
+ SmallVector<const ValueMapping *, 8 > OpdsMapping (MI.getNumOperands ());
3435
+ const MachineRegisterInfo &MRI = MI.getMF ()->getRegInfo ();
3436
+ // Dst reg bank should have been set already by uniformity info
3437
+ OpdsMapping[0 ] =
3438
+ getPreAssignedOpMapping (MI.getOperand (0 ).getReg (), MRI, *TRI);
3439
+
3440
+ for (unsigned i = 1 , e = MI.getNumOperands (); i != e; ++i) {
3441
+ const MachineOperand &Op = MI.getOperand (i);
3442
+ if (!Op.isReg ())
3443
+ continue ;
3444
+
3445
+ unsigned Size = getSizeInBits (Op.getReg (), MRI, *TRI);
3446
+ unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3447
+ OpdsMapping[i] = AMDGPU::getValueMapping (BankID, Size );
3448
+ }
3449
+ return getInstructionMapping (1 , 1 , getOperandsMapping (OpdsMapping),
3450
+ MI.getNumOperands ());
3451
+ }
3452
+
3375
3453
const RegisterBankInfo::InstructionMapping &
3376
3454
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR (const MachineInstr &MI) const {
3377
3455
const MachineFunction &MF = *MI.getParent ()->getParent ();
@@ -3524,6 +3602,20 @@ AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
3524
3602
return AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, Size );
3525
3603
}
3526
3604
3605
+ const RegisterBankInfo::ValueMapping *
3606
+ AMDGPURegisterBankInfo::getPreAssignedOpMapping (
3607
+ Register Reg, const MachineRegisterInfo &MRI,
3608
+ const TargetRegisterInfo &TRI) const {
3609
+ const RegisterBank *Bank = getRegBank (Reg, MRI, TRI);
3610
+ assert (Bank);
3611
+ unsigned BankId = Bank->getID ();
3612
+ unsigned Size = getSizeInBits (Reg, MRI, TRI);
3613
+ assert (BankId == AMDGPU::SGPRRegBankID ||
3614
+ BankId == (Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID));
3615
+
3616
+ return AMDGPU::getValueMapping (BankId, Size );
3617
+ }
3618
+
3527
3619
const RegisterBankInfo::ValueMapping *
3528
3620
AMDGPURegisterBankInfo::getAGPROpMapping (Register Reg,
3529
3621
const MachineRegisterInfo &MRI,
@@ -3640,6 +3732,24 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
3640
3732
3641
3733
SmallVector<const ValueMapping*, 8 > OpdsMapping (MI.getNumOperands ());
3642
3734
3735
+ // Switch for uniformity info based regbank selection.
3736
+ // Requires pre-selected, by AMDGPURegBankSelect, reg-banks on dst registers.
3737
+ // Keep in sync with switches in AMDGPURegBankSelect and applyMappingImpl.
3738
+ switch (MI.getOpcode ()) {
3739
+ case AMDGPU::G_FADD: {
3740
+ Register Dst = MI.getOperand (0 ).getReg ();
3741
+ unsigned Size = MRI.getType (Dst).getSizeInBits ();
3742
+ const RegisterBank *DstBank = getRegBank (Dst, MRI, *TRI);
3743
+ assert (DstBank);
3744
+ if (Subtarget.hasSALUFloatInsts () && (Size == 32 || Size == 16 ) &&
3745
+ DstBank == &AMDGPU::SGPRRegBank)
3746
+ return getDefaultMappingSOP (MI);
3747
+ return getDefaultMappingVOPWithPreassignedDef (MI);
3748
+ }
3749
+ default :
3750
+ break ;
3751
+ }
3752
+
3643
3753
switch (MI.getOpcode ()) {
3644
3754
default :
3645
3755
return getInvalidInstructionMapping ();
@@ -3735,7 +3845,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
3735
3845
if (isSALUMapping (MI))
3736
3846
return getDefaultMappingSOP (MI);
3737
3847
return getDefaultMappingVOP (MI);
3738
- case AMDGPU::G_FADD:
3739
3848
case AMDGPU::G_FSUB:
3740
3849
case AMDGPU::G_FMUL:
3741
3850
case AMDGPU::G_FMA:
0 commit comments