@@ -718,6 +718,26 @@ Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc(MachineIRBuilder &B,
718
718
return Dst;
719
719
}
720
720
721
+ // Create new vgpr destination register for MI then move it to current
722
+ // MI's sgpr destination using one or more V_READFIRSTLANE_B32 instructions.
723
+ void AMDGPURegisterBankInfo::buildReadFirstLaneDst (MachineIRBuilder &B,
724
+ MachineInstr &MI) const {
725
+ MachineRegisterInfo &MRI = *B.getMRI ();
726
+ Register Dst = MI.getOperand (0 ).getReg ();
727
+ const RegisterBank *DstBank = getRegBank (Dst, MRI, *TRI);
728
+ if (DstBank != &AMDGPU::SGPRRegBank)
729
+ return ;
730
+
731
+ Register VgprDst = MRI.createGenericVirtualRegister (MRI.getType (Dst));
732
+ MRI.setRegBank (VgprDst, AMDGPU::VGPRRegBank);
733
+
734
+ MI.getOperand (0 ).setReg (VgprDst);
735
+ MachineBasicBlock *MBB = MI.getParent ();
736
+ B.setInsertPt (*MBB, std::next (MI.getIterator ()));
737
+ // readFirstLane VgprDst into Dst after MI.
738
+ return buildReadFirstLaneForType (B, Dst, VgprDst);
739
+ }
740
+
721
741
void AMDGPURegisterBankInfo::buildReadFirstLaneB32 (MachineIRBuilder &B,
722
742
Register SgprDst,
723
743
Register VgprSrc) const {
@@ -741,32 +761,42 @@ void AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32(
741
761
}
742
762
743
763
B.buildUnmerge (VgprSrcParts, VgprSrc);
744
- for (unsigned i = 0 ; i < NumElts; ++i) {
764
+ for (unsigned i = 0 ; i < NumElts; ++i)
745
765
buildReadFirstLaneB32 (B, SgprDstParts[i], VgprSrcParts[i]);
746
- }
766
+
747
767
B.buildMergeLikeInstr (SgprDst, SgprDstParts);
748
768
}
749
769
750
770
void AMDGPURegisterBankInfo::buildReadFirstLaneForType (MachineIRBuilder &B,
751
771
Register SgprDst,
752
772
Register VgprSrc) const {
753
773
MachineRegisterInfo &MRI = *B.getMRI ();
774
+ LLT S16 = LLT::scalar (16 );
754
775
LLT S32 = LLT::scalar (32 );
755
776
LLT S64 = LLT::scalar (64 );
756
777
LLT Ty = MRI.getType (SgprDst);
757
778
758
- if (Ty == S32 || Ty == LLT::pointer (3 , 32 )) {
759
- return buildReadFirstLaneB32 (B, SgprDst, VgprSrc);
779
+ if (Ty == S16) {
780
+ Register VgprSrc32 = MRI.createGenericVirtualRegister (S32);
781
+ MRI.setRegBank (VgprSrc32, AMDGPU::VGPRRegBank);
782
+ Register SgprDst32 = MRI.createGenericVirtualRegister (S32);
783
+ MRI.setRegBank (SgprDst32, AMDGPU::SGPRRegBank);
784
+
785
+ B.buildAnyExt (VgprSrc32, VgprSrc);
786
+ buildReadFirstLaneB32 (B, SgprDst32, VgprSrc32);
787
+ B.buildTrunc (SgprDst, SgprDst32);
788
+ return ;
760
789
}
761
790
762
- if (Ty == S64 || Ty == LLT::pointer (0 , 64 ) || Ty == LLT::pointer (1 , 64 )) {
791
+ if (Ty == S32 || Ty == LLT::pointer (3 , 32 ))
792
+ return buildReadFirstLaneB32 (B, SgprDst, VgprSrc);
793
+
794
+ if (Ty == S64 || Ty == LLT::pointer (0 , 64 ) || Ty == LLT::pointer (1 , 64 ))
763
795
return buildReadFirstLaneSequenceOfB32 (B, SgprDst, VgprSrc, 2 );
764
- }
765
796
766
- if (Ty.isVector () && Ty.getElementType () == S32) {
797
+ if (Ty.isVector () && Ty.getElementType () == S32)
767
798
return buildReadFirstLaneSequenceOfB32 (B, SgprDst, VgprSrc,
768
799
Ty.getNumElements ());
769
- }
770
800
771
801
llvm_unreachable (" Type not supported" );
772
802
}
@@ -1041,6 +1071,17 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
1041
1071
MI.getOperand (OpIdx).setReg (Reg);
1042
1072
}
1043
1073
1074
+ // MI has uniform inputs and output but only available machine instruction has
1075
+ // vgpr dest. Make it uniform by moving dst to sgpr using readfirstlane.
1076
+ void AMDGPURegisterBankInfo::constrainVgprDstOpWithReadfirstlane (
1077
+ MachineIRBuilder &B, MachineInstr &MI,
1078
+ const OperandsMapper &OpdMapper) const {
1079
+ const RegisterBank *DstBank =
1080
+ OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1081
+ if (DstBank != &AMDGPU::VGPRRegBank)
1082
+ buildReadFirstLaneDst (B, MI);
1083
+ }
1084
+
1044
1085
// / Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the
1045
1086
// / rest will be in the remainder.
1046
1087
static std::pair<LLT, LLT> splitUnequalType (LLT Ty, unsigned FirstSize) {
@@ -2197,6 +2238,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
2197
2238
B.setInstrAndDebugLoc (MI);
2198
2239
unsigned Opc = MI.getOpcode ();
2199
2240
MachineRegisterInfo &MRI = OpdMapper.getMRI ();
2241
+
2242
+ // Switch for uniformity info based regbank selection.
2243
+ // Keep in sync with switches in AMDGPURegBankSelect and getInstrMapping.
2244
+ switch (Opc) {
2245
+ case AMDGPU::G_FADD: {
2246
+ applyDefaultMapping (OpdMapper);
2247
+ unsigned Size = MRI.getType (MI.getOperand (0 ).getReg ()).getSizeInBits ();
2248
+ if (!Subtarget.hasSALUFloatInsts () || (Size != 32 && Size != 16 ))
2249
+ constrainVgprDstOpWithReadfirstlane (B, MI, OpdMapper);
2250
+ return ;
2251
+ }
2252
+ default :
2253
+ break ;
2254
+ }
2255
+
2200
2256
switch (Opc) {
2201
2257
case AMDGPU::G_CONSTANT:
2202
2258
case AMDGPU::G_IMPLICIT_DEF: {
@@ -3571,6 +3627,28 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
3571
3627
MI.getNumOperands ());
3572
3628
}
3573
3629
3630
+ const RegisterBankInfo::InstructionMapping &
3631
+ AMDGPURegisterBankInfo::getDefaultMappingVOPWithPreassignedDef (
3632
+ const MachineInstr &MI) const {
3633
+ SmallVector<const ValueMapping *, 8 > OpdsMapping (MI.getNumOperands ());
3634
+ const MachineRegisterInfo &MRI = MI.getMF ()->getRegInfo ();
3635
+ // Dst reg bank should have been set already by uniformity info
3636
+ OpdsMapping[0 ] =
3637
+ getPreAssignedOpMapping (MI.getOperand (0 ).getReg (), MRI, *TRI);
3638
+
3639
+ for (unsigned i = 1 , e = MI.getNumOperands (); i != e; ++i) {
3640
+ const MachineOperand &Op = MI.getOperand (i);
3641
+ if (!Op.isReg ())
3642
+ continue ;
3643
+
3644
+ unsigned Size = getSizeInBits (Op.getReg (), MRI, *TRI);
3645
+ unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3646
+ OpdsMapping[i] = AMDGPU::getValueMapping (BankID, Size );
3647
+ }
3648
+ return getInstructionMapping (1 , 1 , getOperandsMapping (OpdsMapping),
3649
+ MI.getNumOperands ());
3650
+ }
3651
+
3574
3652
const RegisterBankInfo::InstructionMapping &
3575
3653
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR (const MachineInstr &MI) const {
3576
3654
const MachineFunction &MF = *MI.getParent ()->getParent ();
@@ -3723,6 +3801,20 @@ AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
3723
3801
return AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, Size );
3724
3802
}
3725
3803
3804
+ const RegisterBankInfo::ValueMapping *
3805
+ AMDGPURegisterBankInfo::getPreAssignedOpMapping (
3806
+ Register Reg, const MachineRegisterInfo &MRI,
3807
+ const TargetRegisterInfo &TRI) const {
3808
+ const RegisterBank *Bank = getRegBank (Reg, MRI, TRI);
3809
+ assert (Bank);
3810
+ unsigned BankId = Bank->getID ();
3811
+ unsigned Size = getSizeInBits (Reg, MRI, TRI);
3812
+ assert (BankId == AMDGPU::SGPRRegBankID ||
3813
+ BankId == (Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID));
3814
+
3815
+ return AMDGPU::getValueMapping (BankId, Size );
3816
+ }
3817
+
3726
3818
const RegisterBankInfo::ValueMapping *
3727
3819
AMDGPURegisterBankInfo::getAGPROpMapping (Register Reg,
3728
3820
const MachineRegisterInfo &MRI,
@@ -3839,6 +3931,24 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
3839
3931
3840
3932
SmallVector<const ValueMapping*, 8 > OpdsMapping (MI.getNumOperands ());
3841
3933
3934
+ // Switch for uniformity info based regbank selection.
3935
+ // Requires pre-selected, by AMDGPURegBankSelect, reg-banks on dst registers.
3936
+ // Keep in sync with switches in AMDGPURegBankSelect and applyMappingImpl.
3937
+ switch (MI.getOpcode ()) {
3938
+ case AMDGPU::G_FADD: {
3939
+ Register Dst = MI.getOperand (0 ).getReg ();
3940
+ unsigned Size = MRI.getType (Dst).getSizeInBits ();
3941
+ const RegisterBank *DstBank = getRegBank (Dst, MRI, *TRI);
3942
+ assert (DstBank);
3943
+ if (Subtarget.hasSALUFloatInsts () && (Size == 32 || Size == 16 ) &&
3944
+ DstBank == &AMDGPU::SGPRRegBank)
3945
+ return getDefaultMappingSOP (MI);
3946
+ return getDefaultMappingVOPWithPreassignedDef (MI);
3947
+ }
3948
+ default :
3949
+ break ;
3950
+ }
3951
+
3842
3952
switch (MI.getOpcode ()) {
3843
3953
default :
3844
3954
return getInvalidInstructionMapping ();
@@ -3936,7 +4046,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
3936
4046
if (isSALUMapping (MI))
3937
4047
return getDefaultMappingSOP (MI);
3938
4048
return getDefaultMappingVOP (MI);
3939
- case AMDGPU::G_FADD:
3940
4049
case AMDGPU::G_FSUB:
3941
4050
case AMDGPU::G_FMUL:
3942
4051
case AMDGPU::G_FMA:
0 commit comments