@@ -700,58 +700,69 @@ static LLT getHalfSizedType(LLT Ty) {
 
 // Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
 // source value into a scalar register.
-Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
-                                                    MachineRegisterInfo &MRI,
-                                                    Register Src) const {
+Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc(MachineIRBuilder &B,
+                                                       Register Src) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
   LLT Ty = MRI.getType(Src);
   const RegisterBank *Bank = getRegBank(Src, MRI, *TRI);
 
-  if (Bank == &AMDGPU::SGPRRegBank)
-    return Src;
-
-  unsigned Bits = Ty.getSizeInBits();
-  assert(Bits % 32 == 0);
-
   if (Bank != &AMDGPU::VGPRRegBank) {
     // We need to copy from AGPR to VGPR
     Src = B.buildCopy(Ty, Src).getReg(0);
     MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
   }
 
+  return buildReadFirstLaneForType(B, Ty, Src).getReg(0);
+}
+
+MachineInstrBuilder AMDGPURegisterBankInfo::buildReadFirstLaneB32(
+    MachineIRBuilder &B, const DstOp &SgprDst, const SrcOp &VgprSrc) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  auto RFL = B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {SgprDst}, {VgprSrc});
+  MRI.setRegClass(RFL.getReg(0), &AMDGPU::SReg_32RegClass);
+  MRI.setRegClass(RFL.getReg(1), &AMDGPU::VGPR_32RegClass);
+  return RFL;
+}
+
+MachineInstrBuilder AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32(
+    MachineIRBuilder &B, const DstOp &SgprDst, const SrcOp &VgprSrc,
+    unsigned NumElts) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
   LLT S32 = LLT::scalar(32);
-  unsigned NumParts = Bits / 32;
-  SmallVector<Register, 8> SrcParts;
-  SmallVector<Register, 8> DstParts;
+  SmallVector<Register, 8> SgprDstParts;
 
-  if (Bits == 32) {
-    SrcParts.push_back(Src);
-  } else {
-    auto Unmerge = B.buildUnmerge(S32, Src);
-    for (unsigned i = 0; i < NumParts; ++i)
-      SrcParts.push_back(Unmerge.getReg(i));
+  auto Unmerge = B.buildUnmerge(S32, VgprSrc);
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SgprDstParts.push_back(
+        buildReadFirstLaneB32(B, S32, Unmerge.getReg(i)).getReg(0));
   }
 
-  for (unsigned i = 0; i < NumParts; ++i) {
-    Register SrcPart = SrcParts[i];
-    Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-    MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
+  auto Merge = B.buildMergeLikeInstr(SgprDst, SgprDstParts);
+  MRI.setRegBank(Merge.getReg(0), AMDGPU::SGPRRegBank);
+  return Merge;
+}
 
-    const TargetRegisterClass *Constrained =
-        constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
-    (void)Constrained;
-    assert(Constrained && "Failed to constrain readfirstlane src reg");
+MachineInstrBuilder AMDGPURegisterBankInfo::buildReadFirstLaneForType(
+    MachineIRBuilder &B, const DstOp &SgprDst, const SrcOp &VgprSrc) const {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  LLT S32 = LLT::scalar(32);
+  LLT S64 = LLT::scalar(64);
+  LLT Ty = SgprDst.getLLTTy(MRI);
 
-    B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
+  if (Ty == S32 || (Ty.isPointer() && Ty.getSizeInBits() == 32)) {
+    return buildReadFirstLaneB32(B, SgprDst, VgprSrc);
+  }
 
-    DstParts.push_back(DstPart);
+  if (Ty == S64 || (Ty.isPointer() && Ty.getSizeInBits() == 64)) {
+    return buildReadFirstLaneSequenceOfB32(B, SgprDst, VgprSrc, 2);
   }
 
-  if (Bits == 32)
-    return DstParts[0];
+  if (Ty.isVector() && Ty.getElementType() == S32) {
+    return buildReadFirstLaneSequenceOfB32(B, SgprDst, VgprSrc,
+                                           Ty.getNumElements());
+  }
 
-  Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
-  MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
-  return Dst;
+  llvm_unreachable("Type not supported");
 }
 
 /// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
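
In effect, the new buildReadFirstLaneSequenceOfB32 path splits a wide VGPR value into 32-bit pieces, reads the first lane of each piece, and reassembles the scalar result. A minimal sketch of the generic MIR this produces, assuming an s64 source (virtual register names, banks, and classes below are illustrative only, not taken from the patch or its tests):

    %lo:vgpr(s32), %hi:vgpr(s32) = G_UNMERGE_VALUES %src:vgpr(s64)
    %slo:sreg_32(s32) = V_READFIRSTLANE_B32 %lo:vgpr_32(s32)
    %shi:sreg_32(s32) = V_READFIRSTLANE_B32 %hi:vgpr_32(s32)
    %dst:sgpr(s64) = G_MERGE_VALUES %slo:sreg_32(s32), %shi:sreg_32(s32)

32-bit scalars and pointers go straight through buildReadFirstLaneB32, vectors of s32 elements get one lane read per element, and any other type reaches the llvm_unreachable in buildReadFirstLaneForType.
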
@@ -888,7 +899,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
         B.setMBB(*LoopBB);
       }
 
-      Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg);
+      Register CurrentLaneReg = buildReadFirstLaneSrc(B, OpReg);
 
       // Build the comparison(s).
       unsigned OpSize = OpTy.getSizeInBits();
@@ -1020,7 +1031,7 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
   if (Bank == &AMDGPU::SGPRRegBank)
     return;
 
-  Reg = buildReadFirstLane(B, MRI, Reg);
+  Reg = buildReadFirstLaneSrc(B, Reg);
   MI.getOperand(OpIdx).setReg(Reg);
 }
 
@@ -1603,7 +1614,7 @@ bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
   MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
 
   if (!DstOnValu) {
-    DstHi = buildReadFirstLane(B, MRI, DstHi);
+    DstHi = buildReadFirstLaneSrc(B, DstHi);
   } else {
     MulHiInVgpr = true;
   }