@@ -700,58 +700,75 @@ static LLT getHalfSizedType(LLT Ty) {
700
700
701
701
// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
702
702
// source value into a scalar register.
703
- Register AMDGPURegisterBankInfo::buildReadFirstLane (MachineIRBuilder &B,
704
- MachineRegisterInfo &MRI,
705
- Register Src) const {
703
+ Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc (MachineIRBuilder &B,
704
+ Register Src) const {
705
+ MachineRegisterInfo &MRI = *B. getMRI ();
706
706
LLT Ty = MRI.getType (Src);
707
707
const RegisterBank *Bank = getRegBank (Src, MRI, *TRI);
708
708
709
- if (Bank == &AMDGPU::SGPRRegBank)
710
- return Src;
711
-
712
- unsigned Bits = Ty.getSizeInBits ();
713
- assert (Bits % 32 == 0 );
714
-
715
709
if (Bank != &AMDGPU::VGPRRegBank) {
716
710
// We need to copy from AGPR to VGPR
717
711
Src = B.buildCopy (Ty, Src).getReg (0 );
718
712
MRI.setRegBank (Src, AMDGPU::VGPRRegBank);
719
713
}
720
714
715
+ Register Dst = MRI.createGenericVirtualRegister (Ty);
716
+ MRI.setRegBank (Dst, AMDGPU::SGPRRegBank);
717
+ buildReadFirstLaneForType (B, Dst, Src);
718
+ return Dst;
719
+ }
720
+
721
+ void AMDGPURegisterBankInfo::buildReadFirstLaneB32 (MachineIRBuilder &B,
722
+ Register SgprDst,
723
+ Register VgprSrc) const {
724
+ MachineRegisterInfo &MRI = *B.getMRI ();
725
+ B.buildInstr (AMDGPU::V_READFIRSTLANE_B32, {SgprDst}, {VgprSrc});
726
+ MRI.setRegClass (VgprSrc, &AMDGPU::VGPR_32RegClass);
727
+ MRI.setRegClass (SgprDst, &AMDGPU::SReg_32RegClass);
728
+ }
729
+
730
+ void AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32 (
731
+ MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
732
+ unsigned NumElts) const {
733
+ MachineRegisterInfo &MRI = *B.getMRI ();
721
734
LLT S32 = LLT::scalar (32 );
722
- unsigned NumParts = Bits / 32 ;
723
- SmallVector<Register, 8 > SrcParts;
724
- SmallVector<Register, 8 > DstParts;
735
+ SmallVector<Register, 8 > VgprSrcParts;
736
+ SmallVector<Register, 8 > SgprDstParts;
725
737
726
- if (Bits == 32 ) {
727
- SrcParts.push_back (Src);
728
- } else {
729
- auto Unmerge = B.buildUnmerge (S32, Src);
730
- for (unsigned i = 0 ; i < NumParts; ++i)
731
- SrcParts.push_back (Unmerge.getReg (i));
738
+ for (unsigned i = 0 ; i < NumElts; ++i) {
739
+ VgprSrcParts.push_back (MRI.createGenericVirtualRegister (S32));
740
+ SgprDstParts.push_back (MRI.createGenericVirtualRegister (S32));
732
741
}
733
742
734
- for (unsigned i = 0 ; i < NumParts; ++i) {
735
- Register SrcPart = SrcParts[i];
736
- Register DstPart = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
737
- MRI.setType (DstPart, NumParts == 1 ? Ty : S32);
743
+ B.buildUnmerge (VgprSrcParts, VgprSrc);
744
+ for (unsigned i = 0 ; i < NumElts; ++i) {
745
+ buildReadFirstLaneB32 (B, SgprDstParts[i], VgprSrcParts[i]);
746
+ }
747
+ B.buildMergeLikeInstr (SgprDst, SgprDstParts);
748
+ }
738
749
739
- const TargetRegisterClass *Constrained =
740
- constrainGenericRegister (SrcPart, AMDGPU::VGPR_32RegClass, MRI);
741
- (void )Constrained;
742
- assert (Constrained && " Failed to constrain readfirstlane src reg" );
750
+ void AMDGPURegisterBankInfo::buildReadFirstLaneForType (MachineIRBuilder &B,
751
+ Register SgprDst,
752
+ Register VgprSrc) const {
753
+ MachineRegisterInfo &MRI = *B.getMRI ();
754
+ LLT S32 = LLT::scalar (32 );
755
+ LLT S64 = LLT::scalar (64 );
756
+ LLT Ty = MRI.getType (SgprDst);
743
757
744
- B.buildInstr (AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
758
+ if (Ty == S32 || Ty == LLT::pointer (3 , 32 )) {
759
+ return buildReadFirstLaneB32 (B, SgprDst, VgprSrc);
760
+ }
745
761
746
- DstParts.push_back (DstPart);
762
+ if (Ty == S64 || Ty == LLT::pointer (0 , 64 ) || Ty == LLT::pointer (1 , 64 )) {
763
+ return buildReadFirstLaneSequenceOfB32 (B, SgprDst, VgprSrc, 2 );
747
764
}
748
765
749
- if (Bits == 32 )
750
- return DstParts[0 ];
766
+ if (Ty.isVector () && Ty.getElementType () == S32) {
767
+ return buildReadFirstLaneSequenceOfB32 (B, SgprDst, VgprSrc,
768
+ Ty.getNumElements ());
769
+ }
751
770
752
- Register Dst = B.buildMergeLikeInstr (Ty, DstParts).getReg (0 );
753
- MRI.setRegBank (Dst, AMDGPU::SGPRRegBank);
754
- return Dst;
771
+ llvm_unreachable (" Type not supported" );
755
772
}
756
773
757
774
// / Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
@@ -888,7 +905,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
888
905
B.setMBB (*LoopBB);
889
906
}
890
907
891
- Register CurrentLaneReg = buildReadFirstLane (B, MRI , OpReg);
908
+ Register CurrentLaneReg = buildReadFirstLaneSrc (B , OpReg);
892
909
893
910
// Build the comparison(s).
894
911
unsigned OpSize = OpTy.getSizeInBits ();
@@ -1020,7 +1037,7 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
1020
1037
if (Bank == &AMDGPU::SGPRRegBank)
1021
1038
return ;
1022
1039
1023
- Reg = buildReadFirstLane (B, MRI , Reg);
1040
+ Reg = buildReadFirstLaneSrc (B , Reg);
1024
1041
MI.getOperand (OpIdx).setReg (Reg);
1025
1042
}
1026
1043
@@ -1603,7 +1620,7 @@ bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
1603
1620
MRI.setRegBank (DstHi, AMDGPU::VGPRRegBank);
1604
1621
1605
1622
if (!DstOnValu) {
1606
- DstHi = buildReadFirstLane (B, MRI , DstHi);
1623
+ DstHi = buildReadFirstLaneSrc (B , DstHi);
1607
1624
} else {
1608
1625
MulHiInVgpr = true ;
1609
1626
}
0 commit comments