@@ -3602,6 +3602,26 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
3602
3602
}
3603
3603
3604
3604
switch (MI.getOpcode ()) {
3605
+ case AMDGPU::S_UADDO_PSEUDO:
3606
+ case AMDGPU::S_USUBO_PSEUDO: {
3607
+ const DebugLoc &DL = MI.getDebugLoc ();
3608
+ MachineOperand &Dest0 = MI.getOperand (0 );
3609
+ MachineOperand &Dest1 = MI.getOperand (1 );
3610
+ MachineOperand &Src0 = MI.getOperand (2 );
3611
+ MachineOperand &Src1 = MI.getOperand (3 );
3612
+
3613
+ unsigned Opc = (MI.getOpcode () == AMDGPU::S_UADDO_PSEUDO)
3614
+ ? AMDGPU::S_ADD_I32
3615
+ : AMDGPU::S_SUB_I32;
3616
+ BuildMI (*BB, MI, DL, TII->get (Opc), Dest0.getReg ()).add (Src0).add (Src1);
3617
+
3618
+ BuildMI (*BB, MI, DL, TII->get (AMDGPU::S_CSELECT_B64), Dest1.getReg ())
3619
+ .addImm (1 )
3620
+ .addImm (0 );
3621
+
3622
+ MI.eraseFromParent ();
3623
+ return BB;
3624
+ }
3605
3625
case AMDGPU::S_ADD_U64_PSEUDO:
3606
3626
case AMDGPU::S_SUB_U64_PSEUDO: {
3607
3627
MachineRegisterInfo &MRI = BB->getParent ()->getRegInfo ();
@@ -3617,35 +3637,146 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
3617
3637
Register DestSub0 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3618
3638
Register DestSub1 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3619
3639
3620
- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm (MI, MRI,
3621
- Src0, BoolRC, AMDGPU::sub0,
3622
- &AMDGPU::SReg_32RegClass);
3623
- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm (MI, MRI,
3624
- Src0, BoolRC, AMDGPU::sub1,
3625
- &AMDGPU::SReg_32RegClass);
3640
+ MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm (
3641
+ MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
3642
+ MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm (
3643
+ MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
3626
3644
3627
- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm (MI, MRI,
3628
- Src1, BoolRC, AMDGPU::sub0,
3629
- &AMDGPU::SReg_32RegClass);
3630
- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm (MI, MRI,
3631
- Src1, BoolRC, AMDGPU::sub1,
3632
- &AMDGPU::SReg_32RegClass);
3645
+ MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm (
3646
+ MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
3647
+ MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm (
3648
+ MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
3633
3649
3634
3650
bool IsAdd = (MI.getOpcode () == AMDGPU::S_ADD_U64_PSEUDO);
3635
3651
3636
3652
unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
3637
3653
unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
3638
- BuildMI (*BB, MI, DL, TII->get (LoOpc), DestSub0)
3639
- .add (Src0Sub0)
3640
- .add (Src1Sub0);
3641
- BuildMI (*BB, MI, DL, TII->get (HiOpc), DestSub1)
3642
- .add (Src0Sub1)
3643
- .add (Src1Sub1);
3654
+ BuildMI (*BB, MI, DL, TII->get (LoOpc), DestSub0).add (Src0Sub0).add (Src1Sub0);
3655
+ BuildMI (*BB, MI, DL, TII->get (HiOpc), DestSub1).add (Src0Sub1).add (Src1Sub1);
3644
3656
BuildMI (*BB, MI, DL, TII->get (TargetOpcode::REG_SEQUENCE), Dest.getReg ())
3645
- .addReg (DestSub0)
3646
- .addImm (AMDGPU::sub0)
3647
- .addReg (DestSub1)
3648
- .addImm (AMDGPU::sub1);
3657
+ .addReg (DestSub0)
3658
+ .addImm (AMDGPU::sub0)
3659
+ .addReg (DestSub1)
3660
+ .addImm (AMDGPU::sub1);
3661
+ MI.eraseFromParent ();
3662
+ return BB;
3663
+ }
3664
+ case AMDGPU::V_ADD_U64_PSEUDO:
3665
+ case AMDGPU::V_SUB_U64_PSEUDO: {
3666
+ MachineRegisterInfo &MRI = BB->getParent ()->getRegInfo ();
3667
+ const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
3668
+ const SIRegisterInfo *TRI = ST.getRegisterInfo ();
3669
+ const DebugLoc &DL = MI.getDebugLoc ();
3670
+
3671
+ bool IsAdd = (MI.getOpcode () == AMDGPU::V_ADD_U64_PSEUDO);
3672
+
3673
+ const auto *CarryRC = TRI->getRegClass (AMDGPU::SReg_1_XEXECRegClassID);
3674
+
3675
+ Register DestSub0 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
3676
+ Register DestSub1 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
3677
+
3678
+ Register CarryReg = MRI.createVirtualRegister (CarryRC);
3679
+ Register DeadCarryReg = MRI.createVirtualRegister (CarryRC);
3680
+
3681
+ MachineOperand &Dest = MI.getOperand (0 );
3682
+ MachineOperand &Src0 = MI.getOperand (1 );
3683
+ MachineOperand &Src1 = MI.getOperand (2 );
3684
+
3685
+ const TargetRegisterClass *Src0RC = Src0.isReg ()
3686
+ ? MRI.getRegClass (Src0.getReg ())
3687
+ : &AMDGPU::VReg_64RegClass;
3688
+ const TargetRegisterClass *Src1RC = Src1.isReg ()
3689
+ ? MRI.getRegClass (Src1.getReg ())
3690
+ : &AMDGPU::VReg_64RegClass;
3691
+
3692
+ const TargetRegisterClass *Src0SubRC =
3693
+ TRI->getSubRegClass (Src0RC, AMDGPU::sub0);
3694
+ const TargetRegisterClass *Src1SubRC =
3695
+ TRI->getSubRegClass (Src1RC, AMDGPU::sub1);
3696
+
3697
+ MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm (
3698
+ MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
3699
+ MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm (
3700
+ MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
3701
+
3702
+ MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm (
3703
+ MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
3704
+ MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm (
3705
+ MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
3706
+
3707
+ unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
3708
+ MachineInstr *LoHalf = BuildMI (*BB, MI, DL, TII->get (LoOpc), DestSub0)
3709
+ .addReg (CarryReg, RegState::Define)
3710
+ .add (SrcReg0Sub0)
3711
+ .add (SrcReg1Sub0)
3712
+ .addImm (0 ); // clamp bit
3713
+
3714
+ unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
3715
+ MachineInstr *HiHalf =
3716
+ BuildMI (*BB, MI, DL, TII->get (HiOpc), DestSub1)
3717
+ .addReg (DeadCarryReg, RegState::Define | RegState::Dead)
3718
+ .add (SrcReg0Sub1)
3719
+ .add (SrcReg1Sub1)
3720
+ .addReg (CarryReg, RegState::Kill)
3721
+ .addImm (0 ); // clamp bit
3722
+
3723
+ BuildMI (*BB, MI, DL, TII->get (TargetOpcode::REG_SEQUENCE), Dest.getReg ())
3724
+ .addReg (DestSub0)
3725
+ .addImm (AMDGPU::sub0)
3726
+ .addReg (DestSub1)
3727
+ .addImm (AMDGPU::sub1);
3728
+ TII->legalizeOperands (*LoHalf);
3729
+ TII->legalizeOperands (*HiHalf);
3730
+ MI.eraseFromParent ();
3731
+ return BB;
3732
+ }
3733
+ case AMDGPU::S_ADD_CO_PSEUDO:
3734
+ case AMDGPU::S_SUB_CO_PSEUDO: {
3735
+ // This pseudo can only be selected
3735
+ // from a uniform add/subcarry node. All the VGPR operands
3736
+ // are therefore assumed to be splat vectors.
3738
+ MachineRegisterInfo &MRI = BB->getParent ()->getRegInfo ();
3739
+ const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
3740
+ const SIRegisterInfo *TRI = ST.getRegisterInfo ();
3741
+ MachineBasicBlock::iterator MII = MI;
3742
+ const DebugLoc &DL = MI.getDebugLoc ();
3743
+ MachineOperand &Dest = MI.getOperand (0 );
3744
+ MachineOperand &Src0 = MI.getOperand (2 );
3745
+ MachineOperand &Src1 = MI.getOperand (3 );
3746
+ MachineOperand &Src2 = MI.getOperand (4 );
3747
+ unsigned Opc = (MI.getOpcode () == AMDGPU::S_ADD_CO_PSEUDO)
3748
+ ? AMDGPU::S_ADDC_U32
3749
+ : AMDGPU::S_SUBB_U32;
3750
+ if (Src0.isReg () && TRI->isVectorRegister (MRI, Src0.getReg ())) {
3751
+ Register RegOp0 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3752
+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), RegOp0)
3753
+ .addReg (Src0.getReg ());
3754
+ Src0.setReg (RegOp0);
3755
+ }
3756
+ if (Src1.isReg () && TRI->isVectorRegister (MRI, Src1.getReg ())) {
3757
+ Register RegOp1 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3758
+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), RegOp1)
3759
+ .addReg (Src1.getReg ());
3760
+ Src1.setReg (RegOp1);
3761
+ }
3762
+ Register RegOp2 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3763
+ if (TRI->isVectorRegister (MRI, Src2.getReg ())) {
3764
+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), RegOp2)
3765
+ .addReg (Src2.getReg ());
3766
+ Src2.setReg (RegOp2);
3767
+ }
3768
+
3769
+ if (TRI->getRegSizeInBits (*MRI.getRegClass (Src2.getReg ())) == 64 ) {
3770
+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::S_CMP_LG_U64))
3771
+ .addReg (Src2.getReg ())
3772
+ .addImm (0 );
3773
+ } else {
3774
+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::S_CMPK_LG_U32))
3775
+ .addReg (Src2.getReg ())
3776
+ .addImm (0 );
3777
+ }
3778
+
3779
+ BuildMI (*BB, MII, DL, TII->get (Opc), Dest.getReg ()).add (Src0).add (Src1);
3649
3780
MI.eraseFromParent ();
3650
3781
return BB;
3651
3782
}
0 commit comments