@@ -178,8 +178,8 @@ class AArch64FastISel final : public FastISel {
178178 bool emitICmp (MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
179179 bool emitICmp_ri (MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
180180 bool emitFCmp (MVT RetVT, const Value *LHS, const Value *RHS);
181- bool emitLoad (MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true ,
182- MachineMemOperand *MMO = nullptr );
181+ bool emitLoad (MVT VT, MVT ResultVT, unsigned &ResultReg, Address Addr,
182+ bool WantZExt = true , MachineMemOperand *MMO = nullptr );
183183 bool emitStore (MVT VT, unsigned SrcReg, Address Addr,
184184 MachineMemOperand *MMO = nullptr );
185185 unsigned emitIntExt (MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
@@ -260,6 +260,8 @@ class AArch64FastISel final : public FastISel {
260260static bool isIntExtFree (const Instruction *I) {
261261 assert ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
262262 " Unexpected integer extend instruction." );
263+ assert (!I->getType ()->isVectorTy () && I->getType ()->isIntegerTy () &&
264+ " Unexpected value type." );
263265 bool IsZExt = isa<ZExtInst>(I);
264266
265267 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand (0 )))
@@ -1589,8 +1591,9 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
15891591 return emitLogicalOp_ri (ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
15901592}
15911593
1592- bool AArch64FastISel::emitLoad (MVT VT, unsigned &ResultReg, Address Addr,
1593- bool WantZExt, MachineMemOperand *MMO) {
1594+ bool AArch64FastISel::emitLoad (MVT VT, MVT RetVT, unsigned &ResultReg,
1595+ Address Addr, bool WantZExt,
1596+ MachineMemOperand *MMO) {
15941597 // Simplify this down to something we can handle.
15951598 if (!simplifyAddress (Addr, VT))
15961599 return false ;
@@ -1607,24 +1610,40 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
16071610 ScaleFactor = 1 ;
16081611 }
16091612
1610- static const unsigned GPOpcTable[2 ][4 ][4 ] = {
1613+ static const unsigned GPOpcTable[2 ][8 ][4 ] = {
16111614 // Sign-extend.
1612- { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURSWi,
1615+ { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1616+ AArch64::LDURXi },
1617+ { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
16131618 AArch64::LDURXi },
1614- { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRSWui ,
1619+ { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui ,
16151620 AArch64::LDRXui },
1616- { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
1621+ { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1622+ AArch64::LDRXui },
1623+ { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1624+ AArch64::LDRXroX },
1625+ { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
16171626 AArch64::LDRXroX },
1618- { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW ,
1627+ { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW ,
16191628 AArch64::LDRXroW },
1629+ { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1630+ AArch64::LDRXroW }
16201631 },
16211632 // Zero-extend.
16221633 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
16231634 AArch64::LDURXi },
1635+ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1636+ AArch64::LDURXi },
16241637 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
16251638 AArch64::LDRXui },
1639+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1640+ AArch64::LDRXui },
1641+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1642+ AArch64::LDRXroX },
16261643 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
16271644 AArch64::LDRXroX },
1645+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1646+ AArch64::LDRXroW },
16281647 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
16291648 AArch64::LDRXroW }
16301649 }
@@ -1646,24 +1665,28 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
16461665 Addr.getExtendType () == AArch64_AM::SXTW)
16471666 Idx++;
16481667
1668+ bool IsRet64Bit = RetVT == MVT::i64 ;
16491669 switch (VT.SimpleTy ) {
16501670 default :
16511671 llvm_unreachable (" Unexpected value type." );
16521672 case MVT::i1: // Intentional fall-through.
16531673 case MVT::i8 :
1654- Opc = GPOpcTable[WantZExt][Idx][0 ];
1655- RC = &AArch64::GPR32RegClass;
1674+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0 ];
1675+ RC = (IsRet64Bit && !WantZExt) ?
1676+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
16561677 break ;
16571678 case MVT::i16 :
1658- Opc = GPOpcTable[WantZExt][Idx][1 ];
1659- RC = &AArch64::GPR32RegClass;
1679+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1 ];
1680+ RC = (IsRet64Bit && !WantZExt) ?
1681+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
16601682 break ;
16611683 case MVT::i32 :
1662- Opc = GPOpcTable[WantZExt][Idx][2 ];
1663- RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1684+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2 ];
1685+ RC = (IsRet64Bit && !WantZExt) ?
1686+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
16641687 break ;
16651688 case MVT::i64 :
1666- Opc = GPOpcTable[WantZExt][Idx][3 ];
1689+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit ][3 ];
16671690 RC = &AArch64::GPR64RegClass;
16681691 break ;
16691692 case MVT::f32 :
@@ -1682,15 +1705,22 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
16821705 TII.get (Opc), ResultReg);
16831706 addLoadStoreOperands (Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
16841707
1685- // For 32bit loads we do sign-extending loads to 64bit and then extract the
1686- // subreg. In the end this is just a NOOP.
1687- if (VT == MVT::i32 && !WantZExt)
1688- ResultReg = fastEmitInst_extractsubreg (MVT::i32 , ResultReg, /* IsKill=*/ true ,
1689- AArch64::sub_32);
1708+ // For zero-extending loads to 64bit we emit a 32bit load and then convert
1709+ // the w-reg to an x-reg. In the end this is just an noop and will be removed.
1710+ if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32 ) {
1711+ unsigned Reg64 = createResultReg (&AArch64::GPR64RegClass);
1712+ BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
1713+ TII.get (AArch64::SUBREG_TO_REG), Reg64)
1714+ .addImm (0 )
1715+ .addReg (ResultReg, getKillRegState (true ))
1716+ .addImm (AArch64::sub_32);
1717+ ResultReg = Reg64;
1718+ }
16901719
16911720 // Loading an i1 requires special handling.
16921721 if (VT == MVT::i1) {
1693- unsigned ANDReg = emitAnd_ri (MVT::i32 , ResultReg, /* IsKill=*/ true , 1 );
1722+ unsigned ANDReg = emitAnd_ri (IsRet64Bit ? MVT::i64 : MVT::i32 , ResultReg,
1723+ /* IsKill=*/ true , 1 );
16941724 assert (ANDReg && " Unexpected AND instruction emission failure." );
16951725 ResultReg = ANDReg;
16961726 }
@@ -1767,11 +1797,21 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
17671797 return false ;
17681798
17691799 bool WantZExt = true ;
1770- if (I->hasOneUse () && isa<SExtInst>(I->use_begin ()->getUser ()))
1771- WantZExt = false ;
1800+ MVT RetVT = VT;
1801+ if (I->hasOneUse ()) {
1802+ if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin ()->getUser ())) {
1803+ if (!isTypeSupported (ZE->getType (), RetVT, /* IsVectorAllowed=*/ false ))
1804+ RetVT = VT;
1805+ } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin ()->getUser ())) {
1806+ if (!isTypeSupported (SE->getType (), RetVT, /* IsVectorAllowed=*/ false ))
1807+ RetVT = VT;
1808+ WantZExt = false ;
1809+ }
1810+ }
17721811
17731812 unsigned ResultReg;
1774- if (!emitLoad (VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor (I)))
1813+ if (!emitLoad (VT, RetVT, ResultReg, Addr, WantZExt,
1814+ createMachineMemOperandFor (I)))
17751815 return false ;
17761816
17771817 updateValueMap (I, ResultReg);
@@ -2897,7 +2937,7 @@ bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
28972937
28982938 bool RV;
28992939 unsigned ResultReg;
2900- RV = emitLoad (VT, ResultReg, Src);
2940+ RV = emitLoad (VT, VT, ResultReg, Src);
29012941 if (!RV)
29022942 return false ;
29032943
@@ -3917,51 +3957,37 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
39173957 if (!isTypeSupported (I->getOperand (0 )->getType (), SrcVT))
39183958 return false ;
39193959
3920- if (isIntExtFree (I)) {
3921- unsigned SrcReg = getRegForValue (I->getOperand (0 ));
3922- if (!SrcReg)
3923- return false ;
3924- bool SrcIsKill = hasTrivialKill (I->getOperand (0 ));
3925-
3926- const TargetRegisterClass *RC = (RetVT == MVT::i64 ) ?
3927- &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3928- unsigned ResultReg = createResultReg (RC);
3929- if (RetVT == MVT::i64 && SrcVT != MVT::i64 ) {
3930- BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3931- TII.get (AArch64::SUBREG_TO_REG), ResultReg)
3932- .addImm (0 )
3933- .addReg (SrcReg, getKillRegState (SrcIsKill))
3934- .addImm (AArch64::sub_32);
3935- } else {
3936- BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3937- TII.get (TargetOpcode::COPY), ResultReg)
3938- .addReg (SrcReg, getKillRegState (SrcIsKill));
3939- }
3940- updateValueMap (I, ResultReg);
3941- return true ;
3942- }
3943-
39443960 unsigned SrcReg = getRegForValue (I->getOperand (0 ));
39453961 if (!SrcReg)
39463962 return false ;
3947- bool SrcRegIsKill = hasTrivialKill (I->getOperand (0 ));
3963+ bool SrcIsKill = hasTrivialKill (I->getOperand (0 ));
39483964
3949- unsigned ResultReg = 0 ;
3950- if (isIntExtFree (I)) {
3951- if (RetVT == MVT::i64 ) {
3952- ResultReg = createResultReg (&AArch64::GPR64RegClass);
3953- BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3954- TII.get (AArch64::SUBREG_TO_REG), ResultReg)
3955- .addImm (0 )
3956- .addReg (SrcReg, getKillRegState (SrcRegIsKill))
3957- .addImm (AArch64::sub_32);
3958- } else
3959- ResultReg = SrcReg;
3965+ // The load instruction selection code handles the sign-/zero-extension.
3966+ if (const auto *LI = dyn_cast<LoadInst>(I->getOperand (0 ))) {
3967+ if (LI->hasOneUse ()) {
3968+ updateValueMap (I, SrcReg);
3969+ return true ;
3970+ }
39603971 }
39613972
3962- if (!ResultReg)
3963- ResultReg = emitIntExt (SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
3973+ bool IsZExt = isa<ZExtInst>(I);
3974+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand (0 ))) {
3975+ if ((IsZExt && Arg->hasZExtAttr ()) || (!IsZExt && Arg->hasSExtAttr ())) {
3976+ if (RetVT == MVT::i64 && SrcVT != MVT::i64 ) {
3977+ unsigned ResultReg = createResultReg (&AArch64::GPR64RegClass);
3978+ BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3979+ TII.get (AArch64::SUBREG_TO_REG), ResultReg)
3980+ .addImm (0 )
3981+ .addReg (SrcReg, getKillRegState (SrcIsKill))
3982+ .addImm (AArch64::sub_32);
3983+ SrcReg = ResultReg;
3984+ }
3985+ updateValueMap (I, SrcReg);
3986+ return true ;
3987+ }
3988+ }
39643989
3990+ unsigned ResultReg = emitIntExt (SrcVT, SrcReg, RetVT, IsZExt);
39653991 if (!ResultReg)
39663992 return false ;
39673993
0 commit comments