diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a956074e50d86..7a9be6f2af9b4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -436,7 +436,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); if (Subtarget.is64Bit()) { - setOperationPromotedToType(ISD::CTTZ , MVT::i32, MVT::i64); setOperationAction(ISD::CTTZ , MVT::i64 , Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); } @@ -3386,15 +3385,19 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT, } bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const { - // Speculate cttz only if we can directly use TZCNT or can promote to i32/i64. + // Speculate cttz only if we can directly use TZCNT/CMOV, can promote to + // i32/i64, or can rely on the BSF passthrough value. return Subtarget.hasBMI() || Subtarget.canUseCMOV() || + Subtarget.hasBitScanPassThrough() || (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u)); } bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { - // Speculate ctlz only if we can directly use LZCNT. - return Subtarget.hasLZCNT() || Subtarget.canUseCMOV(); + // Speculate ctlz only if we can directly use LZCNT/CMOV, or can rely on the + // BSR passthrough value. + return Subtarget.hasLZCNT() || Subtarget.canUseCMOV() || + Subtarget.hasBitScanPassThrough(); } bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const { @@ -28694,11 +28697,18 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); } + // Check if we can safely pass a result through BSR for zero sources. + SDValue PassThru = DAG.getUNDEF(OpVT); + if (Opc == ISD::CTLZ && Subtarget.hasBitScanPassThrough() && + !DAG.isKnownNeverZero(Op)) + PassThru = DAG.getConstant(NumBits + NumBits - 1, dl, OpVT); + // Issue a bsr (scan bits in reverse) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); - Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); + Op = DAG.getNode(X86ISD::BSR, dl, VTs, PassThru, Op); - if (Opc == ISD::CTLZ) { + // Skip CMOV if we're using a passthrough value. + if (Opc == ISD::CTLZ && PassThru.isUndef()) { // If src is zero (i.e. bsr sets ZF), returns NumBits. SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), @@ -28721,16 +28731,22 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, unsigned NumBits = VT.getScalarSizeInBits(); SDValue N0 = Op.getOperand(0); SDLoc dl(Op); + bool NonZeroSrc = DAG.isKnownNeverZero(N0); assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ && "Only scalar CTTZ requires custom lowering"); + // Check if we can safely pass a result through BSF for zero sources. + SDValue PassThru = DAG.getUNDEF(VT); + if (!NonZeroSrc && Subtarget.hasBitScanPassThrough()) + PassThru = DAG.getConstant(NumBits, dl, VT); + // Issue a bsf (scan bits forward) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(VT, MVT::i32); - Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); + Op = DAG.getNode(X86ISD::BSF, dl, VTs, PassThru, N0); - // If src is known never zero we can skip the CMOV. - if (DAG.isKnownNeverZero(N0)) + // Skip CMOV if src is never zero or we're using a passthrough value. + if (NonZeroSrc || !PassThru.isUndef()) return Op; // If src is zero (i.e.
bsf sets ZF), returns NumBits. @@ -38193,12 +38209,34 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known = KnownBits::mul(Known, Known2); break; } - case X86ISD::BSR: - // BSR(0) is undef, but any use of BSR already accounts for non-zero inputs. - // Similar KnownBits behaviour to CTLZ_ZERO_UNDEF. + case X86ISD::BSF: { + Known.Zero.setBitsFrom(Log2_32(BitWidth)); + + KnownBits Known2; + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Known2.isNonZero()) { + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); + unsigned LowBits = llvm::bit_width(PossibleTZ); + Known.Zero.setBitsFrom(LowBits); + } else if (!Op.getOperand(0).isUndef()) { + Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = Known.intersectWith(Known2); + } + break; + } + case X86ISD::BSR: { // TODO: Bound with input known bits? Known.Zero.setBitsFrom(Log2_32(BitWidth)); + + if (!Op.getOperand(0).isUndef() && + !DAG.isKnownNeverZero(Op.getOperand(1), Depth + 1)) { + KnownBits Known2; + Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = Known.intersectWith(Known2); + } break; + } case X86ISD::SETCC: Known.Zero.setBitsFrom(1); break; @@ -54243,7 +54281,7 @@ static SDValue combineXorSubCTLZ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, } SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); - Op = DAG.getNode(X86ISD::BSR, DL, VTs, Op); + Op = DAG.getNode(X86ISD::BSR, DL, VTs, DAG.getUNDEF(OpVT), Op); if (VT == MVT::i8) Op = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Op); diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 7d4c5c0e10e49..9bda3fd7d951c 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -2213,12 +2213,12 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; // Bit scan instruction patterns to match explicit zero-undef behavior. -def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; -def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; -def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>; -def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>; -def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>; -def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>; +def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr (i16 (IMPLICIT_DEF)), GR16:$src)>; +def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr (i32 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr (i64 (IMPLICIT_DEF)), GR64:$src)>; +def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm (i16 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm (i32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm (i64 (IMPLICIT_DEF)), addr:$src)>; // When HasMOVBE is enabled it is possible to get a non-legalized // register-register 16 bit bswap. This maps it to a ROL instruction. 
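Between the pattern changes above and the SDNode/instruction definition changes below, a note on the hardware behaviour the new lowering leans on. The sketch that follows is not part of the patch (the helper name is invented): it shows, as GNU extended inline asm, what LowerCTTZ now effectively emits for a possibly-zero i32 source when hasBitScanPassThrough() holds, i.e. on 64-bit subtargets where BSF is assumed to leave its destination register unchanged for a zero source. Preloading the destination with 32 (the cttz-of-zero answer) makes the old BSF+CMOV pair unnecessary, which is exactly the `movl $32, %eax` + `rep bsfl %edi, %eax` sequences in the test diffs below.

```cpp
#include <cstdint>

// Hypothetical helper mirroring the new i32 cttz lowering (x86-64 only).
static inline uint32_t cttz32_passthrough(uint32_t x) {
  uint32_t dst = 32;      // PassThru = DAG.getConstant(NumBits, dl, VT)
  __asm__("bsfl %1, %0"   // when x == 0, ZF is set and dst keeps its old value
          : "+r"(dst)     // tied read/write operand, like $fallback = $dst
          : "r"(x)
          : "cc");
  return dst;             // 32 for x == 0, index of lowest set bit otherwise
}
```

The compiler-emitted form carries a REP prefix so that BMI hardware decodes it as TZCNT, which also returns 32 for a zero i32 source, so both interpretations agree.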
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index ea7af893ce103..ddbc7c55a6113 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -134,8 +134,8 @@ def SDTX86Cmpccxadd : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>, def X86MFence : SDNode<"X86ISD::MFENCE", SDTNone, [SDNPHasChain]>; -def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>; -def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>; +def X86bsf : SDNode<"X86ISD::BSF", SDTBinaryArithWithFlags>; +def X86bsr : SDNode<"X86ISD::BSR", SDTBinaryArithWithFlags>; def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>; def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>; @@ -685,8 +685,9 @@ def anyext_sdiv : PatFrag<(ops node:$lhs), (anyext node:$lhs),[{ // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may // be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying // anything about the upper 32 bits, they're probably just qualifying a -// CopyFromReg. FREEZE may be coming from a a truncate. Any other 32-bit -// operation will zero-extend up to 64 bits. +// CopyFromReg. FREEZE may be coming from a truncate. BitScan fall through +// values may not zero the upper bits correctly. +// Any other 32-bit operation will zero-extend up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && @@ -694,7 +695,9 @@ def def32 : PatLeaf<(i32 GR32:$src), [{ N->getOpcode() != ISD::AssertSext && N->getOpcode() != ISD::AssertZext && N->getOpcode() != ISD::AssertAlign && - N->getOpcode() != ISD::FREEZE; + N->getOpcode() != ISD::FREEZE && + !((N->getOpcode() == X86ISD::BSF || N->getOpcode() == X86ISD::BSR) && + (!N->getOperand(0).isUndef() && !isa<ConstantSDNode>(N->getOperand(0)))); }]>; // Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 1baac05827c47..794aa921ca254 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5220,42 +5220,43 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, } /// Check whether the use can be converted to remove a comparison against zero. -static X86::CondCode isUseDefConvertible(const MachineInstr &MI) { +/// Returns the EFLAGS condition and the operand that we are comparing against zero.
+static std::pair<X86::CondCode, unsigned> isUseDefConvertible(const MachineInstr &MI) { switch (MI.getOpcode()) { default: - return X86::COND_INVALID; + return std::make_pair(X86::COND_INVALID, ~0U); CASE_ND(NEG8r) CASE_ND(NEG16r) CASE_ND(NEG32r) CASE_ND(NEG64r) - return X86::COND_AE; + return std::make_pair(X86::COND_AE, 1U); case X86::LZCNT16rr: case X86::LZCNT32rr: case X86::LZCNT64rr: - return X86::COND_B; + return std::make_pair(X86::COND_B, 1U); case X86::POPCNT16rr: case X86::POPCNT32rr: case X86::POPCNT64rr: - return X86::COND_E; + return std::make_pair(X86::COND_E, 1U); case X86::TZCNT16rr: case X86::TZCNT32rr: case X86::TZCNT64rr: - return X86::COND_B; + return std::make_pair(X86::COND_B, 1U); case X86::BSF16rr: case X86::BSF32rr: case X86::BSF64rr: case X86::BSR16rr: case X86::BSR32rr: case X86::BSR64rr: - return X86::COND_E; + return std::make_pair(X86::COND_E, 2U); case X86::BLSI32rr: case X86::BLSI64rr: - return X86::COND_AE; + return std::make_pair(X86::COND_AE, 1U); case X86::BLSR32rr: case X86::BLSR64rr: case X86::BLSMSK32rr: case X86::BLSMSK64rr: - return X86::COND_B; + return std::make_pair(X86::COND_B, 1U); // TODO: TBM instructions. } } @@ -5336,6 +5337,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, bool ClearsOverflowFlag = false; bool ShouldUpdateCC = false; bool IsSwapped = false; + unsigned OpNo = 0; X86::CondCode NewCC = X86::COND_INVALID; int64_t ImmDelta = 0; @@ -5391,9 +5393,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // ... // EFLAGS not changed // testl %eax, %eax // <-- can be removed if (IsCmpZero) { - NewCC = isUseDefConvertible(Inst); - if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() && - Inst.getOperand(1).getReg() == SrcReg) { + std::tie(NewCC, OpNo) = isUseDefConvertible(Inst); + if (NewCC != X86::COND_INVALID && Inst.getOperand(OpNo).isReg() && + Inst.getOperand(OpNo).getReg() == SrcReg) { ShouldUpdateCC = true; MI = &Inst; break; diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 43c02c4f85844..290d91bb2ce69 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -247,55 +247,55 @@ def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), } // Constraints = "$src = $dst", SchedRW // Bit scan instructions.
-let Defs = [EFLAGS] in { -def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), +let Defs = [EFLAGS], Constraints = "$fallback = $dst" in { +def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$fallback, GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$fallback, GR16:$src))]>, TB, OpSize16, Sched<[WriteBSF]>; -def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), +def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins GR16:$fallback, i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$fallback, (loadi16 addr:$src)))]>, TB, OpSize16, Sched<[WriteBSFLd]>; -def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), +def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$fallback, GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$fallback, GR32:$src))]>, TB, OpSize32, Sched<[WriteBSF]>; -def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), +def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins GR32:$fallback, i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$fallback, (loadi32 addr:$src)))]>, TB, OpSize32, Sched<[WriteBSFLd]>; -def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), +def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$fallback, GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, + [(set GR64:$dst, EFLAGS, (X86bsf GR64:$fallback, GR64:$src))]>, TB, Sched<[WriteBSF]>; -def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins GR64:$fallback, i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, + [(set GR64:$dst, EFLAGS, (X86bsf GR64:$fallback, (loadi64 addr:$src)))]>, TB, Sched<[WriteBSFLd]>; -def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), +def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$fallback, GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$fallback, GR16:$src))]>, TB, OpSize16, Sched<[WriteBSR]>; -def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), +def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins GR16:$fallback, i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$fallback, (loadi16 addr:$src)))]>, TB, OpSize16, Sched<[WriteBSRLd]>; -def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), +def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$fallback, GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$fallback, GR32:$src))]>, TB, OpSize32, Sched<[WriteBSR]>; -def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), +def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins GR32:$fallback, i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$fallback, (loadi32 addr:$src)))]>, TB, 
OpSize32, Sched<[WriteBSRLd]>; -def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), +def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$fallback, GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$fallback, GR64:$src))]>, TB, Sched<[WriteBSR]>; -def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins GR64:$fallback, i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$fallback, (loadi64 addr:$src)))]>, TB, Sched<[WriteBSRLd]>; } // Defs = [EFLAGS] diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index e3cb9ee8ce190..c399989f115d7 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -263,6 +263,11 @@ class X86Subtarget final : public X86GenSubtargetInfo { return hasBWI() && useAVX512Regs(); } + // Returns true if the destination register of a BSF/BSR instruction is + // not touched if the source register is zero. + // NOTE: i32->i64 implicit zext isn't guaranteed by BSR/BSF pass through. + bool hasBitScanPassThrough() const { return is64Bit(); } + bool isXRaySupported() const override { return is64Bit(); } /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for diff --git a/llvm/test/CodeGen/X86/bit_ceil.ll b/llvm/test/CodeGen/X86/bit_ceil.ll index 823453087f618..1f21fcac8341d 100644 --- a/llvm/test/CodeGen/X86/bit_ceil.ll +++ b/llvm/test/CodeGen/X86/bit_ceil.ll @@ -10,9 +10,8 @@ define i32 @bit_ceil_i32(i32 %x) { ; NOBMI: # %bb.0: ; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; NOBMI-NEXT: leal -1(%rdi), %eax -; NOBMI-NEXT: bsrl %eax, %eax ; NOBMI-NEXT: movl $63, %ecx -; NOBMI-NEXT: cmovnel %eax, %ecx +; NOBMI-NEXT: bsrl %eax, %ecx ; NOBMI-NEXT: xorl $31, %ecx ; NOBMI-NEXT: negb %cl ; NOBMI-NEXT: movl $1, %edx @@ -47,9 +46,8 @@ define i32 @bit_ceil_i32(i32 %x) { define i32 @bit_ceil_i32_plus1(i32 noundef %x) { ; NOBMI-LABEL: bit_ceil_i32_plus1: ; NOBMI: # %bb.0: # %entry -; NOBMI-NEXT: bsrl %edi, %eax ; NOBMI-NEXT: movl $63, %ecx -; NOBMI-NEXT: cmovnel %eax, %ecx +; NOBMI-NEXT: bsrl %edi, %ecx ; NOBMI-NEXT: xorl $31, %ecx ; NOBMI-NEXT: negb %cl ; NOBMI-NEXT: movl $1, %edx @@ -86,9 +84,8 @@ define i64 @bit_ceil_i64(i64 %x) { ; NOBMI-LABEL: bit_ceil_i64: ; NOBMI: # %bb.0: ; NOBMI-NEXT: leaq -1(%rdi), %rax -; NOBMI-NEXT: bsrq %rax, %rax ; NOBMI-NEXT: movl $127, %ecx -; NOBMI-NEXT: cmovneq %rax, %rcx +; NOBMI-NEXT: bsrq %rax, %rcx ; NOBMI-NEXT: xorl $63, %ecx ; NOBMI-NEXT: negb %cl ; NOBMI-NEXT: movl $1, %edx @@ -122,9 +119,8 @@ define i64 @bit_ceil_i64(i64 %x) { define i64 @bit_ceil_i64_plus1(i64 noundef %x) { ; NOBMI-LABEL: bit_ceil_i64_plus1: ; NOBMI: # %bb.0: # %entry -; NOBMI-NEXT: bsrq %rdi, %rax ; NOBMI-NEXT: movl $127, %ecx -; NOBMI-NEXT: cmovneq %rax, %rcx +; NOBMI-NEXT: bsrq %rdi, %rcx ; NOBMI-NEXT: xorl $63, %ecx ; NOBMI-NEXT: negb %cl ; NOBMI-NEXT: movl $1, %edx diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll index d9c6d7053be74..08262e4d34b26 100644 --- a/llvm/test/CodeGen/X86/combine-or.ll +++ b/llvm/test/CodeGen/X86/combine-or.ll @@ -227,9 +227,8 @@ define i64 @PR89533(<64 x i8> %a0) { ; SSE-NEXT: orl %eax, %edx ; SSE-NEXT: shlq $32, %rdx ; SSE-NEXT: orq %rcx, %rdx -; SSE-NEXT: bsfq %rdx, %rcx ; SSE-NEXT: movl $64, %eax -; SSE-NEXT: cmovneq %rcx, %rax +; SSE-NEXT: 
rep bsfq %rdx, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: PR89533: @@ -255,9 +254,8 @@ define i64 @PR89533(<64 x i8> %a0) { ; AVX1-NEXT: orl %eax, %edx ; AVX1-NEXT: shlq $32, %rdx ; AVX1-NEXT: orq %rcx, %rdx -; AVX1-NEXT: bsfq %rdx, %rcx ; AVX1-NEXT: movl $64, %eax -; AVX1-NEXT: cmovneq %rcx, %rax +; AVX1-NEXT: rep bsfq %rdx, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll index 2f4fef82f1f17..fecb62fbc5aea 100644 --- a/llvm/test/CodeGen/X86/ctlo.ll +++ b/llvm/test/CodeGen/X86/ctlo.ll @@ -44,10 +44,9 @@ define i8 @ctlo_i8(i8 %x) { ; X64-LABEL: ctlo_i8: ; X64: # %bb.0: ; X64-NEXT: notb %dil -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: bsrl %eax, %ecx +; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: movl $15, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %ecx, %eax ; X64-NEXT: xorl $7, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -146,9 +145,8 @@ define i16 @ctlo_i16(i16 %x) { ; X64-LABEL: ctlo_i16: ; X64: # %bb.0: ; X64-NEXT: notl %edi -; X64-NEXT: bsrw %di, %cx ; X64-NEXT: movw $31, %ax -; X64-NEXT: cmovnew %cx, %ax +; X64-NEXT: bsrw %di, %ax ; X64-NEXT: xorl $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -232,9 +230,8 @@ define i32 @ctlo_i32(i32 %x) { ; X64-LABEL: ctlo_i32: ; X64: # %bb.0: ; X64-NEXT: notl %edi -; X64-NEXT: bsrl %edi, %ecx ; X64-NEXT: movl $63, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: xorl $31, %eax ; X64-NEXT: retq ; @@ -335,9 +332,8 @@ define i64 @ctlo_i64(i64 %x) { ; X64-LABEL: ctlo_i64: ; X64: # %bb.0: ; X64-NEXT: notq %rdi -; X64-NEXT: bsrq %rdi, %rcx ; X64-NEXT: movl $127, %eax -; X64-NEXT: cmovneq %rcx, %rax +; X64-NEXT: bsrq %rdi, %rax ; X64-NEXT: xorq $63, %rax ; X64-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll index 68defaff78d37..0eabfeae853f7 100644 --- a/llvm/test/CodeGen/X86/ctlz.ll +++ b/llvm/test/CodeGen/X86/ctlz.ll @@ -246,10 +246,9 @@ define i8 @ctlz_i8_zero_test(i8 %n) { ; ; X64-LABEL: ctlz_i8_zero_test: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: bsrl %eax, %ecx +; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: movl $15, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %ecx, %eax ; X64-NEXT: xorl $7, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -317,9 +316,8 @@ define i16 @ctlz_i16_zero_test(i16 %n) { ; ; X64-LABEL: ctlz_i16_zero_test: ; X64: # %bb.0: -; X64-NEXT: bsrw %di, %cx ; X64-NEXT: movw $31, %ax -; X64-NEXT: cmovnew %cx, %ax +; X64-NEXT: bsrw %di, %ax ; X64-NEXT: xorl $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -372,9 +370,8 @@ define i32 @ctlz_i32_zero_test(i32 %n) { ; ; X64-LABEL: ctlz_i32_zero_test: ; X64: # %bb.0: -; X64-NEXT: bsrl %edi, %ecx ; X64-NEXT: movl $63, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: xorl $31, %eax ; X64-NEXT: retq ; @@ -442,9 +439,8 @@ define i64 @ctlz_i64_zero_test(i64 %n) { ; ; X64-LABEL: ctlz_i64_zero_test: ; X64: # %bb.0: -; X64-NEXT: bsrq %rdi, %rcx ; X64-NEXT: movl $127, %eax -; X64-NEXT: cmovneq %rcx, %rax +; X64-NEXT: bsrq %rdi, %rax ; X64-NEXT: xorq $63, %rax ; X64-NEXT: retq ; @@ -613,9 +609,8 @@ define i32 @ctlz_bsr_zero_test(i32 %n) { ; ; X64-LABEL: ctlz_bsr_zero_test: ; X64: # %bb.0: -; X64-NEXT: bsrl %edi, %ecx ; X64-NEXT: movl $63, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: ctlz_bsr_zero_test: @@ -983,10 +978,9 @@ 
define i8 @ctlz_xor7_i8_false(i8 %x) { ; ; X64-LABEL: ctlz_xor7_i8_false: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: bsrl %eax, %ecx +; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: movl $15, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %ecx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; @@ -1094,9 +1088,8 @@ define i32 @ctlz_xor31_i32_false(i32 %x) { ; ; X64-LABEL: ctlz_xor31_i32_false: ; X64: # %bb.0: -; X64-NEXT: bsrl %edi, %ecx ; X64-NEXT: movl $63, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: ctlz_xor31_i32_false: @@ -1239,9 +1232,8 @@ define i64 @ctlz_i32_sext(i32 %x) { ; ; X64-LABEL: ctlz_i32_sext: ; X64: # %bb.0: -; X64-NEXT: bsrl %edi, %ecx ; X64-NEXT: movl $63, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: ctlz_i32_sext: @@ -1302,9 +1294,8 @@ define i64 @ctlz_i32_zext(i32 %x) { ; ; X64-LABEL: ctlz_i32_zext: ; X64: # %bb.0: -; X64-NEXT: bsrl %edi, %ecx ; X64-NEXT: movl $63, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: bsrl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: ctlz_i32_zext: diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll index 30e5cccfb2198..db949827af007 100644 --- a/llvm/test/CodeGen/X86/cttz.ll +++ b/llvm/test/CodeGen/X86/cttz.ll @@ -324,11 +324,8 @@ define i32 @cttz_i32_zero_test(i32 %n) { ; ; X64-LABEL: cttz_i32_zero_test: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i32_zero_test: @@ -393,9 +390,8 @@ define i64 @cttz_i64_zero_test(i64 %n) { ; ; X64-LABEL: cttz_i64_zero_test: ; X64: # %bb.0: -; X64-NEXT: bsfq %rdi, %rcx ; X64-NEXT: movl $64, %eax -; X64-NEXT: cmovneq %rcx, %rax +; X64-NEXT: rep bsfq %rdi, %rax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i64_zero_test: @@ -687,10 +683,8 @@ define i64 @cttz_i32_sext(i32 %x) { ; ; X64-LABEL: cttz_i32_sext: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i32_sext: @@ -744,10 +738,8 @@ define i64 @cttz_i32_zext(i32 %x) { ; ; X64-LABEL: cttz_i32_zext: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i32_zext: diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll index 6c0aaeb451e14..63336ffa7c6c8 100644 --- a/llvm/test/CodeGen/X86/known-never-zero.ll +++ b/llvm/test/CodeGen/X86/known-never-zero.ll @@ -51,12 +51,9 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: or_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: orl %esi, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: 
retq %z = or i32 %x, %y %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -104,13 +101,11 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) { ; X64-LABEL: select_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: orl $1, %esi -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: testb $1, %dil -; X64-NEXT: cmovnel %esi, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: cmovnel %esi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %y = or i32 %x, 1 %z = select i1 %c, i32 %y, i32 0 @@ -201,14 +196,11 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: shl_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %esi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %esi, %eax ; X64-NEXT: retq %z = shl nuw nsw i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -252,12 +244,10 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: uaddsat_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: addl %esi, %edi -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovael %edi, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $-1, %ecx +; X64-NEXT: cmovael %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -306,13 +296,10 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: umax_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: cmoval %edi, %esi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %esi, %eax ; X64-NEXT: retq %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -365,12 +352,10 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: umin_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: cmpl $54, %edi -; X64-NEXT: movl $54, %eax -; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $54, %ecx +; X64-NEXT: cmovbl %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %z = call i32 @llvm.umin.i32(i32 %x, i32 54) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -479,12 +464,10 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: smin_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: cmpl $54, %edi -; X64-NEXT: movl $54, %eax -; X64-NEXT: cmovll %edi, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $54, %ecx +; X64-NEXT: cmovll %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; 
X64-NEXT: retq %z = call i32 @llvm.smin.i32(i32 %x, i32 54) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -593,12 +576,10 @@ define i32 @smax_known_zero(i32 %x, i32 %y) { ; X64-LABEL: smax_known_zero: ; X64: # %bb.0: ; X64-NEXT: testl %edi, %edi -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovnsl %edi, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $-1, %ecx +; X64-NEXT: cmovnsl %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %z = call i32 @llvm.smax.i32(i32 %x, i32 -1) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -646,13 +627,10 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotr_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorl %cl, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %shr = lshr i32 %x, %y %sub = sub i32 32, %y @@ -700,13 +678,10 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotr_with_fshr_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorl %cl, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -754,13 +729,10 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotl_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: roll %cl, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %shl = shl i32 %x, %y %sub = sub i32 32, %y @@ -808,13 +780,10 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotl_with_fshl_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: roll %cl, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -880,14 +849,11 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: sra_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: sarl %cl, %esi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def 
$eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %esi, %eax ; X64-NEXT: retq %z = ashr exact i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -953,14 +919,11 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: srl_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrl %cl, %esi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %esi, %eax ; X64-NEXT: retq %z = lshr exact i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1007,11 +970,9 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divl %esi -; X64-NEXT: # kill: def $eax killed $eax def $rax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %ecx +; X64-NEXT: rep bsfl %eax, %ecx +; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq %z = udiv exact i32 %x, %y %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1058,11 +1019,9 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %esi -; X64-NEXT: # kill: def $eax killed $eax def $rax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %ecx +; X64-NEXT: rep bsfl %eax, %ecx +; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq %z = sdiv exact i32 %x, %y %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1103,13 +1062,10 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) { ; ; X64-LABEL: add_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: orl $1, %edi ; X64-NEXT: addl %esi, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %x = or i32 %xx, 1 %z = add nsw i32 %x, %y @@ -1182,13 +1138,11 @@ define i32 @sub_maybe_zero(i32 %x) { ; ; X64-LABEL: sub_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: orl $64, %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: orl $64, %ecx +; X64-NEXT: subl %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %y = or i32 %x, 64 %z = sub i32 %y, %x @@ -1208,12 +1162,9 @@ define i32 @sub_maybe_zero2(i32 %x) { ; ; X64-LABEL: sub_maybe_zero2: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: negl %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %z = sub i32 0, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1233,13 +1184,10 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) { ; ; X64-LABEL: 
mul_known_nonzero_nsw: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: orl $256, %esi # imm = 0x100 ; X64-NEXT: imull %edi, %esi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %esi, %eax ; X64-NEXT: retq %y = or i32 %yy, 256 %z = mul nsw i32 %y, %x @@ -1260,13 +1208,10 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) { ; ; X64-LABEL: mul_known_nonzero_nuw: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: orl $256, %esi # imm = 0x100 ; X64-NEXT: imull %edi, %esi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %esi, %eax ; X64-NEXT: retq %y = or i32 %yy, 256 %z = mul nuw i32 %y, %x @@ -1286,12 +1231,9 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: mul_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: imull %esi, %edi -; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 -; X64-NEXT: orq %rdi, %rax -; X64-NEXT: rep bsfq %rax, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %z = mul nuw nsw i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1321,11 +1263,9 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) { ; X64-NEXT: vcvttps2dq %xmm0, %xmm0 ; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u] -; X64-NEXT: vmovd %xmm0, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: vmovd %xmm0, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %x = shl nuw nsw <2 x i16> , %xx %z = bitcast <2 x i16> %x to i32 @@ -1344,11 +1284,9 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) { ; ; X64-LABEL: bitcast_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: vmovd %xmm0, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: vmovd %xmm0, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %z = bitcast <2 x i16> %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1365,11 +1303,9 @@ define i32 @bitcast_from_float(float %x) { ; ; X64-LABEL: bitcast_from_float: ; X64: # %bb.0: -; X64-NEXT: vmovd %xmm0, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: vmovd %xmm0, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %z = bitcast float %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1412,11 +1348,9 @@ define i32 @zext_maybe_zero(i16 %x) { ; ; X64-LABEL: zext_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: 
rep bsfl %ecx, %eax ; X64-NEXT: retq %z = zext i16 %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1459,11 +1393,9 @@ define i32 @sext_maybe_zero(i16 %x) { ; ; X64-LABEL: sext_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: movswl %di, %eax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movswl %di, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %z = sext i16 %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) diff --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll index 19baad26583ad..a40ad8f941278 100644 --- a/llvm/test/CodeGen/X86/pr89877.ll +++ b/llvm/test/CodeGen/X86/pr89877.ll @@ -20,11 +20,9 @@ define i32 @sext_known_nonzero(i16 %xx) { ; X64-NEXT: movl $256, %eax # imm = 0x100 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax -; X64-NEXT: movswq %ax, %rax -; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: rep bsfq %rcx, %rax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movswl %ax, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: rep bsfl %ecx, %eax ; X64-NEXT: retq %x = shl i16 256, %xx %z = sext i16 %x to i32 diff --git a/llvm/test/CodeGen/X86/pr90847.ll b/llvm/test/CodeGen/X86/pr90847.ll index f2d43c3ed8d5b..11669f321704e 100644 --- a/llvm/test/CodeGen/X86/pr90847.ll +++ b/llvm/test/CodeGen/X86/pr90847.ll @@ -14,11 +14,9 @@ define i32 @PR90847(<8 x float> %x) nounwind { ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] ; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1 ; AVX1-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovmskps %ymm0, %eax -; AVX1-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; AVX1-NEXT: orq %rax, %rcx -; AVX1-NEXT: rep bsfq %rcx, %rax -; AVX1-NEXT: # kill: def $eax killed $eax killed $rax +; AVX1-NEXT: vmovmskps %ymm0, %ecx +; AVX1-NEXT: movl $32, %eax +; AVX1-NEXT: rep bsfl %ecx, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -31,11 +29,9 @@ define i32 @PR90847(<8 x float> %x) nounwind { ; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[2,3,0,1] ; AVX2-NEXT: vminps %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 -; AVX2-NEXT: orq %rax, %rcx -; AVX2-NEXT: rep bsfq %rcx, %rax -; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: vmovmskps %ymm0, %ecx +; AVX2-NEXT: movl $32, %eax +; AVX2-NEXT: rep bsfl %ecx, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr92569.ll b/llvm/test/CodeGen/X86/pr92569.ll index 0fb4ed7905287..5f306e998398f 100644 --- a/llvm/test/CodeGen/X86/pr92569.ll +++ b/llvm/test/CodeGen/X86/pr92569.ll @@ -4,13 +4,11 @@ define void @PR92569(i64 %arg, <8 x i8> %arg1) { ; CHECK-LABEL: PR92569: ; CHECK: # %bb.0: -; CHECK-NEXT: bsfq %rdi, %rax -; CHECK-NEXT: movl $64, %ecx -; CHECK-NEXT: cmovneq %rax, %rcx -; CHECK-NEXT: shrb $3, %cl +; CHECK-NEXT: movl $64, %eax +; CHECK-NEXT: rep bsfq %rdi, %rax +; CHECK-NEXT: shrb $3, %al ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl %cl, %eax -; CHECK-NEXT: andl $15, %eax +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: movzbl -24(%rsp,%rax), %eax ; CHECK-NEXT: movl %eax, 0 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll index 
6be79edbe51e1..426587a84ce17 100644 --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -234,16 +234,15 @@ define i256 @test2(i256 %a) nounwind { ; ILP-NEXT: xorq $63, %rdx ; ILP-NEXT: andq %rsi, %r11 ; ILP-NEXT: movl $127, %esi -; ILP-NEXT: bsrq %r11, %r8 -; ILP-NEXT: cmoveq %rsi, %r8 -; ILP-NEXT: xorq $63, %r8 -; ILP-NEXT: addq $64, %r8 +; ILP-NEXT: bsrq %r11, %rsi +; ILP-NEXT: xorq $63, %rsi +; ILP-NEXT: addq $64, %rsi ; ILP-NEXT: testq %r10, %r10 -; ILP-NEXT: cmovneq %rdx, %r8 -; ILP-NEXT: subq $-128, %r8 +; ILP-NEXT: cmovneq %rdx, %rsi +; ILP-NEXT: subq $-128, %rsi ; ILP-NEXT: orq %rdi, %r9 -; ILP-NEXT: cmovneq %rcx, %r8 -; ILP-NEXT: movq %r8, (%rax) +; ILP-NEXT: cmovneq %rcx, %rsi +; ILP-NEXT: movq %rsi, (%rax) ; ILP-NEXT: movq $0, 8(%rax) ; ILP-NEXT: retq ; @@ -274,16 +273,15 @@ define i256 @test2(i256 %a) nounwind { ; HYBRID-NEXT: xorq $63, %rdx ; HYBRID-NEXT: andq %rsi, %r11 ; HYBRID-NEXT: movl $127, %esi -; HYBRID-NEXT: bsrq %r11, %r8 -; HYBRID-NEXT: cmoveq %rsi, %r8 -; HYBRID-NEXT: xorq $63, %r8 -; HYBRID-NEXT: addq $64, %r8 +; HYBRID-NEXT: bsrq %r11, %rsi +; HYBRID-NEXT: xorq $63, %rsi +; HYBRID-NEXT: addq $64, %rsi ; HYBRID-NEXT: testq %r10, %r10 -; HYBRID-NEXT: cmovneq %rdx, %r8 -; HYBRID-NEXT: subq $-128, %r8 +; HYBRID-NEXT: cmovneq %rdx, %rsi +; HYBRID-NEXT: subq $-128, %rsi ; HYBRID-NEXT: orq %rdi, %r9 -; HYBRID-NEXT: cmovneq %rcx, %r8 -; HYBRID-NEXT: movq %r8, (%rax) +; HYBRID-NEXT: cmovneq %rcx, %rsi +; HYBRID-NEXT: movq %rsi, (%rax) ; HYBRID-NEXT: movq $0, 8(%rax) ; HYBRID-NEXT: retq ; @@ -314,16 +312,15 @@ define i256 @test2(i256 %a) nounwind { ; BURR-NEXT: xorq $63, %rdx ; BURR-NEXT: andq %rsi, %r11 ; BURR-NEXT: movl $127, %esi -; BURR-NEXT: bsrq %r11, %r8 -; BURR-NEXT: cmoveq %rsi, %r8 -; BURR-NEXT: xorq $63, %r8 -; BURR-NEXT: addq $64, %r8 +; BURR-NEXT: bsrq %r11, %rsi +; BURR-NEXT: xorq $63, %rsi +; BURR-NEXT: addq $64, %rsi ; BURR-NEXT: testq %r10, %r10 -; BURR-NEXT: cmovneq %rdx, %r8 -; BURR-NEXT: subq $-128, %r8 +; BURR-NEXT: cmovneq %rdx, %rsi +; BURR-NEXT: subq $-128, %rsi ; BURR-NEXT: orq %rdi, %r9 -; BURR-NEXT: cmovneq %rcx, %r8 -; BURR-NEXT: movq %r8, (%rax) +; BURR-NEXT: cmovneq %rcx, %rsi +; BURR-NEXT: movq %rsi, (%rax) ; BURR-NEXT: movq $0, 8(%rax) ; BURR-NEXT: retq ; @@ -351,19 +348,18 @@ define i256 @test2(i256 %a) nounwind { ; SRC-NEXT: cmovneq %rcx, %rdx ; SRC-NEXT: bsrq %r10, %rcx ; SRC-NEXT: xorq $63, %rcx +; SRC-NEXT: movl $127, %esi ; SRC-NEXT: bsrq %r11, %rsi -; SRC-NEXT: movl $127, %r8d -; SRC-NEXT: cmovneq %rsi, %r8 -; SRC-NEXT: xorq $63, %r8 -; SRC-NEXT: addq $64, %r8 +; SRC-NEXT: xorq $63, %rsi +; SRC-NEXT: addq $64, %rsi ; SRC-NEXT: testq %r10, %r10 -; SRC-NEXT: cmovneq %rcx, %r8 -; SRC-NEXT: subq $-128, %r8 +; SRC-NEXT: cmovneq %rcx, %rsi +; SRC-NEXT: subq $-128, %rsi ; SRC-NEXT: orq %r9, %rdi -; SRC-NEXT: cmovneq %rdx, %r8 +; SRC-NEXT: cmovneq %rdx, %rsi ; SRC-NEXT: xorps %xmm0, %xmm0 ; SRC-NEXT: movaps %xmm0, 16(%rax) -; SRC-NEXT: movq %r8, (%rax) +; SRC-NEXT: movq %rsi, (%rax) ; SRC-NEXT: movq $0, 8(%rax) ; SRC-NEXT: retq ; @@ -372,12 +368,11 @@ define i256 @test2(i256 %a) nounwind { ; LIN-NEXT: movq %rdi, %rax ; LIN-NEXT: xorps %xmm0, %xmm0 ; LIN-NEXT: movaps %xmm0, 16(%rdi) -; LIN-NEXT: movq %rsi, %rdi -; LIN-NEXT: negq %rdi -; LIN-NEXT: andq %rsi, %rdi -; LIN-NEXT: bsrq %rdi, %rsi ; LIN-NEXT: movl $127, %edi -; LIN-NEXT: cmovneq %rsi, %rdi +; LIN-NEXT: movq %rsi, %r9 +; LIN-NEXT: negq %r9 +; LIN-NEXT: andq %rsi, %r9 +; LIN-NEXT: bsrq %r9, %rdi ; LIN-NEXT: xorq $63, %rdi ; 
LIN-NEXT: addq $64, %rdi ; LIN-NEXT: xorl %esi, %esi @@ -415,7 +410,6 @@ define i256 @test2(i256 %a) nounwind { define i256 @test3(i256 %n) nounwind { ; ILP-LABEL: test3: ; ILP: # %bb.0: -; ILP-NEXT: pushq %rbx ; ILP-NEXT: movq %rdi, %rax ; ILP-NEXT: xorps %xmm0, %xmm0 ; ILP-NEXT: movaps %xmm0, 16(%rdi) @@ -429,34 +423,32 @@ define i256 @test3(i256 %n) nounwind { ; ILP-NEXT: sbbq %r8, %r9 ; ILP-NEXT: notq %r8 ; ILP-NEXT: andq %r9, %r8 -; ILP-NEXT: bsrq %r8, %rbx +; ILP-NEXT: bsrq %r8, %r9 ; ILP-NEXT: notq %rdx ; ILP-NEXT: andq %r10, %rdx -; ILP-NEXT: bsrq %rdx, %r9 -; ILP-NEXT: xorq $63, %rbx +; ILP-NEXT: bsrq %rdx, %r10 +; ILP-NEXT: xorq $63, %r9 ; ILP-NEXT: notq %rcx ; ILP-NEXT: andq %r11, %rcx -; ILP-NEXT: bsrq %rcx, %r10 +; ILP-NEXT: bsrq %rcx, %r11 +; ILP-NEXT: xorq $63, %r11 +; ILP-NEXT: orq $64, %r11 +; ILP-NEXT: testq %r8, %r8 +; ILP-NEXT: cmovneq %r9, %r11 ; ILP-NEXT: xorq $63, %r10 -; ILP-NEXT: orq $64, %r10 ; ILP-NEXT: notq %rsi -; ILP-NEXT: testq %r8, %r8 -; ILP-NEXT: cmovneq %rbx, %r10 -; ILP-NEXT: xorq $63, %r9 ; ILP-NEXT: andq %rdi, %rsi ; ILP-NEXT: movl $127, %edi -; ILP-NEXT: bsrq %rsi, %rsi -; ILP-NEXT: cmoveq %rdi, %rsi -; ILP-NEXT: xorq $63, %rsi -; ILP-NEXT: addq $64, %rsi +; ILP-NEXT: bsrq %rsi, %rdi +; ILP-NEXT: xorq $63, %rdi +; ILP-NEXT: addq $64, %rdi ; ILP-NEXT: testq %rdx, %rdx -; ILP-NEXT: cmovneq %r9, %rsi -; ILP-NEXT: subq $-128, %rsi +; ILP-NEXT: cmovneq %r10, %rdi +; ILP-NEXT: subq $-128, %rdi ; ILP-NEXT: orq %r8, %rcx -; ILP-NEXT: cmovneq %r10, %rsi -; ILP-NEXT: movq %rsi, (%rax) +; ILP-NEXT: cmovneq %r11, %rdi +; ILP-NEXT: movq %rdi, (%rax) ; ILP-NEXT: movq $0, 8(%rax) -; ILP-NEXT: popq %rbx ; ILP-NEXT: retq ; ; HYBRID-LABEL: test3: @@ -491,16 +483,15 @@ define i256 @test3(i256 %n) nounwind { ; HYBRID-NEXT: notq %rsi ; HYBRID-NEXT: andq %rdi, %rsi ; HYBRID-NEXT: movl $127, %edi -; HYBRID-NEXT: bsrq %rsi, %rsi -; HYBRID-NEXT: cmoveq %rdi, %rsi -; HYBRID-NEXT: xorq $63, %rsi -; HYBRID-NEXT: addq $64, %rsi +; HYBRID-NEXT: bsrq %rsi, %rdi +; HYBRID-NEXT: xorq $63, %rdi +; HYBRID-NEXT: addq $64, %rdi ; HYBRID-NEXT: testq %rdx, %rdx -; HYBRID-NEXT: cmovneq %r10, %rsi -; HYBRID-NEXT: subq $-128, %rsi +; HYBRID-NEXT: cmovneq %r10, %rdi +; HYBRID-NEXT: subq $-128, %rdi ; HYBRID-NEXT: orq %r8, %rcx -; HYBRID-NEXT: cmovneq %r9, %rsi -; HYBRID-NEXT: movq %rsi, (%rax) +; HYBRID-NEXT: cmovneq %r9, %rdi +; HYBRID-NEXT: movq %rdi, (%rax) ; HYBRID-NEXT: movq $0, 8(%rax) ; HYBRID-NEXT: popq %rbx ; HYBRID-NEXT: retq @@ -537,16 +528,15 @@ define i256 @test3(i256 %n) nounwind { ; BURR-NEXT: notq %rsi ; BURR-NEXT: andq %rdi, %rsi ; BURR-NEXT: movl $127, %edi -; BURR-NEXT: bsrq %rsi, %rsi -; BURR-NEXT: cmoveq %rdi, %rsi -; BURR-NEXT: xorq $63, %rsi -; BURR-NEXT: addq $64, %rsi +; BURR-NEXT: bsrq %rsi, %rdi +; BURR-NEXT: xorq $63, %rdi +; BURR-NEXT: addq $64, %rdi ; BURR-NEXT: testq %rdx, %rdx -; BURR-NEXT: cmovneq %r10, %rsi -; BURR-NEXT: subq $-128, %rsi +; BURR-NEXT: cmovneq %r10, %rdi +; BURR-NEXT: subq $-128, %rdi ; BURR-NEXT: orq %r8, %rcx -; BURR-NEXT: cmovneq %r9, %rsi -; BURR-NEXT: movq %rsi, (%rax) +; BURR-NEXT: cmovneq %r9, %rdi +; BURR-NEXT: movq %rdi, (%rax) ; BURR-NEXT: movq $0, 8(%rax) ; BURR-NEXT: popq %rbx ; BURR-NEXT: retq @@ -579,9 +569,8 @@ define i256 @test3(i256 %n) nounwind { ; SRC-NEXT: cmovneq %rdi, %r9 ; SRC-NEXT: bsrq %rdx, %rdi ; SRC-NEXT: xorq $63, %rdi -; SRC-NEXT: bsrq %rsi, %rsi ; SRC-NEXT: movl $127, %r10d -; SRC-NEXT: cmovneq %rsi, %r10 +; SRC-NEXT: bsrq %rsi, %r10 ; SRC-NEXT: xorq $63, %r10 ; SRC-NEXT: addq $64, %r10 ; SRC-NEXT: testq %rdx, 
%rdx @@ -600,13 +589,12 @@ define i256 @test3(i256 %n) nounwind { ; LIN-NEXT: movq %rdi, %rax ; LIN-NEXT: xorps %xmm0, %xmm0 ; LIN-NEXT: movaps %xmm0, 16(%rdi) +; LIN-NEXT: movl $127, %r9d ; LIN-NEXT: movq %rsi, %rdi ; LIN-NEXT: negq %rdi ; LIN-NEXT: notq %rsi ; LIN-NEXT: andq %rdi, %rsi -; LIN-NEXT: bsrq %rsi, %rsi -; LIN-NEXT: movl $127, %r9d -; LIN-NEXT: cmovneq %rsi, %r9 +; LIN-NEXT: bsrq %rsi, %r9 ; LIN-NEXT: xorq $63, %r9 ; LIN-NEXT: addq $64, %r9 ; LIN-NEXT: xorl %edi, %edi diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 74017ea66529b..0a8c5b4b76e9d 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -684,12 +684,6 @@ static const X86FoldTableEntry Table1[] = { {X86::BLSR64rr, X86::BLSR64rm, 0}, {X86::BLSR64rr_EVEX, X86::BLSR64rm_EVEX, 0}, {X86::BLSR64rr_NF, X86::BLSR64rm_NF, 0}, - {X86::BSF16rr, X86::BSF16rm, 0}, - {X86::BSF32rr, X86::BSF32rm, 0}, - {X86::BSF64rr, X86::BSF64rm, 0}, - {X86::BSR16rr, X86::BSR16rm, 0}, - {X86::BSR32rr, X86::BSR32rm, 0}, - {X86::BSR64rr, X86::BSR64rm, 0}, {X86::BZHI32rr, X86::BZHI32rm, 0}, {X86::BZHI32rr_EVEX, X86::BZHI32rm_EVEX, 0}, {X86::BZHI32rr_NF, X86::BZHI32rm_NF, 0}, @@ -2072,6 +2066,12 @@ static const X86FoldTableEntry Table2[] = { {X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16}, {X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16}, {X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16}, + {X86::BSF16rr, X86::BSF16rm, 0}, + {X86::BSF32rr, X86::BSF32rm, 0}, + {X86::BSF64rr, X86::BSF64rm, 0}, + {X86::BSR16rr, X86::BSR16rm, 0}, + {X86::BSR32rr, X86::BSR32rm, 0}, + {X86::BSR64rr, X86::BSR64rm, 0}, {X86::CMOV16rr, X86::CMOV16rm, 0}, {X86::CMOV16rr_ND, X86::CMOV16rm_ND, 0}, {X86::CMOV32rr, X86::CMOV32rm, 0}, diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s index 6483809deda3a..0bd5f451e2e34 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s @@ -15,12 +15,12 @@ bsf %rax, %rcx # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 655 +# CHECK-NEXT: Total Cycles: 663 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.53 -# CHECK-NEXT: IPC: 0.61 +# CHECK-NEXT: uOps Per Cycle: 1.51 +# CHECK-NEXT: IPC: 0.60 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Instruction Info:
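A companion sketch for the BSR side, under the same assumptions (invented helper, zero-source passthrough behaviour): LowerCTLZ preloads NumBits + NumBits - 1 rather than NumBits because the scan result is still XORed with NumBits - 1 to turn a bit index into a leading-zero count, so the fallback constant has to be chosen to survive that XOR. For i32 this is the `movl $63, %eax` + `bsrl %edi, %eax` + `xorl $31, %eax` sequence in the bit_ceil and ctlz test updates above.

```cpp
#include <cstdint>

// Hypothetical helper mirroring LowerCTLZ's i32 passthrough path (x86-64).
static inline uint32_t ctlz32_passthrough(uint32_t x) {
  uint32_t dst = 63;      // PassThru = NumBits + NumBits - 1
  __asm__("bsrl %1, %0"   // when x == 0, dst keeps its preloaded value
          : "+r"(dst)
          : "r"(x)
          : "cc");
  // Nonzero x: bsr gives the index i of the highest set bit, and
  // 31 ^ i == 31 - i == ctlz(x). Zero x: 63 ^ 31 == 32 == ctlz(0).
  return dst ^ 31;
}
```

The small Total Cycles regression in the BtVer2 llvm-mca test above (655 to 663) is presumably the flip side of this modelling: tying $fallback to $dst makes the destination a true input of BSF/BSR, so chained bit scans now carry a dependency on the destination register's previous value.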