Skip to content

[X86] Allow speculative BSR/BSF instructions on targets with CMOV #102885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3239,14 +3239,14 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,

// Reports whether cttz on values of type Ty should be treated as cheap to
// speculate on this subtarget.
// NOTE(review): this span is a diff hunk shown without +/- markers; the two
// consecutive `return` lines below look like the removed and the added
// version of the same statement -- confirm against the real file.
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
// Speculate cttz if we can directly use TZCNT, can use CMOV, or can promote
// to i32/i64 (non-vector type narrower than 64 bits on 64-bit targets,
// narrower than 32 bits otherwise).
return Subtarget.hasBMI() ||
return Subtarget.hasBMI() || Subtarget.canUseCMOV() ||
(!Ty->isVectorTy() &&
Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u));
}

// Reports whether ctlz should be treated as cheap to speculate on this
// subtarget; the Ty argument is not consulted by the visible body.
// NOTE(review): this span is a diff hunk shown without +/- markers; the two
// consecutive `return` lines below look like the removed and the added
// version of the same statement -- confirm against the real file.
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
// Speculate ctlz if we can directly use LZCNT or can use CMOV.
return Subtarget.hasLZCNT();
return Subtarget.hasLZCNT() || Subtarget.canUseCMOV();
}

bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4210,9 +4210,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::i64, { 1, 2, 3, 3 } }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
{ ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
{ ISD::CTLZ, MVT::i64, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 2, 2 } }, // BSR+XOR
{ ISD::CTTZ, MVT::i64, { 2, 2, 5, 5 } }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTTZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 1, 2 } }, // BSF
{ ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
{ ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
Expand Down Expand Up @@ -4241,9 +4241,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
{ ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
{ ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
{ ISD::CTLZ, MVT::i32, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 3, 2, 7, 7 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 2, 2, 5, 6 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i32,{ 1, 2, 2, 2 } }, // BSR+XOR
{ ISD::CTLZ_ZERO_UNDEF, MVT::i16,{ 2, 2, 2, 2 } }, // BSR+XOR
{ ISD::CTLZ_ZERO_UNDEF, MVT::i8, { 2, 2, 3, 3 } }, // BSR+XOR
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)

define i64 @var_ctlz_i64(i64 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i64'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i64'
Expand All @@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {

define i32 @var_ctlz_i32(i32 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i32'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i32'
Expand All @@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {

define i16 @var_ctlz_i16(i16 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i16'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i16'
Expand All @@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {

define i8 @var_ctlz_i8(i8 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i8'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i8'
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)

define i64 @var_ctlz_i64(i64 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i64'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i64'
Expand All @@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {

define i32 @var_ctlz_i32(i32 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i32'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i32'
Expand All @@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {

define i16 @var_ctlz_i16(i16 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i16'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i16'
Expand All @@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {

define i8 @var_ctlz_i8(i8 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i8'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i8'
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)

define i64 @var_ctlz_i64(i64 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i64'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i64'
Expand All @@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {

define i32 @var_ctlz_i32(i32 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i32'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i32'
Expand All @@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {

define i16 @var_ctlz_i16(i16 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i16'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i16'
Expand All @@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {

define i8 @var_ctlz_i8(i8 %a) {
; NOLZCNT-LABEL: 'var_ctlz_i8'
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz
;
; LZCNT-LABEL: 'var_ctlz_i8'
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)

define i64 @var_cttz_i64(i64 %a) {
; NOBMI-LABEL: 'var_cttz_i64'
; NOBMI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
;
; BMI-LABEL: 'var_cttz_i64'
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)

define i64 @var_cttz_i64(i64 %a) {
; NOBMI-LABEL: 'var_cttz_i64'
; NOBMI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
;
; BMI-LABEL: 'var_cttz_i64'
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/X86/atomic-bit-test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,6 @@ define i32 @split_hoist_and(i32 %0) nounwind {
; X64-NEXT: lock btsl $3, v32(%rip)
; X64-NEXT: setb %al
; X64-NEXT: shll $3, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: retq
%2 = atomicrmw or ptr @v32, i32 8 monotonic, align 4
%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)
Expand Down
53 changes: 17 additions & 36 deletions llvm/test/CodeGen/X86/bit_ceil.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,12 @@
define i32 @bit_ceil_i32(i32 %x) {
; NOBMI-LABEL: bit_ceil_i32:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movl %edi, %eax
; NOBMI-NEXT: decl %eax
; NOBMI-NEXT: je .LBB0_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrl %eax, %ecx
; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
; NOBMI-NEXT: leal -1(%rdi), %eax
; NOBMI-NEXT: bsrl %eax, %eax
; NOBMI-NEXT: movl $63, %ecx
; NOBMI-NEXT: cmovnel %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
; NOBMI-NEXT: jmp .LBB0_3
; NOBMI-NEXT: .LBB0_1:
; NOBMI-NEXT: movl $32, %ecx
; NOBMI-NEXT: .LBB0_3: # %cond.end
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down Expand Up @@ -51,15 +47,10 @@ define i32 @bit_ceil_i32(i32 %x) {
define i32 @bit_ceil_i32_plus1(i32 noundef %x) {
; NOBMI-LABEL: bit_ceil_i32_plus1:
; NOBMI: # %bb.0: # %entry
; NOBMI-NEXT: testl %edi, %edi
; NOBMI-NEXT: je .LBB1_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrl %edi, %ecx
; NOBMI-NEXT: bsrl %edi, %eax
; NOBMI-NEXT: movl $63, %ecx
; NOBMI-NEXT: cmovnel %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
; NOBMI-NEXT: jmp .LBB1_3
; NOBMI-NEXT: .LBB1_1:
; NOBMI-NEXT: movl $32, %ecx
; NOBMI-NEXT: .LBB1_3: # %cond.end
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down Expand Up @@ -94,16 +85,11 @@ entry:
define i64 @bit_ceil_i64(i64 %x) {
; NOBMI-LABEL: bit_ceil_i64:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movq %rdi, %rax
; NOBMI-NEXT: decq %rax
; NOBMI-NEXT: je .LBB2_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrq %rax, %rcx
; NOBMI-NEXT: xorq $63, %rcx
; NOBMI-NEXT: jmp .LBB2_3
; NOBMI-NEXT: .LBB2_1:
; NOBMI-NEXT: movl $64, %ecx
; NOBMI-NEXT: .LBB2_3: # %cond.end
; NOBMI-NEXT: leaq -1(%rdi), %rax
; NOBMI-NEXT: bsrq %rax, %rax
; NOBMI-NEXT: movl $127, %ecx
; NOBMI-NEXT: cmovneq %rax, %rcx
; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down Expand Up @@ -136,15 +122,10 @@ define i64 @bit_ceil_i64(i64 %x) {
define i64 @bit_ceil_i64_plus1(i64 noundef %x) {
; NOBMI-LABEL: bit_ceil_i64_plus1:
; NOBMI: # %bb.0: # %entry
; NOBMI-NEXT: testq %rdi, %rdi
; NOBMI-NEXT: je .LBB3_1
; NOBMI-NEXT: # %bb.2: # %cond.false
; NOBMI-NEXT: bsrq %rdi, %rcx
; NOBMI-NEXT: xorq $63, %rcx
; NOBMI-NEXT: jmp .LBB3_3
; NOBMI-NEXT: .LBB3_1:
; NOBMI-NEXT: movl $64, %ecx
; NOBMI-NEXT: .LBB3_3: # %cond.end
; NOBMI-NEXT: bsrq %rdi, %rax
; NOBMI-NEXT: movl $127, %ecx
; NOBMI-NEXT: cmovneq %rax, %rcx
; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
Expand Down
47 changes: 20 additions & 27 deletions llvm/test/CodeGen/X86/combine-or.ll
Original file line number Diff line number Diff line change
Expand Up @@ -213,21 +213,18 @@ define i64 @PR89533(<64 x i8> %a0) {
; SSE-NEXT: shll $16, %ecx
; SSE-NEXT: orl %eax, %ecx
; SSE-NEXT: pcmpeqb %xmm4, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %edx
; SSE-NEXT: xorl $65535, %edx # imm = 0xFFFF
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: pcmpeqb %xmm4, %xmm3
; SSE-NEXT: pmovmskb %xmm3, %eax
; SSE-NEXT: notl %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %edx, %eax
; SSE-NEXT: shlq $32, %rax
; SSE-NEXT: orq %rcx, %rax
; SSE-NEXT: je .LBB11_2
; SSE-NEXT: # %bb.1: # %cond.false
; SSE-NEXT: rep bsfq %rax, %rax
; SSE-NEXT: retq
; SSE-NEXT: .LBB11_2: # %cond.end
; SSE-NEXT: pmovmskb %xmm3, %edx
; SSE-NEXT: notl %edx
; SSE-NEXT: shll $16, %edx
; SSE-NEXT: orl %eax, %edx
; SSE-NEXT: shlq $32, %rdx
; SSE-NEXT: orq %rcx, %rdx
; SSE-NEXT: bsfq %rdx, %rcx
; SSE-NEXT: movl $64, %eax
; SSE-NEXT: cmovneq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: PR89533:
Expand All @@ -243,23 +240,19 @@ define i64 @PR89533(<64 x i8> %a0) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %edx
; AVX1-NEXT: xorl $65535, %edx # imm = 0xFFFF
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: xorl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %edx, %eax
; AVX1-NEXT: shlq $32, %rax
; AVX1-NEXT: orq %rcx, %rax
; AVX1-NEXT: je .LBB11_2
; AVX1-NEXT: # %bb.1: # %cond.false
; AVX1-NEXT: rep bsfq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX1-NEXT: .LBB11_2: # %cond.end
; AVX1-NEXT: vpmovmskb %xmm0, %edx
; AVX1-NEXT: notl %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: bsfq %rdx, %rcx
; AVX1-NEXT: movl $64, %eax
; AVX1-NEXT: cmovneq %rcx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand Down
Loading
Loading