-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86] Add basic computeKnownBits support for X86ISD::BSR #102474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Resurrect D89214 (https://reviews.llvm.org/D89214). The behaviour is undefined for an input of 0; otherwise the result is the position of the most significant set bit, which must be in the range [0, bitwidth-1]. So every bit from position log2(bitwidth) upwards must be 0 (e.g. for a 32-bit BSR only bits 0-4 can be set). Fixes llvm#74101.
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Resurrect D89214 (https://reviews.llvm.org/D89214, by @topperc). The behaviour is undefined for an input of 0; otherwise the result is the position of the most significant set bit, which must be in the range [0, bitwidth-1]. So every bit from position log2(bitwidth) upwards must be 0. Fixes #74101. Full diff: https://github.com/llvm/llvm-project/pull/102474.diff 10 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2891e21be1b26..89a2e84781e77 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37249,6 +37249,12 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = KnownBits::mul(Known, Known2);
break;
}
+ case X86ISD::BSR:
+ // BSR(0) is undef, but any use of BSR already accounts for non-zero inputs.
+ // Similar KnownBits behaviour to CTLZ_ZERO_UNDEF.
+ // TODO: Bound with input known bits?
+ Known.Zero.setBitsFrom(Log2_32(BitWidth));
+ break;
case X86ISD::SETCC:
Known.Zero.setBitsFrom(1);
break;
diff --git a/llvm/test/CodeGen/X86/combine-sub.ll b/llvm/test/CodeGen/X86/combine-sub.ll
index 3123efc306360..f16b85eaa195d 100644
--- a/llvm/test/CodeGen/X86/combine-sub.ll
+++ b/llvm/test/CodeGen/X86/combine-sub.ll
@@ -453,13 +453,12 @@ define void @PR52032_4(ptr %p, ptr %q) {
ret void
}
-; FIXME: Failure to fold add(xor(bsr(x),-32),33) -> add(or(bsr(x),-32),33) -> add(bsr(x),1)
+; Fold sub(32,xor(bsr(x),31)) -> add(xor(bsr(x),-32),33) -> add(or(bsr(x),-32),33) -> add(bsr(x),1)
define i32 @PR74101(i32 %a0) {
; CHECK-LABEL: PR74101:
; CHECK: # %bb.0:
; CHECK-NEXT: bsrl %edi, %eax
-; CHECK-NEXT: xorl $-32, %eax
-; CHECK-NEXT: addl $33, %eax
+; CHECK-NEXT: incl %eax
; CHECK-NEXT: retq
%lz = call i32 @llvm.ctlz.i32(i32 %a0, i1 true)
%add = sub nuw nsw i32 32, %lz
diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll
index 7431f94f0fdf2..bb80279e28f3d 100644
--- a/llvm/test/CodeGen/X86/ctlo.ll
+++ b/llvm/test/CodeGen/X86/ctlo.ll
@@ -364,7 +364,7 @@ define i64 @ctlo_i64_undef(i64 %x) {
; X86-NOCMOV-NEXT: notl %eax
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
-; X86-NOCMOV-NEXT: addl $32, %eax
+; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB7_1:
@@ -383,7 +383,7 @@ define i64 @ctlo_i64_undef(i64 %x) {
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl %eax, %eax
; X86-CMOV-NEXT: xorl $31, %eax
-; X86-CMOV-NEXT: addl $32, %eax
+; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll
index 87dca62d74168..d8f83502bd849 100644
--- a/llvm/test/CodeGen/X86/ctlz.ll
+++ b/llvm/test/CodeGen/X86/ctlz.ll
@@ -145,7 +145,7 @@ define i64 @ctlz_i64(i64 %x) {
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
-; X86-NOCMOV-NEXT: addl $32, %eax
+; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB3_1:
@@ -161,7 +161,7 @@ define i64 @ctlz_i64(i64 %x) {
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: xorl $31, %eax
-; X86-CMOV-NEXT: addl $32, %eax
+; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
@@ -1126,7 +1126,7 @@ define i64 @ctlz_xor63_i64_true(i64 %x) {
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
-; X86-NOCMOV-NEXT: addl $32, %eax
+; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: jmp .LBB19_3
; X86-NOCMOV-NEXT: .LBB19_1:
; X86-NOCMOV-NEXT: bsrl %eax, %eax
@@ -1143,7 +1143,7 @@ define i64 @ctlz_xor63_i64_true(i64 %x) {
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: xorl $31, %eax
-; X86-CMOV-NEXT: addl $32, %eax
+; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl $63, %eax
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index 1c303de55c95d..1d3b015f3c547 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -234,7 +234,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: xorl $31, %edx
; X86-NEXT: bsrl %ebx, %ecx
; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl $32, %ecx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: cmovnel %edx, %ecx
; X86-NEXT: bsrl %ebp, %edx
@@ -242,11 +242,11 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bsrl %edi, %edi
; X86-NEXT: xorl $31, %edi
-; X86-NEXT: addl $32, %edi
+; X86-NEXT: orl $32, %edi
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: cmovnel %edx, %edi
-; X86-NEXT: addl $64, %edi
+; X86-NEXT: orl $64, %edi
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -258,7 +258,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: bsrl %ebp, %ecx
; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl $32, %ecx
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: cmovnel %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -266,10 +266,10 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: xorl $31, %esi
; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: addl $32, %edx
+; X86-NEXT: orl $32, %edx
; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: addl $64, %edx
+; X86-NEXT: orl $64, %edx
; X86-NEXT: movl %ebp, %esi
; X86-NEXT: orl %ebx, %esi
; X86-NEXT: cmovnel %ecx, %edx
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index fa45afbb634c4..58ea70e58028f 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -199,7 +199,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: xorl $31, %edx
; X86-NEXT: bsrl %esi, %ecx
; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl $32, %ecx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: cmovnel %edx, %ecx
@@ -210,10 +210,10 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: addl $32, %ebp
+; X86-NEXT: orl $32, %ebp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovnel %edx, %ebp
-; X86-NEXT: addl $64, %ebp
+; X86-NEXT: orl $64, %ebp
; X86-NEXT: movl %edi, %edx
; X86-NEXT: orl %ebx, %edx
; X86-NEXT: cmovnel %ecx, %ebp
@@ -223,7 +223,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsrl %eax, %ecx
; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl $32, %ecx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: cmovnel %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -231,10 +231,10 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: xorl $31, %esi
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: addl $32, %edx
+; X86-NEXT: orl $32, %edx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: addl $64, %edx
+; X86-NEXT: orl $64, %edx
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: subl %edx, %ebp
diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
index 6c8d5c9d55a6d..a9513a373661f 100644
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -19,13 +19,13 @@ define i1 @lshr_ctlz_cmpeq_one_i64(i64 %in) nounwind {
; X64-BSR-NEXT: # %bb.2: # %cond.false
; X64-BSR-NEXT: bsrq %rdi, %rax
; X64-BSR-NEXT: xorq $63, %rax
-; X64-BSR-NEXT: jmp .LBB0_3
+; X64-BSR-NEXT: shrl $6, %eax
+; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
+; X64-BSR-NEXT: retq
; X64-BSR-NEXT: .LBB0_1:
; X64-BSR-NEXT: movl $64, %eax
-; X64-BSR-NEXT: .LBB0_3: # %cond.end
-; X64-BSR-NEXT: shrq $6, %rax
-; X64-BSR-NEXT: cmpq $1, %rax
-; X64-BSR-NEXT: sete %al
+; X64-BSR-NEXT: shrl $6, %eax
+; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
; X64-BSR-NEXT: retq
;
; X64-LZCNT-LABEL: lshr_ctlz_cmpeq_one_i64:
@@ -43,15 +43,6 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) nounwind {
; X86-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
; X86-BSR: # %bb.0:
; X86-BSR-NEXT: xorl %eax, %eax
-; X86-BSR-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-BSR-NEXT: jne .LBB1_2
-; X86-BSR-NEXT: # %bb.1:
-; X86-BSR-NEXT: bsrl {{[0-9]+}}(%esp), %eax
-; X86-BSR-NEXT: xorl $31, %eax
-; X86-BSR-NEXT: addl $32, %eax
-; X86-BSR-NEXT: .LBB1_2:
-; X86-BSR-NEXT: shrl $6, %eax
-; X86-BSR-NEXT: # kill: def $al killed $al killed $eax
; X86-BSR-NEXT: retl
;
; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
@@ -67,9 +58,7 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) nounwind {
;
; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
; X64-BSR: # %bb.0:
-; X64-BSR-NEXT: bsrq %rdi, %rax
-; X64-BSR-NEXT: shrl $6, %eax
-; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
+; X64-BSR-NEXT: xorl %eax, %eax
; X64-BSR-NEXT: retq
;
; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
@@ -99,12 +88,13 @@ define i1 @lshr_ctlz_cmpne_zero_i64(i64 %in) nounwind {
; X64-BSR-NEXT: # %bb.2: # %cond.false
; X64-BSR-NEXT: bsrq %rdi, %rax
; X64-BSR-NEXT: xorq $63, %rax
-; X64-BSR-NEXT: jmp .LBB2_3
+; X64-BSR-NEXT: shrl $6, %eax
+; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
+; X64-BSR-NEXT: retq
; X64-BSR-NEXT: .LBB2_1:
; X64-BSR-NEXT: movl $64, %eax
-; X64-BSR-NEXT: .LBB2_3: # %cond.end
-; X64-BSR-NEXT: testq $-64, %rax
-; X64-BSR-NEXT: setne %al
+; X64-BSR-NEXT: shrl $6, %eax
+; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
; X64-BSR-NEXT: retq
;
; X64-LZCNT-LABEL: lshr_ctlz_cmpne_zero_i64:
@@ -122,15 +112,6 @@ define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) nounwind {
; X86-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
; X86-BSR: # %bb.0:
; X86-BSR-NEXT: xorl %eax, %eax
-; X86-BSR-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-BSR-NEXT: jne .LBB3_2
-; X86-BSR-NEXT: # %bb.1:
-; X86-BSR-NEXT: bsrl {{[0-9]+}}(%esp), %eax
-; X86-BSR-NEXT: xorl $31, %eax
-; X86-BSR-NEXT: addl $32, %eax
-; X86-BSR-NEXT: .LBB3_2:
-; X86-BSR-NEXT: shrl $6, %eax
-; X86-BSR-NEXT: # kill: def $al killed $al killed $eax
; X86-BSR-NEXT: retl
;
; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
@@ -146,9 +127,7 @@ define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) nounwind {
;
; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
; X64-BSR: # %bb.0:
-; X64-BSR-NEXT: bsrq %rdi, %rax
-; X64-BSR-NEXT: shrl $6, %eax
-; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
+; X64-BSR-NEXT: xorl %eax, %eax
; X64-BSR-NEXT: retq
;
; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll
index e710d3f95e6f4..6fcebdb5116dd 100644
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -52,7 +52,7 @@ define void @f() nounwind {
; X86-NEXT: # %bb.2: # %BB_udiv-special-cases
; X86-NEXT: bsrl %edx, %eax
; X86-NEXT: xorl $31, %eax
-; X86-NEXT: addl $32, %eax
+; X86-NEXT: orl $32, %eax
; X86-NEXT: jmp .LBB0_3
; X86-NEXT: .LBB0_1:
; X86-NEXT: bsrl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/pr40090.ll b/llvm/test/CodeGen/X86/pr40090.ll
index d1c38e4104d79..f0aaf09e359dd 100644
--- a/llvm/test/CodeGen/X86/pr40090.ll
+++ b/llvm/test/CodeGen/X86/pr40090.ll
@@ -5,7 +5,7 @@ define i64 @foo(i64 %x, i64 %y) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: bsrq %rdi, %rax
-; CHECK-NEXT: xorq $64, %rax
+; CHECK-NEXT: orq $64, %rax
; CHECK-NEXT: bsrq %rsi, %rcx
; CHECK-NEXT: cmoveq %rax, %rcx
; CHECK-NEXT: movl $63, %eax
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 785b97d8c2402..df3c25a8c42ad 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -236,7 +236,7 @@ define i256 @test2(i256 %a) nounwind {
; ILP-NEXT: andq %rcx, %r9
; ILP-NEXT: bsrq %r9, %rcx
; ILP-NEXT: xorq $63, %rcx
-; ILP-NEXT: addq $64, %rcx
+; ILP-NEXT: orq $64, %rcx
; ILP-NEXT: testq %rdi, %rdi
; ILP-NEXT: cmovneq %r8, %rcx
; ILP-NEXT: xorq $63, %rdx
@@ -274,7 +274,7 @@ define i256 @test2(i256 %a) nounwind {
; HYBRID-NEXT: andq %rcx, %r9
; HYBRID-NEXT: bsrq %r9, %rcx
; HYBRID-NEXT: xorq $63, %rcx
-; HYBRID-NEXT: addq $64, %rcx
+; HYBRID-NEXT: orq $64, %rcx
; HYBRID-NEXT: testq %rdi, %rdi
; HYBRID-NEXT: cmovneq %r8, %rcx
; HYBRID-NEXT: andq %rdx, %r10
@@ -314,7 +314,7 @@ define i256 @test2(i256 %a) nounwind {
; BURR-NEXT: andq %rcx, %r9
; BURR-NEXT: bsrq %r9, %rcx
; BURR-NEXT: xorq $63, %rcx
-; BURR-NEXT: addq $64, %rcx
+; BURR-NEXT: orq $64, %rcx
; BURR-NEXT: testq %rdi, %rdi
; BURR-NEXT: cmovneq %r8, %rcx
; BURR-NEXT: andq %rdx, %r10
@@ -354,7 +354,7 @@ define i256 @test2(i256 %a) nounwind {
; SRC-NEXT: xorq $63, %rcx
; SRC-NEXT: bsrq %r9, %rdx
; SRC-NEXT: xorq $63, %rdx
-; SRC-NEXT: addq $64, %rdx
+; SRC-NEXT: orq $64, %rdx
; SRC-NEXT: testq %rdi, %rdi
; SRC-NEXT: cmovneq %rcx, %rdx
; SRC-NEXT: bsrq %r10, %rcx
@@ -402,7 +402,7 @@ define i256 @test2(i256 %a) nounwind {
; LIN-NEXT: andq %rcx, %rdi
; LIN-NEXT: bsrq %rdi, %rcx
; LIN-NEXT: xorq $63, %rcx
-; LIN-NEXT: addq $64, %rcx
+; LIN-NEXT: orq $64, %rcx
; LIN-NEXT: sbbq %r8, %rsi
; LIN-NEXT: andq %r8, %rsi
; LIN-NEXT: bsrq %rsi, %r8
@@ -446,7 +446,7 @@ define i256 @test3(i256 %n) nounwind {
; ILP-NEXT: andq %r11, %rcx
; ILP-NEXT: bsrq %rcx, %r10
; ILP-NEXT: xorq $63, %r10
-; ILP-NEXT: addq $64, %r10
+; ILP-NEXT: orq $64, %r10
; ILP-NEXT: notq %rsi
; ILP-NEXT: testq %r8, %r8
; ILP-NEXT: cmovneq %rbx, %r10
@@ -489,7 +489,7 @@ define i256 @test3(i256 %n) nounwind {
; HYBRID-NEXT: andq %r11, %rcx
; HYBRID-NEXT: bsrq %rcx, %r9
; HYBRID-NEXT: xorq $63, %r9
-; HYBRID-NEXT: addq $64, %r9
+; HYBRID-NEXT: orq $64, %r9
; HYBRID-NEXT: testq %r8, %r8
; HYBRID-NEXT: cmovneq %rbx, %r9
; HYBRID-NEXT: notq %rdx
@@ -535,7 +535,7 @@ define i256 @test3(i256 %n) nounwind {
; BURR-NEXT: andq %r11, %rcx
; BURR-NEXT: bsrq %rcx, %r9
; BURR-NEXT: xorq $63, %r9
-; BURR-NEXT: addq $64, %r9
+; BURR-NEXT: orq $64, %r9
; BURR-NEXT: testq %r8, %r8
; BURR-NEXT: cmovneq %rbx, %r9
; BURR-NEXT: notq %rdx
@@ -582,7 +582,7 @@ define i256 @test3(i256 %n) nounwind {
; SRC-NEXT: xorq $63, %rdi
; SRC-NEXT: bsrq %rcx, %r9
; SRC-NEXT: xorq $63, %r9
-; SRC-NEXT: addq $64, %r9
+; SRC-NEXT: orq $64, %r9
; SRC-NEXT: testq %r8, %r8
; SRC-NEXT: cmovneq %rdi, %r9
; SRC-NEXT: bsrq %rdx, %rdi
@@ -633,7 +633,7 @@ define i256 @test3(i256 %n) nounwind {
; LIN-NEXT: andq %rdx, %rcx
; LIN-NEXT: bsrq %rcx, %rdx
; LIN-NEXT: xorq $63, %rdx
-; LIN-NEXT: addq $64, %rdx
+; LIN-NEXT: orq $64, %rdx
; LIN-NEXT: sbbq %r8, %rdi
; LIN-NEXT: notq %r8
; LIN-NEXT: andq %rdi, %r8
@@ -764,16 +764,16 @@ define i256 @PR25498(i256 %a) nounwind {
; ILP-NEXT: xorq $63, %rcx
; ILP-NEXT: bsrq %r9, %rsi
; ILP-NEXT: xorq $63, %rsi
-; ILP-NEXT: addq $64, %rsi
+; ILP-NEXT: orq $64, %rsi
; ILP-NEXT: testq %r10, %r10
; ILP-NEXT: cmovneq %rcx, %rsi
; ILP-NEXT: xorq $63, %rdx
; ILP-NEXT: bsrq %rbx, %rcx
; ILP-NEXT: xorq $63, %rcx
-; ILP-NEXT: addq $64, %rcx
+; ILP-NEXT: orq $64, %rcx
; ILP-NEXT: testq %r11, %r11
; ILP-NEXT: cmovneq %rdx, %rcx
-; ILP-NEXT: subq $-128, %rcx
+; ILP-NEXT: orq $128, %rcx
; ILP-NEXT: xorl %edi, %edi
; ILP-NEXT: orq %r10, %r9
; ILP-NEXT: cmovneq %rsi, %rcx
@@ -810,17 +810,17 @@ define i256 @PR25498(i256 %a) nounwind {
; HYBRID-NEXT: xorq $63, %rcx
; HYBRID-NEXT: bsrq %r9, %rdx
; HYBRID-NEXT: xorq $63, %rdx
-; HYBRID-NEXT: addq $64, %rdx
+; HYBRID-NEXT: orq $64, %rdx
; HYBRID-NEXT: testq %r10, %r10
; HYBRID-NEXT: cmovneq %rcx, %rdx
; HYBRID-NEXT: bsrq %r11, %rsi
; HYBRID-NEXT: xorq $63, %rsi
; HYBRID-NEXT: bsrq %rbx, %rcx
; HYBRID-NEXT: xorq $63, %rcx
-; HYBRID-NEXT: addq $64, %rcx
+; HYBRID-NEXT: orq $64, %rcx
; HYBRID-NEXT: testq %r11, %r11
; HYBRID-NEXT: cmovneq %rsi, %rcx
-; HYBRID-NEXT: subq $-128, %rcx
+; HYBRID-NEXT: orq $128, %rcx
; HYBRID-NEXT: orq %r10, %r9
; HYBRID-NEXT: cmovneq %rdx, %rcx
; HYBRID-NEXT: xorl %edi, %edi
@@ -857,17 +857,17 @@ define i256 @PR25498(i256 %a) nounwind {
; BURR-NEXT: xorq $63, %rcx
; BURR-NEXT: bsrq %r9, %rdx
; BURR-NEXT: xorq $63, %rdx
-; BURR-NEXT: addq $64, %rdx
+; BURR-NEXT: orq $64, %rdx
; BURR-NEXT: testq %r10, %r10
; BURR-NEXT: cmovneq %rcx, %rdx
; BURR-NEXT: bsrq %r11, %rsi
; BURR-NEXT: xorq $63, %rsi
; BURR-NEXT: bsrq %rbx, %rcx
; BURR-NEXT: xorq $63, %rcx
-; BURR-NEXT: addq $64, %rcx
+; BURR-NEXT: orq $64, %rcx
; BURR-NEXT: testq %r11, %r11
; BURR-NEXT: cmovneq %rsi, %rcx
-; BURR-NEXT: subq $-128, %rcx
+; BURR-NEXT: orq $128, %rcx
; BURR-NEXT: orq %r10, %r9
; BURR-NEXT: cmovneq %rdx, %rcx
; BURR-NEXT: xorl %edi, %edi
@@ -904,17 +904,17 @@ define i256 @PR25498(i256 %a) nounwind {
; SRC-NEXT: xorq $63, %rcx
; SRC-NEXT: bsrq %r9, %rdx
; SRC-NEXT: xorq $63, %rdx
-; SRC-NEXT: addq $64, %rdx
+; SRC-NEXT: orq $64, %rdx
; SRC-NEXT: testq %r10, %r10
; SRC-NEXT: cmovneq %rcx, %rdx
; SRC-NEXT: bsrq %r11, %rsi
; SRC-NEXT: xorq $63, %rsi
; SRC-NEXT: bsrq %rbx, %rcx
; SRC-NEXT: xorq $63, %rcx
-; SRC-NEXT: addq $64, %rcx
+; SRC-NEXT: orq $64, %rcx
; SRC-NEXT: testq %r11, %r11
; SRC-NEXT: cmovneq %rsi, %rcx
-; SRC-NEXT: subq $-128, %rcx
+; SRC-NEXT: orq $128, %rcx
; SRC-NEXT: orq %r10, %r9
; SRC-NEXT: cmovneq %rdx, %rcx
; SRC-NEXT: xorl %edi, %edi
@@ -949,15 +949,15 @@ define i256 @PR25498(i256 %a) nounwind {
; LIN-NEXT: # %bb.2: # %cond.false
; LIN-NEXT: bsrq %rbx, %rcx
; LIN-NEXT: xorq $63, %rcx
-; LIN-NEXT: addq $64, %rcx
+; LIN-NEXT: orq $64, %rcx
; LIN-NEXT: bsrq %r11, %rdx
; LIN-NEXT: xorq $63, %rdx
; LIN-NEXT: testq %r11, %r11
; LIN-NEXT: cmoveq %rcx, %rdx
-; LIN-NEXT: subq $-128, %rdx
+; LIN-NEXT: orq $128, %rdx
; LIN-NEXT: bsrq %r9, %rsi
; LIN-NEXT: xorq $63, %rsi
-; LIN-NEXT: addq $64, %rsi
+; LIN-NEXT: orq $64, %rsi
; LIN-NEXT: bsrq %r10, %rcx
; LIN-NEXT: xorq $63, %rcx
; LIN-NEXT: testq %r10, %r10
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Resurrect https://reviews.llvm.org/D89214 (by @topperc)
The behaviour is undefined for an input of 0; otherwise the result is the position of the most significant set bit, which must be in the range [0, bitwidth-1]. So every bit from position log2(bitwidth) upwards must be 0 (e.g. for a 32-bit BSR only bits 0-4 can be set).
Fixes #74101