You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Auto merge of #127007 - krtab:improv_binary_search, r=<try>
Improve slice::binary_search_by
This PR aims to improve the performance of `std::slice::binary_search`.
**EDIT: The proposed implementation changed so the rest of this comment is outdated. See #127007 (comment) for an up to date presentation of the PR.**
It reduces the total instruction count for the `u32` monomorphization and, perhaps more notably, removes 2 of the 12 instructions in the main loop (on x86).
It changes `test_binary_search_implementation_details()`, so it may warrant a crater run.
I will document it more thoroughly if benchmarks show it to be worthwhile. Could we start with a timer run?
**Before the PR**
```asm
# Compiler output for the search routine before the change (the function label
# is not part of this excerpt). Intel operand order: `op dst, src`.
#
# Register roles as used below:
#   rdi = base address of an array of 4-byte elements (indexed as [rdi + 4*i])
#   rsi = on entry presumably the element count; reused as the lower bound `low`
#   rdx = on entry a pointer to the 32-bit key; reused as size / mid index
#   ecx = key value        r8  = upper bound `high`
#   r9d = element at mid   r10 = mid + 1
# Result: eax = 0 with rdx = index of the equal element, otherwise eax = 1 with
#   rdx = final `low` (0 for empty input). NOTE(review): presumably these are
#   the Ok/Err discriminant and payload of the returned Result - confirm
#   against the Rust source.
mov eax, 1                          # default return flag: 1 = no equal element found
test rsi, rsi
je .LBB0_1                          # count == 0: nothing to search
mov rcx, rdx                        # rcx = pointer to key (frees rdx for reuse)
mov rdx, rsi                        # rdx = size = count
mov ecx, dword ptr [rcx]            # ecx = key
xor esi, esi                        # low = 0
mov r8, rdx                         # high = count
.LBB0_3:
# Main loop (12 instructions): invariant on entry is rdx = high - low > 0.
shr rdx                             # rdx = size / 2
add rdx, rsi                        # rdx = mid = low + size / 2
mov r9d, dword ptr [rdi + 4*rdx]    # r9d = element at mid
cmp r9d, ecx
je .LBB0_4                          # element == key: found, rdx = mid
lea r10, [rdx + 1]                  # r10 = mid + 1 (lea leaves the flags untouched)
cmp r9d, ecx                        # same comparison as above, re-issued
cmova r8, rdx                       # element > key (unsigned): high = mid
cmovb rsi, r10                      # element < key (unsigned): low = mid + 1
mov rdx, r8
sub rdx, rsi                        # rdx = high - low
ja .LBB0_3                          # keep looping while high > low (unsigned)
mov rdx, rsi                        # not found: rdx = low, eax is still 1
ret
.LBB0_1:
xor edx, edx                        # empty input: rdx = 0, eax is still 1
ret
.LBB0_4:
xor eax, eax                        # found: eax = 0, rdx = mid
ret
```
**After the PR**
```asm
# Compiler output for the search routine after the change (the function label
# is not part of this excerpt). Intel operand order: `op dst, src`.
#
# Register roles as used below:
#   rdi = base address of an array of 4-byte elements (indexed as [rdi + 4*i])
#   rsi = remaining size (on entry presumably the element count)
#   rdx = on entry a pointer to the 32-bit key; reused as the index `base`
#   ecx = key value   r9 = half = size / 2   r8 = mid = base + half
# Result: eax = 0 with rdx = index of the equal element, otherwise eax = 1 with
#   rdx = the index computed in the tail (0 for empty input). NOTE(review):
#   presumably these are the Ok/Err discriminant and payload of the returned
#   Result - confirm against the Rust source.
mov ecx, dword ptr [rdx]            # ecx = key (loaded before the size is checked)
xor eax, eax                        # eax = 0; stays 0 for the whole loop
xor edx, edx                        # base = 0
.LBB1_1:
# Main loop (10 instructions): runs while size > 1.
cmp rsi, 1
jbe .LBB1_2                         # size <= 1: finish in the tail
mov r9, rsi
shr r9                              # half = size / 2
lea r8, [r9 + rdx]                  # mid = base + half
sub rsi, r9                         # size = size - half (kept if element < key)
cmp dword ptr [rdi + 4*r8], ecx     # compare element at mid with key
cmovb rdx, r8                       # element < key (unsigned): base = mid
cmova rsi, r9                       # element > key (unsigned): size = half
jne .LBB1_1                         # not equal: continue; flags still from the cmp
mov rdx, r8                         # equal: rdx = mid, eax is still 0
ret
.LBB1_2:
test rsi, rsi
je .LBB1_3                          # size == 0: report not found
xor eax, eax
cmp dword ptr [rdi + 4*rdx], ecx    # size == 1: compare element at base with key
setne al                            # eax = 1 if they differ, 0 if equal
adc rdx, 0                          # rdx += CF, i.e. base + 1 when element < key
ret
.LBB1_3:
mov eax, 1                          # not found; rdx is returned unchanged
ret
```
0 commit comments