|  | 
| 7 | 7 | #![stable(feature = "rust1", since = "1.0.0")] | 
| 8 | 8 | 
 | 
| 9 | 9 | use crate::cmp::Ordering::{self, Equal, Greater, Less}; | 
| 10 |  | -use crate::intrinsics::{exact_div, unchecked_sub}; | 
|  | 10 | +use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub}; | 
| 11 | 11 | use crate::mem::{self, SizedTypeProperties}; | 
| 12 | 12 | use crate::num::NonZero; | 
| 13 | 13 | use crate::ops::{Bound, OneSidedRange, Range, RangeBounds}; | 
| @@ -2770,41 +2770,54 @@ impl<T> [T] { | 
| 2770 | 2770 |     where | 
| 2771 | 2771 |         F: FnMut(&'a T) -> Ordering, | 
| 2772 | 2772 |     { | 
| 2773 |  | -        // INVARIANTS: | 
| 2774 |  | -        // - 0 <= left <= left + size = right <= self.len() | 
| 2775 |  | -        // - f returns Less for everything in self[..left] | 
| 2776 |  | -        // - f returns Greater for everything in self[right..] | 
| 2777 | 2773 |         let mut size = self.len(); | 
| 2778 |  | -        let mut left = 0; | 
| 2779 |  | -        let mut right = size; | 
| 2780 |  | -        while left < right { | 
| 2781 |  | -            let mid = left + size / 2; | 
| 2782 |  | - | 
| 2783 |  | -            // SAFETY: the while condition means `size` is strictly positive, so | 
| 2784 |  | -            // `size/2 < size`. Thus `left + size/2 < left + size`, which | 
| 2785 |  | -            // coupled with the `left + size <= self.len()` invariant means | 
| 2786 |  | -            // we have `left + size/2 < self.len()`, and this is in-bounds. | 
|  | 2774 | +        if size == 0 { | 
|  | 2775 | +            return Err(0); | 
|  | 2776 | +        } | 
|  | 2777 | +        let mut base = 0usize; | 
|  | 2778 | + | 
|  | 2779 | +        // This loop intentionally doesn't have an early exit if the comparison | 
|  | 2780 | +        // returns Equal. We want the number of loop iterations to depend *only* | 
|  | 2781 | +        // on the size of the input slice so that the CPU can reliably predict | 
|  | 2782 | +        // the loop count. | 
|  | 2783 | +        while size > 1 { | 
|  | 2784 | +            let half = size / 2; | 
|  | 2785 | +            let mid = base + half; | 
|  | 2786 | + | 
|  | 2787 | +            // SAFETY: the call is made safe by the following invariants: | 
|  | 2788 | +            // - `mid >= 0`: by definition | 
|  | 2789 | +            // - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...` | 
| 2787 | 2790 |             let cmp = f(unsafe { self.get_unchecked(mid) }); | 
| 2788 | 2791 | 
 | 
| 2789 |  | -            // This control flow produces conditional moves, which results in | 
| 2790 |  | -            // fewer branches and instructions than if/else or matching on | 
| 2791 |  | -            // cmp::Ordering. | 
| 2792 |  | -            // This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx. | 
| 2793 |  | -            left = if cmp == Less { mid + 1 } else { left }; | 
| 2794 |  | -            right = if cmp == Greater { mid } else { right }; | 
| 2795 |  | -            if cmp == Equal { | 
| 2796 |  | -                // SAFETY: same as the `get_unchecked` above | 
| 2797 |  | -                unsafe { hint::assert_unchecked(mid < self.len()) }; | 
| 2798 |  | -                return Ok(mid); | 
| 2799 |  | -            } | 
| 2800 |  | - | 
| 2801 |  | -            size = right - left; | 
|  | 2792 | +            // Binary search interacts poorly with branch prediction, so force | 
|  | 2793 | +            // the compiler to use conditional moves if supported by the target | 
|  | 2794 | +            // architecture. | 
|  | 2795 | +            base = select_unpredictable(cmp == Greater, base, mid); | 
|  | 2796 | + | 
|  | 2797 | +            // This is imprecise in the case where `size` is odd and the | 
|  | 2798 | +            // comparison returns Greater: the mid element still gets included | 
|  | 2799 | +            // by `size` even though it's known to be larger than the element | 
|  | 2800 | +            // being searched for. | 
|  | 2801 | +            // | 
|  | 2802 | +            // This is fine though: we gain more performance by keeping the | 
|  | 2803 | +            // loop iteration count invariant (and thus predictable) than we | 
|  | 2804 | +            // lose from considering one additional element. | 
|  | 2805 | +            size -= half; | 
| 2802 | 2806 |         } | 
| 2803 | 2807 | 
 | 
| 2804 |  | -        // SAFETY: directly true from the overall invariant. | 
| 2805 |  | -        // Note that this is `<=`, unlike the assume in the `Ok` path. | 
| 2806 |  | -        unsafe { hint::assert_unchecked(left <= self.len()) }; | 
| 2807 |  | -        Err(left) | 
|  | 2808 | +        // SAFETY: base is always in [0, size) because base <= mid. | 
|  | 2809 | +        let cmp = f(unsafe { self.get_unchecked(base) }); | 
|  | 2810 | +        if cmp == Equal { | 
|  | 2811 | +            // SAFETY: same as the `get_unchecked` above. | 
|  | 2812 | +            unsafe { hint::assert_unchecked(base < self.len()) }; | 
|  | 2813 | +            Ok(base) | 
|  | 2814 | +        } else { | 
|  | 2815 | +            let result = base + (cmp == Less) as usize; | 
|  | 2816 | +            // SAFETY: same as the `get_unchecked` above. | 
|  | 2817 | +            // Note that this is `<=`, unlike the assume in the `Ok` path. | 
|  | 2818 | +            unsafe { hint::assert_unchecked(result <= self.len()) }; | 
|  | 2819 | +            Err(result) | 
|  | 2820 | +        } | 
| 2808 | 2821 |     } | 
| 2809 | 2822 | 
 | 
| 2810 | 2823 |     /// Binary searches this slice with a key extraction function. | 
|  | 
0 commit comments