Skip to content

Commit 89b8a97

Browse files
committed
Refactor memchr to allow optimization
1 parent 5b9e886 commit 89b8a97

File tree

4 files changed

+89
-16
lines changed

4 files changed

+89
-16
lines changed

library/core/src/slice/cmp.rs

+2
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,14 @@ where
268268
}
269269

270270
impl SliceContains for u8 {
271+
#[inline]
271272
fn slice_contains(&self, x: &[Self]) -> bool {
272273
memchr::memchr(*self, x).is_some()
273274
}
274275
}
275276

276277
impl SliceContains for i8 {
278+
#[inline]
277279
fn slice_contains(&self, x: &[Self]) -> bool {
278280
let byte = *self as u8;
279281
// SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`

library/core/src/slice/memchr.rs

+24-16
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
1212
// Use truncation.
1313
const LO_USIZE: usize = LO_U64 as usize;
1414
const HI_USIZE: usize = HI_U64 as usize;
15+
const USIZE_BYTES: usize = mem::size_of::<usize>();
1516

1617
/// Returns `true` if `x` contains any zero byte.
1718
///
@@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
3839
}
3940

4041
/// Returns the first index matching the byte `x` in `text`.
42+
#[inline]
4143
pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
44+
// Fast path for small slices
45+
if text.len() < 2 * USIZE_BYTES {
46+
return text.iter().position(|elt| *elt == x);
47+
}
48+
49+
memchr_general_case(x, text)
50+
}
51+
52+
fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
4253
// Scan for a single byte value by reading two `usize` words at a time.
4354
//
4455
// Split `text` in three parts
4556
// - unaligned initial part, before the first word aligned address in text
4657
// - body, scan by 2 words at a time
4758
// - the last remaining part, < 2 word size
59+
60+
// search up to an aligned boundary
4861
let len = text.len();
4962
let ptr = text.as_ptr();
50-
let usize_bytes = mem::size_of::<usize>();
63+
let mut offset = ptr.align_offset(USIZE_BYTES);
5164

52-
// search up to an aligned boundary
53-
let mut offset = ptr.align_offset(usize_bytes);
5465
if offset > 0 {
5566
offset = cmp::min(offset, len);
5667
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
6071

6172
// search the body of the text
6273
let repeated_x = repeat_byte(x);
74+
while offset <= len - 2 * USIZE_BYTES {
75+
unsafe {
76+
let u = *(ptr.add(offset) as *const usize);
77+
let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);
6378

64-
if len >= 2 * usize_bytes {
65-
while offset <= len - 2 * usize_bytes {
66-
unsafe {
67-
let u = *(ptr.add(offset) as *const usize);
68-
let v = *(ptr.add(offset + usize_bytes) as *const usize);
69-
70-
// break if there is a matching byte
71-
let zu = contains_zero_byte(u ^ repeated_x);
72-
let zv = contains_zero_byte(v ^ repeated_x);
73-
if zu || zv {
74-
break;
75-
}
79+
// break if there is a matching byte
80+
let zu = contains_zero_byte(u ^ repeated_x);
81+
let zv = contains_zero_byte(v ^ repeated_x);
82+
if zu || zv {
83+
break;
7684
}
77-
offset += usize_bytes * 2;
7885
}
86+
offset += USIZE_BYTES * 2;
7987
}
8088

8189
// Find the byte after the point the body loop stopped.

library/core/src/slice/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1633,6 +1633,7 @@ impl<T> [T] {
16331633
/// assert!(!v.iter().any(|e| e == "hi"));
16341634
/// ```
16351635
#[stable(feature = "rust1", since = "1.0.0")]
1636+
#[inline]
16361637
pub fn contains(&self, x: &T) -> bool
16371638
where
16381639
T: PartialEq,

src/test/codegen/issue-75659.rs

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// This test checks that the call to memchr/slice_contains is optimized away
2+
// when searching in small slices.
3+
4+
// compile-flags: -O
5+
6+
#![crate_type = "lib"]
7+
8+
// CHECK-LABEL: @foo1
9+
#[no_mangle]
10+
pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
11+
// CHECK-NOT: memchr
12+
// CHECK-NOT: slice_contains
13+
data.contains(&x)
14+
}
15+
16+
// CHECK-LABEL: @foo2
17+
#[no_mangle]
18+
pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
19+
// CHECK-NOT: memchr
20+
// CHECK-NOT: slice_contains
21+
data.contains(&x)
22+
}
23+
24+
// CHECK-LABEL: @foo3
25+
#[no_mangle]
26+
pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
27+
// CHECK-NOT: memchr
28+
// CHECK-NOT: slice_contains
29+
data.contains(&x)
30+
}
31+
32+
// CHECK-LABEL: @foo4
33+
#[no_mangle]
34+
pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
35+
// CHECK-NOT: memchr
36+
// CHECK-NOT: slice_contains
37+
data.contains(&x)
38+
}
39+
40+
// CHECK-LABEL: @foo8
41+
#[no_mangle]
42+
pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
43+
// CHECK-NOT: memchr
44+
// CHECK-NOT: slice_contains
45+
data.contains(&x)
46+
}
47+
48+
// CHECK-LABEL: @foo8_i8
49+
#[no_mangle]
50+
pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
51+
// CHECK-NOT: memchr
52+
// CHECK-NOT: slice_contains
53+
!data.contains(&x)
54+
}
55+
56+
// Check that the general case isn't inlined
57+
// CHECK-LABEL: @foo80
58+
#[no_mangle]
59+
pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
60+
// CHECK: call core::slice::memchr
61+
data.contains(&x)
62+
}

0 commit comments

Comments
 (0)