Skip to content

Commit 0663286

Browse files
committed
Merge pull request rust-lang#3 from bluss/memrchr-must-go-faster
Provide a faster fallback for memrchr
2 parents 1a2171b + 6a1b5d7 commit 0663286

File tree

2 files changed

+132
-5
lines changed

2 files changed

+132
-5
lines changed

benches/bench.rs

+4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ fn iterator(b: &mut test::Bencher) {
1414
b.iter(|| {
1515
assert!(haystack.iter().position(|&b| b == needle).is_none());
1616
});
17+
b.bytes = haystack.len() as u64;
1718
}
1819

1920
#[bench]
@@ -23,6 +24,7 @@ fn libc_memchr(b: &mut test::Bencher) {
2324
b.iter(|| {
2425
assert!(memchr::memchr(needle, &haystack).is_none());
2526
});
27+
b.bytes = haystack.len() as u64;
2628
}
2729

2830
#[bench]
@@ -32,6 +34,7 @@ fn iterator_reversed(b: &mut test::Bencher) {
3234
b.iter(|| {
3335
assert!(haystack.iter().rposition(|&b| b == needle).is_none());
3436
});
37+
b.bytes = haystack.len() as u64;
3538
}
3639

3740
#[bench]
@@ -41,4 +44,5 @@ fn libc_memrchr(b: &mut test::Bencher) {
4144
b.iter(|| {
4245
assert!(memchr::memrchr(needle, &haystack).is_none());
4346
});
47+
b.bytes = haystack.len() as u64;
4448
}

src/lib.rs

+128-5
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ to the corresponding functions in `libc`.
88
extern crate libc;
99

1010
use libc::funcs::c95::string;
11-
use libc::types::common::c95::c_void;
12-
use libc::types::os::arch::c95::{c_int, size_t};
11+
use libc::c_void;
12+
use libc::{c_int, size_t};
1313

1414
/// A safe interface to `memchr`.
1515
///
@@ -78,18 +78,127 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
7878
}
7979
}
8080

81-
#[cfg(not(target_os = "linux"))]
81+
// Non-Linux targets with 32- or 64-bit pointers: delegate the reverse
// search to `fallback::memrchr`, which scans whole `usize` words.
#[cfg(all(not(target_os = "linux"),
82+
any(target_pointer_width = "32", target_pointer_width = "64")))]
83+
fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
84+
fallback::memrchr(needle, haystack)
85+
}
86+
87+
// For the rare case of a platform that is neither 32-bit nor 64-bit:
// the word-based fallback is not selected here, so the last match is
// located with the slice iterator's `rposition` instead.
88+
#[cfg(all(not(target_os = "linux"),
89+
not(target_pointer_width = "32"),
90+
not(target_pointer_width = "64")))]
8291
fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
8392
haystack.iter().rposition(|&b| b == needle)
8493
}
8594

8695
memrchr_specific(needle, haystack)
8796
}
8897

98+
#[cfg(all(not(target_os = "linux"),
          any(target_pointer_width = "32", target_pointer_width = "64")))]
mod fallback {
    //! Portable reverse byte search that examines the haystack two `usize`
    //! words at a time.
    //!
    //! The module is only built for 32- and 64-bit targets because the
    //! constants below are defined for those pointer widths only.

    use std::cmp;

    const LO_U64: u64 = 0x0101010101010101;
    const HI_U64: u64 = 0x8080808080808080;

    // The casts truncate on 32-bit targets, which yields the same repeating
    // pattern at the narrower width.
    const LO_USIZE: usize = LO_U64 as usize;
    const HI_USIZE: usize = HI_U64 as usize;

    #[cfg(target_pointer_width = "32")]
    const USIZE_BYTES: usize = 4;
    #[cfg(target_pointer_width = "64")]
    const USIZE_BYTES: usize = 8;

    /// Return `true` if `x` contains any zero byte.
    ///
    /// From *Matters Computational*, J. Arndt
    ///
    /// "The idea is to subtract one from each of the bytes and then look for
    /// bytes where the borrow propagated all the way to the most significant
    /// bit."
    #[inline]
    fn contains_zero_byte(x: usize) -> bool {
        (x.wrapping_sub(LO_USIZE) & !x & HI_USIZE) != 0
    }

    /// Return a `usize` in which every byte equals `b`.
    ///
    /// Multiplying by `0x01…01` copies `b` into each byte lane; the product
    /// is at most `0xFF…FF`, so it cannot overflow.
    #[inline]
    fn repeat_byte(b: u8) -> usize {
        (b as usize) * LO_USIZE
    }

    /// Return the last index matching the byte `x` in `text`, or `None` if
    /// `x` does not occur.
    pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
        // Scan for a single byte value by reading two `usize` words at a time.
        //
        // Split `text` in three parts, visited from the end:
        // - unaligned tail, after the last word-aligned address in `text`
        // - body, scanned two words at a time
        // - the remaining leading bytes, checked one by one
        let len = text.len();
        let ptr = text.as_ptr();

        // `end_align` is how far the end of `text` lies past a word boundary,
        // so stepping back by exactly that many bytes reaches an aligned
        // address. Short inputs are clamped and handled wholly byte-wise.
        let end_align = (ptr as usize).wrapping_add(len) & (USIZE_BYTES - 1);
        let mut offset = len - cmp::min(end_align, len);
        if let Some(index) = text[offset..].iter().rposition(|&byte| byte == x) {
            return Some(offset + index);
        }

        // Search the body of the text.
        let repeated_x = repeat_byte(x);
        while offset >= 2 * USIZE_BYTES {
            // SAFETY: the loop is only entered when `end_align < len`, in
            // which case `ptr + offset` is `usize`-aligned; every iteration
            // lowers `offset` by a whole number of words, preserving that.
            // `offset >= 2 * USIZE_BYTES` keeps both words inside `text`.
            let (u, v) = unsafe {
                let words = ptr.offset(offset as isize - 2 * USIZE_BYTES as isize)
                    as *const usize;
                (*words, *words.offset(1))
            };

            // XOR turns bytes equal to `x` into zero bytes; stop as soon as
            // either word holds one.
            if contains_zero_byte(u ^ repeated_x) || contains_zero_byte(v ^ repeated_x) {
                break;
            }
            offset -= 2 * USIZE_BYTES;
        }

        // Find the byte before the point where the body loop stopped.
        text[..offset].iter().rposition(|&byte| byte == x)
    }
}
197+
89198
#[cfg(target_os = "linux")]
90199
mod ffi {
91-
use libc::types::common::c95::c_void;
92-
use libc::types::os::arch::c95::{c_int, size_t};
200+
use libc::c_void;
201+
use libc::{c_int, size_t};
93202
extern {
94203
pub fn memrchr(cx: *const c_void, c: c_int, n: size_t) -> *mut c_void;
95204
}
@@ -186,4 +295,18 @@ mod tests {
186295
}
187296
quickcheck::quickcheck(prop as fn(u8, Vec<u8>) -> bool);
188297
}
298+
299+
/// Property test: for every possible needle byte, `memrchr` must report the
/// same index as the slice iterator's `rposition` on an arbitrary haystack.
#[test]
fn qc_correct_reversed() {
    fn prop(haystack: Vec<u8>) -> bool {
        (0..256u32).map(|n| n as u8).all(|needle| {
            let found = memrchr(needle, &haystack);
            found == haystack.iter().rposition(|&elt| elt == needle)
        })
    }
    quickcheck::quickcheck(prop as fn(Vec<u8>) -> bool);
}
189312
}

0 commit comments

Comments
 (0)