@@ -8,8 +8,8 @@ to the corresponding functions in `libc`.
8
8
extern crate libc;
9
9
10
10
use libc:: funcs:: c95:: string;
11
- use libc:: types :: common :: c95 :: c_void;
12
- use libc:: types :: os :: arch :: c95 :: { c_int, size_t} ;
11
+ use libc:: c_void;
12
+ use libc:: { c_int, size_t} ;
13
13
14
14
/// A safe interface to `memchr`.
15
15
///
@@ -78,18 +78,127 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
78
78
}
79
79
}
80
80
81
// Non-Linux targets have no libc `memrchr` to bind against, so on the common
// 32-/64-bit pointer widths we dispatch to the word-at-a-time fallback below.
#[cfg(all(not(target_os = "linux"),
          any(target_pointer_width = "32", target_pointer_width = "64")))]
fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
    // Delegates entirely to the portable implementation in `mod fallback`;
    // returns the index of the last occurrence of `needle`, or `None`.
    fallback::memrchr(needle, haystack)
}
86
+
87
// For the rare case of neither 32 bit nor 64-bit platform: the word-scan
// fallback assumes a 4- or 8-byte `usize`, so fall back to a plain scan.
#[cfg(all(not(target_os = "linux"),
          not(target_pointer_width = "32"),
          not(target_pointer_width = "64")))]
fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
    // Walk the haystack backwards one byte at a time and report the first
    // (i.e. right-most) position that matches `needle`.
    let mut idx = haystack.len();
    while idx > 0 {
        idx -= 1;
        if haystack[idx] == needle {
            return Some(idx);
        }
    }
    None
}
85
94
86
95
memrchr_specific ( needle, haystack)
87
96
}
88
97
98
// Portable word-at-a-time `memrchr` used when libc's `memrchr` is unavailable.
#[cfg(not(target_os = "linux"))]
mod fallback {
    use std::cmp;

    // Bit patterns used by the zero-byte test below: a 1 in the lowest bit of
    // every byte, and a 1 in the highest bit of every byte.
    const LO_U64: u64 = 0x0101010101010101;
    const HI_U64: u64 = 0x8080808080808080;

    // Use truncation: on a 32-bit `usize` the `as` cast keeps the low 32 bits,
    // which preserves the per-byte pattern.
    const LO_USIZE: usize = LO_U64 as usize;
    const HI_USIZE: usize = HI_U64 as usize;

    #[cfg(target_pointer_width = "32")]
    const USIZE_BYTES: usize = 4;
    #[cfg(target_pointer_width = "64")]
    const USIZE_BYTES: usize = 8;

    /// Return `true` if `x` contains any zero byte.
    ///
    /// From *Matters Computational*, J. Arndt
    ///
    /// "The idea is to subtract one from each of the bytes and then look for
    /// bytes where the borrow propagated all the way to the most significant
    /// bit."
    #[inline]
    fn contains_zero_byte(x: usize) -> bool {
        x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
    }

    /// Broadcast the byte `b` into every byte of a `usize` (32-bit version).
    #[cfg(target_pointer_width = "32")]
    #[inline]
    fn repeat_byte(b: u8) -> usize {
        let mut rep = (b as usize) << 8 | b as usize;
        rep = rep << 16 | rep;
        rep
    }

    /// Broadcast the byte `b` into every byte of a `usize` (64-bit version).
    #[cfg(target_pointer_width = "64")]
    #[inline]
    fn repeat_byte(b: u8) -> usize {
        let mut rep = (b as usize) << 8 | b as usize;
        rep = rep << 16 | rep;
        rep = rep << 32 | rep;
        rep
    }

    /// Return the last index matching the byte `x` in `text`.
    pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
        // Scan for a single byte value by reading two `usize` words at a time.
        //
        // Split `text` in three parts
        // - unaligned tail, after the last word aligned address in text
        // - body, scan by 2 words at a time
        // - the first remaining bytes, < 2 word size
        let len = text.len();
        let ptr = text.as_ptr();

        // search to an aligned boundary
        let endptr = unsafe { ptr.offset(len as isize) };
        let align = (endptr as usize) & (USIZE_BYTES - 1);
        let tail;
        if align > 0 {
            // BUG FIX: the unaligned tail consists of the `align` bytes that
            // sit past the last word-aligned address, so its length is
            // `align`, not `USIZE_BYTES - align`. The old computation left
            // `ptr + offset` misaligned, so the word loop below dereferenced
            // misaligned `*const usize` pointers (undefined behavior in Rust,
            // even on hardware that tolerates it).
            tail = cmp::min(align, len);
            for (index, &byte) in text[len - tail..].iter().enumerate().rev() {
                if byte == x {
                    return Some(len - tail + index);
                }
            }
        } else {
            tail = 0;
        }

        // search the body of the text, two aligned words per iteration
        let repeated_x = repeat_byte(x);
        let mut offset = len - tail;

        while offset >= 2 * USIZE_BYTES {
            unsafe {
                // SAFETY: `offset >= 2 * USIZE_BYTES` keeps both reads in
                // bounds, and `ptr + offset` is word-aligned (see tail fix
                // above), so `offset - USIZE_BYTES` / `offset - 2*USIZE_BYTES`
                // are aligned too.
                let u = *(ptr.offset(offset as isize - 2 * USIZE_BYTES as isize) as *const usize);
                let v = *(ptr.offset(offset as isize - USIZE_BYTES as isize) as *const usize);

                // break if there is a matching byte: XOR-ing with the
                // broadcast needle turns matching bytes into zero bytes
                let zu = contains_zero_byte(u ^ repeated_x);
                let zv = contains_zero_byte(v ^ repeated_x);
                if zu || zv {
                    break;
                }
            }
            offset -= 2 * USIZE_BYTES;
        }

        // find the match after the point the body loop stopped; this also
        // covers the short leading remainder when the body loop never ran
        for (index, &byte) in text[..offset].iter().enumerate().rev() {
            if byte == x {
                return Some(index);
            }
        }
        None
    }
}
197
+
89
198
#[ cfg( target_os = "linux" ) ]
90
199
mod ffi {
91
- use libc:: types :: common :: c95 :: c_void;
92
- use libc:: types :: os :: arch :: c95 :: { c_int, size_t} ;
200
+ use libc:: c_void;
201
+ use libc:: { c_int, size_t} ;
93
202
extern {
94
203
pub fn memrchr ( cx : * const c_void , c : c_int , n : size_t ) -> * mut c_void ;
95
204
}
@@ -186,4 +295,18 @@ mod tests {
186
295
}
187
296
quickcheck:: quickcheck ( prop as fn ( u8 , Vec < u8 > ) -> bool ) ;
188
297
}
298
+
299
#[test]
fn qc_correct_reversed() {
    // For every possible needle byte, `memrchr` must agree with the naive
    // right-to-left search over an arbitrary quickcheck-generated haystack.
    fn prop(a: Vec<u8>) -> bool {
        (0..256u32).all(|byte| {
            let byte = byte as u8;
            memrchr(byte, &a) == a.iter().rposition(|elt| *elt == byte)
        })
    }
    quickcheck::quickcheck(prop as fn(Vec<u8>) -> bool);
}
189
312
}
0 commit comments