Skip to content

Commit 69316f0

Browse files
committed
auto merge of #6182 : huonw/rust/core-str-opts, r=nikomatsakis
This adds `#[inline]` to many very common string routines (e.g. `len`). It also rewrites `repeat` to avoid `+=`, making it O(n) rather than O(n^2), and rewrites `concat`, `connect` and `connect_slices` to reduce the overhead of repeated reallocation and of the constant `set_len` calls (etc.) made by `push_str`. (The added complexity might not be worth the 20% speedup, though.)
2 parents 1c64f7a + 5714e2c commit 69316f0

File tree

1 file changed

+132
-15
lines changed

1 file changed

+132
-15
lines changed

src/libcore/str.rs

+132-15
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
7777
}
7878

7979
/// Copy a slice into a new unique str
80+
#[inline(always)]
8081
pub fn from_slice(s: &str) -> ~str {
8182
unsafe { raw::slice_bytes_owned(s, 0, len(s)) }
8283
}
@@ -240,38 +241,132 @@ pub fn append(lhs: ~str, rhs: &str) -> ~str {
240241

241242
/// Concatenate a vector of strings
242243
pub fn concat(v: &[~str]) -> ~str {
243-
let mut s: ~str = ~"";
244-
for vec::each(v) |ss| {
245-
push_str(&mut s, *ss);
244+
if v.is_empty() { return ~""; }
245+
246+
let mut len = 0;
247+
for v.each |ss| {
248+
len += ss.len();
249+
}
250+
let mut s = ~"";
251+
252+
reserve(&mut s, len);
253+
254+
unsafe {
255+
do as_buf(s) |buf, _len| {
256+
let mut buf = ::cast::transmute_mut_unsafe(buf);
257+
for v.each |ss| {
258+
do as_buf(*ss) |ssbuf, sslen| {
259+
let sslen = sslen - 1;
260+
ptr::copy_memory(buf, ssbuf, sslen);
261+
buf = buf.offset(sslen);
262+
}
263+
}
264+
}
265+
raw::set_len(&mut s, len);
246266
}
247267
s
248268
}
249269

250270
/// Concatenate a vector of strings, placing a given separator between each
251271
pub fn connect(v: &[~str], sep: &str) -> ~str {
272+
if v.is_empty() { return ~""; }
273+
274+
// concat is faster
275+
if sep.is_empty() { return concat(v); }
276+
277+
// `v.len() - 1` underflows when v is empty; the early return above rules that out
278+
let mut len = sep.len() * (v.len() - 1);
279+
for v.each |ss| {
280+
len += ss.len();
281+
}
252282
let mut s = ~"", first = true;
253-
for vec::each(v) |ss| {
254-
if first { first = false; } else { push_str(&mut s, sep); }
255-
push_str(&mut s, *ss);
283+
284+
reserve(&mut s, len);
285+
286+
unsafe {
287+
do as_buf(s) |buf, _len| {
288+
do as_buf(sep) |sepbuf, seplen| {
289+
let seplen = seplen - 1;
290+
let mut buf = ::cast::transmute_mut_unsafe(buf);
291+
for v.each |ss| {
292+
do as_buf(*ss) |ssbuf, sslen| {
293+
let sslen = sslen - 1;
294+
if first {
295+
first = false;
296+
} else {
297+
ptr::copy_memory(buf, sepbuf, seplen);
298+
buf = buf.offset(seplen);
299+
}
300+
ptr::copy_memory(buf, ssbuf, sslen);
301+
buf = buf.offset(sslen);
302+
}
303+
}
304+
}
305+
}
306+
raw::set_len(&mut s, len);
256307
}
257308
s
258309
}
259310

260311
/// Concatenate a vector of string slices, placing a given separator between each
261312
pub fn connect_slices(v: &[&str], sep: &str) -> ~str {
313+
if v.is_empty() { return ~""; }
314+
315+
// `v.len() - 1` underflows when v is empty; the early return above rules that out
316+
let mut len = sep.len() * (v.len() - 1);
317+
for v.each |ss| {
318+
len += ss.len();
319+
}
262320
let mut s = ~"", first = true;
263-
for vec::each(v) |ss| {
264-
if first { first = false; } else { push_str(&mut s, sep); }
265-
push_str(&mut s, *ss);
321+
322+
reserve(&mut s, len);
323+
324+
unsafe {
325+
do as_buf(s) |buf, _len| {
326+
do as_buf(sep) |sepbuf, seplen| {
327+
let seplen = seplen - 1;
328+
let mut buf = ::cast::transmute_mut_unsafe(buf);
329+
for vec::each(v) |ss| {
330+
do as_buf(*ss) |ssbuf, sslen| {
331+
let sslen = sslen - 1;
332+
if first {
333+
first = false;
334+
} else if seplen > 0 {
335+
ptr::copy_memory(buf, sepbuf, seplen);
336+
buf = buf.offset(seplen);
337+
}
338+
ptr::copy_memory(buf, ssbuf, sslen);
339+
buf = buf.offset(sslen);
340+
}
341+
}
342+
}
343+
}
344+
raw::set_len(&mut s, len);
266345
}
267346
s
268347
}
269348

270349
/// Given a string, make a new string with repeated copies of it
271350
pub fn repeat(ss: &str, nn: uint) -> ~str {
272-
let mut acc = ~"";
273-
for nn.times { acc += ss; }
274-
acc
351+
do as_buf(ss) |buf, len| {
352+
let mut ret = ~"";
353+
// the length reported by as_buf includes the trailing null terminator; exclude it
354+
let len = len - 1;
355+
reserve(&mut ret, nn * len);
356+
357+
unsafe {
358+
do as_buf(ret) |rbuf, _len| {
359+
let mut rbuf = ::cast::transmute_mut_unsafe(rbuf);
360+
361+
for nn.times {
362+
ptr::copy_memory(rbuf, buf, len);
363+
rbuf = rbuf.offset(len);
364+
}
365+
}
366+
raw::set_len(&mut ret, nn * len);
367+
}
368+
ret
369+
}
275370
}
276371

277372
/*
@@ -820,6 +915,7 @@ Section: Comparing strings
820915
/// Bytewise slice equality
821916
#[cfg(notest)]
822917
#[lang="str_eq"]
918+
#[inline]
823919
pub fn eq_slice(a: &str, b: &str) -> bool {
824920
do as_buf(a) |ap, alen| {
825921
do as_buf(b) |bp, blen| {
@@ -836,6 +932,7 @@ pub fn eq_slice(a: &str, b: &str) -> bool {
836932
}
837933

838934
#[cfg(test)]
935+
#[inline]
839936
pub fn eq_slice(a: &str, b: &str) -> bool {
840937
do as_buf(a) |ap, alen| {
841938
do as_buf(b) |bp, blen| {
@@ -854,15 +951,18 @@ pub fn eq_slice(a: &str, b: &str) -> bool {
854951
/// Bytewise string equality
855952
#[cfg(notest)]
856953
#[lang="uniq_str_eq"]
954+
#[inline]
857955
pub fn eq(a: &~str, b: &~str) -> bool {
858956
eq_slice(*a, *b)
859957
}
860958

861959
#[cfg(test)]
960+
#[inline]
862961
pub fn eq(a: &~str, b: &~str) -> bool {
863962
eq_slice(*a, *b)
864963
}
865964

965+
#[inline]
866966
fn cmp(a: &str, b: &str) -> Ordering {
867967
let low = uint::min(a.len(), b.len());
868968

@@ -879,20 +979,24 @@ fn cmp(a: &str, b: &str) -> Ordering {
879979

880980
#[cfg(notest)]
881981
impl<'self> TotalOrd for &'self str {
982+
#[inline]
882983
fn cmp(&self, other: & &'self str) -> Ordering { cmp(*self, *other) }
883984
}
884985

885986
#[cfg(notest)]
886987
impl TotalOrd for ~str {
988+
#[inline]
887989
fn cmp(&self, other: &~str) -> Ordering { cmp(*self, *other) }
888990
}
889991

890992
#[cfg(notest)]
891993
impl TotalOrd for @str {
994+
#[inline]
892995
fn cmp(&self, other: &@str) -> Ordering { cmp(*self, *other) }
893996
}
894997

895998
/// Bytewise slice less than
999+
#[inline]
8961000
fn lt(a: &str, b: &str) -> bool {
8971001
let (a_len, b_len) = (a.len(), b.len());
8981002
let end = uint::min(a_len, b_len);
@@ -909,16 +1013,19 @@ fn lt(a: &str, b: &str) -> bool {
9091013
}
9101014

9111015
/// Bytewise less than or equal
1016+
#[inline]
9121017
pub fn le(a: &str, b: &str) -> bool {
9131018
!lt(b, a)
9141019
}
9151020

9161021
/// Bytewise greater than or equal
1022+
#[inline]
9171023
fn ge(a: &str, b: &str) -> bool {
9181024
!lt(a, b)
9191025
}
9201026

9211027
/// Bytewise greater than
1028+
#[inline]
9221029
fn gt(a: &str, b: &str) -> bool {
9231030
!le(a, b)
9241031
}
@@ -1595,6 +1702,7 @@ Section: String properties
15951702
*/
15961703

15971704
/// Returns true if the string has length 0
1705+
#[inline(always)]
15981706
pub fn is_empty(s: &str) -> bool { len(s) == 0u }
15991707

16001708
/**
@@ -1616,11 +1724,13 @@ fn is_alphanumeric(s: &str) -> bool {
16161724
}
16171725

16181726
/// Returns the string length/size in bytes not counting the null terminator
1727+
#[inline(always)]
16191728
pub fn len(s: &str) -> uint {
16201729
do as_buf(s) |_p, n| { n - 1u }
16211730
}
16221731

16231732
/// Returns the number of characters that a string holds
1733+
#[inline(always)]
16241734
pub fn char_len(s: &str) -> uint { count_chars(s, 0u, len(s)) }
16251735

16261736
/*
@@ -1752,7 +1862,8 @@ pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
17521862
return len;
17531863
}
17541864

1755-
/// Counts the number of bytes taken by the `n` in `s` starting from `start`.
1865+
/// Counts the number of bytes taken by the first `n` chars in `s`
1866+
/// starting from `start`.
17561867
pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
17571868
assert!(is_char_boundary(s, start));
17581869
let mut end = start, cnt = n;
@@ -1988,6 +2099,7 @@ static tag_six_b: uint = 252u;
19882099
* let i = str::as_bytes("Hello World") { |bytes| vec::len(bytes) };
19892100
* ~~~
19902101
*/
2102+
#[inline]
19912103
pub fn as_bytes<T>(s: &const ~str, f: &fn(&~[u8]) -> T) -> T {
19922104
unsafe {
19932105
let v: *~[u8] = cast::transmute(copy s);
@@ -2023,6 +2135,7 @@ pub fn as_bytes_slice<'a>(s: &'a str) -> &'a [u8] {
20232135
* let s = str::as_c_str("PATH", { |path| libc::getenv(path) });
20242136
* ~~~
20252137
*/
2138+
#[inline]
20262139
pub fn as_c_str<T>(s: &str, f: &fn(*libc::c_char) -> T) -> T {
20272140
do as_buf(s) |buf, len| {
20282141
// NB: len includes the trailing null.
@@ -2099,6 +2212,7 @@ pub fn subslice_offset(outer: &str, inner: &str) -> uint {
20992212
* * s - A string
21002213
* * n - The number of bytes to reserve space for
21012214
*/
2215+
#[inline(always)]
21022216
pub fn reserve(s: &mut ~str, n: uint) {
21032217
unsafe {
21042218
let v: *mut ~[u8] = cast::transmute(s);
@@ -2126,6 +2240,7 @@ pub fn reserve(s: &mut ~str, n: uint) {
21262240
* * s - A string
21272241
* * n - The number of bytes to reserve space for
21282242
*/
2243+
#[inline(always)]
21292244
pub fn reserve_at_least(s: &mut ~str, n: uint) {
21302245
reserve(s, uint::next_power_of_two(n + 1u) - 1u)
21312246
}
@@ -2314,6 +2429,7 @@ pub mod raw {
23142429
}
23152430

23162431
/// Sets the length of the string and adds the null terminator
2432+
#[inline]
23172433
pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
23182434
let v: **mut vec::raw::VecRepr = cast::transmute(v);
23192435
let repr: *mut vec::raw::VecRepr = *v;
@@ -2489,7 +2605,7 @@ impl<'self> StrSlice<'self> for &'self str {
24892605
#[inline]
24902606
fn is_alphanumeric(&self) -> bool { is_alphanumeric(*self) }
24912607
/// Returns the size in bytes not counting the null terminator
2492-
#[inline]
2608+
#[inline(always)]
24932609
fn len(&self) -> uint { len(*self) }
24942610
/// Returns the number of characters that a string holds
24952611
#[inline]
@@ -2599,10 +2715,11 @@ pub trait OwnedStr {
25992715
}
26002716
26012717
impl OwnedStr for ~str {
2718+
#[inline]
26022719
fn push_str(&mut self, v: &str) {
26032720
push_str(self, v);
26042721
}
2605-
2722+
#[inline]
26062723
fn push_char(&mut self, c: char) {
26072724
push_char(self, c);
26082725
}

0 commit comments

Comments
 (0)