Skip to content

Commit 25c2cbe

Browse files
committed
add benchmark functions
1 parent 6c8dc46 commit 25c2cbe

File tree

2 files changed

+175
-0
lines changed

2 files changed

+175
-0
lines changed

benches/benches.rs

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#![feature(test)]
2+
3+
extern crate test;
4+
5+
use rustc_literal_escaper::*;
6+
use std::iter::repeat_n;
7+
8+
/// Size knob shared by the benchmarks in this file: the helpers repeat a character or an
/// escape sequence `LEN` times to build their input, and the whitespace benchmark builds
/// a skipped run that is `LEN` bytes long.
const LEN: usize = 10_000;
9+
10+
#[bench]
11+
fn bench_skip_ascii_whitespace(b: &mut test::Bencher) {
12+
let input: String = test::black_box({
13+
let mut res = "\\\n".to_string();
14+
(0..LEN - 1).for_each(|_| res.push(' '));
15+
res.push('\n');
16+
res
17+
});
18+
assert_eq!(input[2..].len(), LEN);
19+
assert!(input.contains('\n'));
20+
b.iter(|| {
21+
let mut output = vec![];
22+
// This is internal, so call indirectly
23+
// skip_ascii_whitespace(&mut input.chars(), 0, &mut |range, res| {
24+
// output.push((range, res))
25+
// });
26+
unescape_unicode(&input, Mode::Str, &mut |range, res| {
27+
output.push((range, res))
28+
});
29+
assert_eq!(
30+
output,
31+
[((0..LEN + 2), Err(EscapeError::MultipleSkippedLinesWarning))]
32+
);
33+
});
34+
}
35+
36+
//
37+
// Check raw
38+
//
39+
40+
fn bench_check_raw(b: &mut test::Bencher, c: char, mode: Mode) {
41+
let input: String = test::black_box(repeat_n(c, LEN).collect());
42+
assert_eq!(input.len(), LEN * c.len_utf8());
43+
b.iter(|| {
44+
let mut output = vec![];
45+
unescape_unicode(&input, mode, &mut |range, res| output.push((range, res)));
46+
assert_eq!(output.len(), LEN);
47+
assert_eq!(output[0], ((0..c.len_utf8()), Ok(c)));
48+
});
49+
}
50+
51+
// raw str
52+
53+
#[bench]
54+
fn bench_check_raw_str_ascii(b: &mut test::Bencher) {
55+
bench_check_raw(b, 'a', Mode::RawStr);
56+
}
57+
58+
#[bench]
59+
fn bench_check_raw_str_unicode(b: &mut test::Bencher) {
60+
bench_check_raw(b, '🦀', Mode::RawStr);
61+
}
62+
63+
// raw byte str
64+
65+
#[bench]
66+
fn bench_check_raw_byte_str(b: &mut test::Bencher) {
67+
bench_check_raw(b, 'a', Mode::RawByteStr);
68+
}
69+
70+
// raw C str
71+
72+
#[bench]
73+
fn bench_check_raw_c_str_ascii(b: &mut test::Bencher) {
74+
bench_check_raw(b, 'a', Mode::RawCStr);
75+
}
76+
77+
#[bench]
78+
fn bench_check_raw_c_str_unicode(b: &mut test::Bencher) {
79+
bench_check_raw(b, '🦀', Mode::RawCStr);
80+
}
81+
82+
//
83+
// Unescape
84+
//
85+
86+
fn bench_unescape(b: &mut test::Bencher, s: &str, mode: Mode, expected: char) {
87+
let input: String = test::black_box(repeat_n(s, LEN).collect());
88+
assert_eq!(input.len(), LEN * s.len());
89+
b.iter(|| {
90+
let mut output = vec![];
91+
unescape_unicode(&input, mode, &mut |range, res| output.push((range, res)));
92+
assert_eq!(output.len(), LEN);
93+
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
94+
});
95+
}
96+
97+
// str
98+
99+
#[bench]
100+
fn bench_unescape_str_trivial(b: &mut test::Bencher) {
101+
bench_unescape(b, r"a", Mode::Str, 'a');
102+
}
103+
104+
#[bench]
105+
fn bench_unescape_str_ascii(b: &mut test::Bencher) {
106+
bench_unescape(b, r"\n", Mode::Str, '\n');
107+
}
108+
109+
#[bench]
110+
fn bench_unescape_str_hex(b: &mut test::Bencher) {
111+
bench_unescape(b, r"\x22", Mode::Str, '"');
112+
}
113+
114+
#[bench]
115+
fn bench_unescape_str_unicode(b: &mut test::Bencher) {
116+
bench_unescape(b, r"\u{1f980}", Mode::Str, '🦀');
117+
}
118+
119+
// byte str
120+
121+
#[bench]
122+
fn bench_unescape_byte_str_trivial(b: &mut test::Bencher) {
123+
bench_unescape(b, r"a", Mode::ByteStr, 'a');
124+
}
125+
126+
#[bench]
127+
fn bench_unescape_byte_str_ascii(b: &mut test::Bencher) {
128+
bench_unescape(b, r"\n", Mode::ByteStr, b'\n' as char);
129+
}
130+
131+
#[bench]
132+
fn bench_unescape_byte_str_hex(b: &mut test::Bencher) {
133+
bench_unescape(b, r"\xff", Mode::ByteStr, b'\xff' as char);
134+
}
135+
136+
// C str
137+
138+
fn bench_unescape_c_str(b: &mut test::Bencher, s: &str, expected: MixedUnit) {
139+
let input: String = test::black_box(repeat_n(s, LEN).collect());
140+
assert_eq!(input.len(), LEN * s.len());
141+
b.iter(|| {
142+
let mut output = vec![];
143+
unescape_mixed(&input, Mode::CStr, &mut |range, res| {
144+
output.push((range, res))
145+
});
146+
assert_eq!(output.len(), LEN);
147+
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
148+
});
149+
}
150+
151+
#[bench]
152+
fn bench_unescape_c_str_trivial(b: &mut test::Bencher) {
153+
bench_unescape_c_str(b, r"a", MixedUnit::Char('a'));
154+
}
155+
156+
#[bench]
157+
fn bench_unescape_c_str_ascii(b: &mut test::Bencher) {
158+
bench_unescape_c_str(b, r"\n", MixedUnit::Char('\n'));
159+
}
160+
161+
#[bench]
162+
fn bench_unescape_c_str_hex_ascii(b: &mut test::Bencher) {
163+
bench_unescape_c_str(b, r"\x22", MixedUnit::Char('"'));
164+
}
165+
166+
#[bench]
167+
fn bench_unescape_c_str_hex_byte(b: &mut test::Bencher) {
168+
bench_unescape_c_str(b, r"\xff", MixedUnit::HighByte(b'\xff'));
169+
}
170+
171+
#[bench]
172+
fn bench_unescape_c_str_unicode(b: &mut test::Bencher) {
173+
bench_unescape_c_str(b, r"\u{1f980}", MixedUnit::Char('🦀'));
174+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ where
109109

110110
/// Used for mixed utf8 string literals, i.e. those that allow both unicode
111111
/// chars and high bytes.
112+
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
112113
pub enum MixedUnit {
113114
/// Used for ASCII chars (written directly or via `\x00`..`\x7f` escapes)
114115
/// and Unicode chars (written directly or via `\u` escapes).

0 commit comments

Comments
 (0)