Skip to content

Commit de1df36

Browse files
committed
Lexer: Avoid unnecessary allocations
1 parent 96798f5 commit de1df36

File tree

2 files changed

+57
-42
lines changed

2 files changed

+57
-42
lines changed

src/libsyntax/parse/comments.rs

+5 -4
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ use core::prelude::*;
1313
use ast;
1414
use codemap::{BytePos, CharPos, CodeMap, Pos};
1515
use diagnostic;
16-
use parse::lexer::{is_whitespace, get_str_from, reader};
16+
use parse::lexer::{is_whitespace, with_str_from, reader};
1717
use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
1818
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
1919
use parse::lexer;
@@ -352,9 +352,10 @@ pub fn gather_comments_and_literals(span_diagnostic:
352352
//discard, and look ahead; we're working with internal state
353353
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
354354
if token::is_lit(&tok) {
355-
let s = get_str_from(rdr, bstart);
356-
debug!("tok lit: %s", s);
357-
literals.push(lit {lit: s, pos: sp.lo});
355+
do with_str_from(rdr, bstart) |s| {
356+
debug!("tok lit: %s", s);
357+
literals.push(lit {lit: s.to_owned(), pos: sp.lo});
358+
}
358359
} else {
359360
debug!("tok: %s", token::to_str(get_ident_interner(), &tok));
360361
}

src/libsyntax/parse/lexer.rs

+52 -38
Original file line number | Diff line number | Diff line change
@@ -165,9 +165,10 @@ fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
165165
(pos - rdr.filemap.start_pos)
166166
}
167167

168-
pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
169-
return str::slice(*rdr.src, start.to_uint(),
170-
byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
168+
pub fn with_str_from<T>(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T {
169+
f(rdr.src.slice(
170+
byte_offset(rdr, start).to_uint(),
171+
byte_offset(rdr, rdr.last_pos).to_uint()))
171172
}
172173

173174
// EFFECT: advance the StringReader by one character. If a newline is
@@ -259,18 +260,24 @@ fn consume_any_line_comment(rdr: @mut StringReader)
259260
bump(rdr);
260261
// line comments starting with "///" or "//!" are doc-comments
261262
if rdr.curr == '/' || rdr.curr == '!' {
262-
let start_bpos = rdr.pos - BytePos(2u);
263-
let mut acc = ~"//";
263+
let start_bpos = rdr.pos - BytePos(3u);
264264
while rdr.curr != '\n' && !is_eof(rdr) {
265-
str::push_char(&mut acc, rdr.curr);
266265
bump(rdr);
267266
}
268-
// but comments with only more "/"s are not
269-
if !is_line_non_doc_comment(acc) {
270-
return Some(TokenAndSpan{
271-
tok: token::DOC_COMMENT(str_to_ident(acc)),
272-
sp: codemap::mk_sp(start_bpos, rdr.pos)
273-
});
267+
let ret = do with_str_from(rdr, start_bpos) |string| {
268+
// but comments with only more "/"s are not
269+
if !is_line_non_doc_comment(string) {
270+
Some(TokenAndSpan{
271+
tok: token::DOC_COMMENT(str_to_ident(string)),
272+
sp: codemap::mk_sp(start_bpos, rdr.pos)
273+
})
274+
} else {
275+
None
276+
}
277+
};
278+
279+
if ret.is_some() {
280+
return ret;
274281
}
275282
} else {
276283
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
@@ -306,25 +313,26 @@ pub fn is_block_non_doc_comment(s: &str) -> bool {
306313
fn consume_block_comment(rdr: @mut StringReader)
307314
-> Option<TokenAndSpan> {
308315
// block comments starting with "/**" or "/*!" are doc-comments
309-
if rdr.curr == '*' || rdr.curr == '!' {
310-
let start_bpos = rdr.pos - BytePos(2u);
311-
let mut acc = ~"/*";
316+
let res = if rdr.curr == '*' || rdr.curr == '!' {
317+
let start_bpos = rdr.pos - BytePos(3u);
312318
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
313-
str::push_char(&mut acc, rdr.curr);
314319
bump(rdr);
315320
}
316321
if is_eof(rdr) {
317322
rdr.fatal(~"unterminated block doc-comment");
318323
} else {
319-
acc += "*/";
320324
bump(rdr);
321325
bump(rdr);
322-
// but comments with only "*"s between two "/"s are not
323-
if !is_block_non_doc_comment(acc) {
324-
return Some(TokenAndSpan{
325-
tok: token::DOC_COMMENT(str_to_ident(acc)),
326-
sp: codemap::mk_sp(start_bpos, rdr.pos)
327-
});
326+
do with_str_from(rdr, start_bpos) |string| {
327+
// but comments with only "*"s between two "/"s are not
328+
if !is_block_non_doc_comment(string) {
329+
Some(TokenAndSpan{
330+
tok: token::DOC_COMMENT(str_to_ident(string)),
331+
sp: codemap::mk_sp(start_bpos, rdr.pos)
332+
})
333+
} else {
334+
None
335+
}
328336
}
329337
}
330338
} else {
@@ -338,10 +346,11 @@ fn consume_block_comment(rdr: @mut StringReader)
338346
bump(rdr);
339347
}
340348
}
341-
}
349+
None
350+
};
342351
// restart whitespace munch.
343352

344-
return consume_whitespace_and_comments(rdr);
353+
if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
345354
}
346355

347356
fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
@@ -540,17 +549,21 @@ fn ident_continue(c: char) -> bool {
540549
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
541550
let mut c = rdr.curr;
542551
if ident_start(c) {
543-
let start = byte_offset(rdr, rdr.last_pos);
552+
let start = rdr.last_pos;
544553
while ident_continue(rdr.curr) {
545554
bump(rdr);
546555
}
547-
let string = get_str_from(rdr, start);
548556

549-
if "_" == string { return token::UNDERSCORE; }
550-
let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':';
557+
return do with_str_from(rdr, start) |string| {
558+
if string == "_" {
559+
token::UNDERSCORE
560+
} else {
561+
let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':';
551562

552-
// FIXME: perform NFKC normalization here. (Issue #2253)
553-
return token::IDENT(str_to_ident(string), is_mod_name);
563+
// FIXME: perform NFKC normalization here. (Issue #2253)
564+
token::IDENT(str_to_ident(string), is_mod_name)
565+
}
566+
}
554567
}
555568
if is_dec_digit(c) {
556569
return scan_number(c, rdr);
@@ -648,19 +661,19 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
648661
'\'' => {
649662
// Either a character constant 'a' OR a lifetime name 'abc
650663
bump(rdr);
664+
let start = rdr.last_pos;
651665
let mut c2 = rdr.curr;
652666
bump(rdr);
653667

654668
// If the character is an ident start not followed by another single
655669
// quote, then this is a lifetime name:
656670
if ident_start(c2) && rdr.curr != '\'' {
657-
let mut lifetime_name = ~"";
658-
lifetime_name.push_char(c2);
659671
while ident_continue(rdr.curr) {
660-
lifetime_name.push_char(rdr.curr);
661672
bump(rdr);
662673
}
663-
return token::LIFETIME(str_to_ident(lifetime_name));
674+
return do with_str_from(rdr, start) |lifetime_name| {
675+
token::LIFETIME(str_to_ident(lifetime_name))
676+
}
664677
}
665678

666679
// Otherwise it is a character constant:
@@ -691,12 +704,13 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
691704
}
692705
'"' => {
693706
let mut accum_str = ~"";
694-
let n = byte_offset(rdr, rdr.last_pos);
707+
let n = rdr.last_pos;
695708
bump(rdr);
696709
while rdr.curr != '"' {
697710
if is_eof(rdr) {
698-
rdr.fatal(fmt!("unterminated double quote string: %s",
699-
get_str_from(rdr, n)));
711+
do with_str_from(rdr, n) |s| {
712+
rdr.fatal(fmt!("unterminated double quote string: %s", s));
713+
}
700714
}
701715

702716
let ch = rdr.curr;

0 commit comments

Comments
 (0)