Skip to content

Commit 43cae88

Browse files
committed
Lexer: Fix offset handling in get_str_from()
As the comment said, the subtraction is bogus for multibyte characters. Fortunately, we can just use last_pos instead of pos to get the correct position without any subtraction hackery.
1 parent b8cf2f8 commit 43cae88

File tree

2 files changed

+8
-10
lines changed

2 files changed

+8
-10
lines changed

src/libsyntax/parse/comments.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -347,7 +347,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
347347
}
348348

349349

350-
let bstart = rdr.pos;
350+
let bstart = rdr.last_pos;
351351
rdr.next_token();
352352
//discard, and look ahead; we're working with internal state
353353
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();

src/libsyntax/parse/lexer.rs

+7-9
Original file line number | Diff line number | Diff line change
@@ -161,22 +161,20 @@ fn string_advance_token(r: @mut StringReader) {
161161
}
162162
}
163163

164-
fn byte_offset(rdr: &StringReader) -> BytePos {
165-
(rdr.pos - rdr.filemap.start_pos)
164+
fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
165+
(pos - rdr.filemap.start_pos)
166166
}
167167

168168
pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
169-
// I'm pretty skeptical about this subtraction. What if there's a
170-
// multi-byte character before the mark?
171-
return str::slice(*rdr.src, start.to_uint() - 1u,
172-
byte_offset(rdr).to_uint() - 1u).to_owned();
169+
return str::slice(*rdr.src, start.to_uint(),
170+
byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
173171
}
174172

175173
// EFFECT: advance the StringReader by one character. If a newline is
176174
// discovered, add it to the FileMap's list of line start offsets.
177175
pub fn bump(rdr: &mut StringReader) {
178176
rdr.last_pos = rdr.pos;
179-
let current_byte_offset = byte_offset(rdr).to_uint();;
177+
let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
180178
if current_byte_offset < (*rdr.src).len() {
181179
assert!(rdr.curr != -1 as char);
182180
let last_char = rdr.curr;
@@ -202,7 +200,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool {
202200
rdr.curr == -1 as char
203201
}
204202
pub fn nextch(rdr: @mut StringReader) -> char {
205-
let offset = byte_offset(rdr).to_uint();
203+
let offset = byte_offset(rdr, rdr.pos).to_uint();
206204
if offset < (*rdr.src).len() {
207205
return str::char_at(*rdr.src, offset);
208206
} else { return -1 as char; }
@@ -692,7 +690,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
692690
return token::LIT_INT(c2 as i64, ast::ty_char);
693691
}
694692
'"' => {
695-
let n = byte_offset(rdr);
693+
let n = byte_offset(rdr, rdr.last_pos);
696694
bump(rdr);
697695
while rdr.curr != '"' {
698696
if is_eof(rdr) {

0 commit comments

Comments
 (0)