Skip to content

Lex binary and octal literals more eagerly. #23872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 1, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 25 additions & 13 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ impl<'a> StringReader<'a> {
let base = 10;

// find the integer representing the name
self.scan_digits(base);
self.scan_digits(base, base);
let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
num::from_str_radix(s, 10).unwrap_or_else(|_| {
panic!("expected digits representing a name, got {:?}, {}, range [{:?},{:?}]",
Expand All @@ -639,7 +639,7 @@ impl<'a> StringReader<'a> {

// find the integer representing the ctxt
let start_bpos = self.last_pos;
self.scan_digits(base);
self.scan_digits(base, base);
let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
num::from_str_radix(s, 10).unwrap_or_else(|_| {
panic!("expected digits representing a ctxt, got {:?}, {}", s, whence);
Expand All @@ -653,16 +653,28 @@ impl<'a> StringReader<'a> {
ctxt: encoded_ctxt, }
}

/// Scan through any digits (base `radix`) or underscores, and return how
/// many digits there were.
fn scan_digits(&mut self, radix: u32) -> usize {
/// Scan through any digits (base `scan_radix`) or underscores,
/// and return how many digits there were.
///
/// `real_radix` represents the true radix of the number we're
/// interested in, and errors will be emitted for any digits
/// between `real_radix` and `scan_radix`.
fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
assert!(real_radix <= scan_radix);
let mut len = 0;
loop {
let c = self.curr;
if c == Some('_') { debug!("skipping a _"); self.bump(); continue; }
match c.and_then(|cc| cc.to_digit(radix)) {
match c.and_then(|cc| cc.to_digit(scan_radix)) {
Some(_) => {
debug!("{:?} in scan_digits", c);
// check that the hypothetical digit is actually
// in range for the true radix
if c.unwrap().to_digit(real_radix).is_none() {
self.err_span_(self.last_pos, self.pos,
&format!("invalid digit for a base {} literal",
real_radix));
}
len += 1;
self.bump();
}
Expand All @@ -681,19 +693,19 @@ impl<'a> StringReader<'a> {

if c == '0' {
match self.curr.unwrap_or('\0') {
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2); }
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8); }
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16); }
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); }
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); }
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); }
'0'...'9' | '_' | '.' => {
num_digits = self.scan_digits(10) + 1;
num_digits = self.scan_digits(10, 10) + 1;
}
_ => {
// just a 0
return token::Integer(self.name_from(start_bpos));
}
}
} else if c.is_digit(10) {
num_digits = self.scan_digits(10) + 1;
num_digits = self.scan_digits(10, 10) + 1;
} else {
num_digits = 0;
}
Expand All @@ -712,7 +724,7 @@ impl<'a> StringReader<'a> {
// with a number
self.bump();
if self.curr.unwrap_or('\0').is_digit(10) {
self.scan_digits(10);
self.scan_digits(10, 10);
self.scan_float_exponent();
}
let last_pos = self.last_pos;
Expand Down Expand Up @@ -935,7 +947,7 @@ impl<'a> StringReader<'a> {
if self.curr_is('-') || self.curr_is('+') {
self.bump();
}
if self.scan_digits(10) == 0 {
if self.scan_digits(10, 10) == 0 {
self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent")
}
}
Expand Down
15 changes: 14 additions & 1 deletion src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,20 @@ pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) ->

let res: u64 = match ::std::num::from_str_radix(s, base).ok() {
Some(r) => r,
None => { sd.span_err(sp, "int literal is too large"); 0 }
None => {
// small bases are lexed as if they were base 10, e.g. the string
// might be `0b10201`. This will cause the conversion above to fail,
// but these cases have errors in the lexer: we don't want to emit
// two errors, and we especially don't want to emit this error since
// it isn't necessarily true.
let already_errored = base < 10 &&
s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));

if !already_errored {
sd.span_err(sp, "int literal is too large");
}
0
}
};

// adjust the sign
Expand Down
2 changes: 1 addition & 1 deletion src/test/parse-fail/issue-1802-1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@

// error-pattern:no valid digits found for number
fn main() {
log(error, 0b42);
log(error, 0b);
}
21 changes: 21 additions & 0 deletions src/test/parse-fail/lex-bad-binary-literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Parse-fail fixture for binary literals: every statement below is a `0b`
// literal that contains at least one decimal digit outside the binary range
// (2 through 9), including one literal that uses a `_` separator. Each line
// is annotated with the diagnostic it is expected to produce.
fn main() {
0b121; //~ ERROR invalid digit for a base 2 literal
0b10_10301; //~ ERROR invalid digit for a base 2 literal
0b30; //~ ERROR invalid digit for a base 2 literal
0b41; //~ ERROR invalid digit for a base 2 literal
0b5; //~ ERROR invalid digit for a base 2 literal
0b6; //~ ERROR invalid digit for a base 2 literal
0b7; //~ ERROR invalid digit for a base 2 literal
0b8; //~ ERROR invalid digit for a base 2 literal
0b9; //~ ERROR invalid digit for a base 2 literal
}
14 changes: 14 additions & 0 deletions src/test/parse-fail/lex-bad-octal-literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Parse-fail fixture for octal literals: each `0o` literal below contains a
// decimal digit (8 or 9) that is out of range for base 8, once in a plain
// literal and once in a literal that uses `_` separators. Each line is
// annotated with the diagnostic it is expected to produce.
fn main() {
0o18; //~ ERROR invalid digit for a base 8 literal
0o1234_9_5670; //~ ERROR invalid digit for a base 8 literal
}