Skip to content

Commit 606f50c

Browse files
committed
Lex binary and octal literals more eagerly.
Previously `0b12` was lexed as two tokens, `0b1` and `2`, because 2 isn't a valid base 2 digit. This patch collapses them into a single token (and makes `0b12` etc. an error, since 2 isn't a valid base 2 digit). This may break some invocations of macros with `tt` (or syntax extensions) that rely on adjacent digits being separate tokens, and hence is a [breaking-change]. The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`. cc rust-lang/rfcs#879
1 parent f002640 commit 606f50c

File tree

5 files changed

+75
-15
lines changed

5 files changed

+75
-15
lines changed

src/libsyntax/parse/lexer/mod.rs

+25-13
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ impl<'a> StringReader<'a> {
621621
let base = 10;
622622

623623
// find the integer representing the name
624-
self.scan_digits(base);
624+
self.scan_digits(base, base);
625625
let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
626626
num::from_str_radix(s, 10).unwrap_or_else(|_| {
627627
panic!("expected digits representing a name, got {:?}, {}, range [{:?},{:?}]",
@@ -639,7 +639,7 @@ impl<'a> StringReader<'a> {
639639

640640
// find the integer representing the ctxt
641641
let start_bpos = self.last_pos;
642-
self.scan_digits(base);
642+
self.scan_digits(base, base);
643643
let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
644644
num::from_str_radix(s, 10).unwrap_or_else(|_| {
645645
panic!("expected digits representing a ctxt, got {:?}, {}", s, whence);
@@ -653,16 +653,28 @@ impl<'a> StringReader<'a> {
653653
ctxt: encoded_ctxt, }
654654
}
655655

656-
/// Scan through any digits (base `radix`) or underscores, and return how
657-
/// many digits there were.
658-
fn scan_digits(&mut self, radix: u32) -> usize {
656+
/// Scan through any digits (base `scan_radix`) or underscores,
657+
/// and return how many digits there were.
658+
///
659+
/// `real_radix` represents the true radix of the number we're
660+
/// interested in, and errors will be emitted for any digits
661+
/// between `real_radix` and `scan_radix`.
662+
fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
663+
assert!(real_radix <= scan_radix);
659664
let mut len = 0;
660665
loop {
661666
let c = self.curr;
662667
if c == Some('_') { debug!("skipping a _"); self.bump(); continue; }
663-
match c.and_then(|cc| cc.to_digit(radix)) {
668+
match c.and_then(|cc| cc.to_digit(scan_radix)) {
664669
Some(_) => {
665670
debug!("{:?} in scan_digits", c);
671+
// check that the hypothetical digit is actually
672+
// in range for the true radix
673+
if c.unwrap().to_digit(real_radix).is_none() {
674+
self.err_span_(self.last_pos, self.pos,
675+
&format!("invalid digit for a base {} literal",
676+
real_radix));
677+
}
666678
len += 1;
667679
self.bump();
668680
}
@@ -681,19 +693,19 @@ impl<'a> StringReader<'a> {
681693

682694
if c == '0' {
683695
match self.curr.unwrap_or('\0') {
684-
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2); }
685-
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8); }
686-
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16); }
696+
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); }
697+
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); }
698+
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); }
687699
'0'...'9' | '_' | '.' => {
688-
num_digits = self.scan_digits(10) + 1;
700+
num_digits = self.scan_digits(10, 10) + 1;
689701
}
690702
_ => {
691703
// just a 0
692704
return token::Integer(self.name_from(start_bpos));
693705
}
694706
}
695707
} else if c.is_digit(10) {
696-
num_digits = self.scan_digits(10) + 1;
708+
num_digits = self.scan_digits(10, 10) + 1;
697709
} else {
698710
num_digits = 0;
699711
}
@@ -712,7 +724,7 @@ impl<'a> StringReader<'a> {
712724
// with a number
713725
self.bump();
714726
if self.curr.unwrap_or('\0').is_digit(10) {
715-
self.scan_digits(10);
727+
self.scan_digits(10, 10);
716728
self.scan_float_exponent();
717729
}
718730
let last_pos = self.last_pos;
@@ -935,7 +947,7 @@ impl<'a> StringReader<'a> {
935947
if self.curr_is('-') || self.curr_is('+') {
936948
self.bump();
937949
}
938-
if self.scan_digits(10) == 0 {
950+
if self.scan_digits(10, 10) == 0 {
939951
self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent")
940952
}
941953
}

src/libsyntax/parse/mod.rs

+14-1
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,20 @@ pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) ->
735735

736736
let res: u64 = match ::std::num::from_str_radix(s, base).ok() {
737737
Some(r) => r,
738-
None => { sd.span_err(sp, "int literal is too large"); 0 }
738+
None => {
739+
// small bases are lexed as if they were base 10, e.g, the string
740+
// might be `0b10201`. This will cause the conversion above to fail,
741+
// but these cases have errors in the lexer: we don't want to emit
742+
// two errors, and we especially don't want to emit this error since
743+
// it isn't necessarily true.
744+
let already_errored = base < 10 &&
745+
s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
746+
747+
if !already_errored {
748+
sd.span_err(sp, "int literal is too large");
749+
}
750+
0
751+
}
739752
};
740753

741754
// adjust the sign

src/test/parse-fail/issue-1802-1.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@
1010

1111
// error-pattern:no valid digits found for number
1212
fn main() {
13-
log(error, 0b42);
13+
log(error, 0b);
1414
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
fn main() {
12+
0b121; //~ ERROR invalid digit for a base 2 literal
13+
0b10_10301; //~ ERROR invalid digit for a base 2 literal
14+
0b30; //~ ERROR invalid digit for a base 2 literal
15+
0b41; //~ ERROR invalid digit for a base 2 literal
16+
0b5; //~ ERROR invalid digit for a base 2 literal
17+
0b6; //~ ERROR invalid digit for a base 2 literal
18+
0b7; //~ ERROR invalid digit for a base 2 literal
19+
0b8; //~ ERROR invalid digit for a base 2 literal
20+
0b9; //~ ERROR invalid digit for a base 2 literal
21+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
fn main() {
12+
0o18; //~ ERROR invalid digit for a base 8 literal
13+
0o1234_9_5670; //~ ERROR invalid digit for a base 8 literal
14+
}

0 commit comments

Comments
 (0)