@@ -130,7 +130,7 @@ impl<'a> StringReader<'a> {
130130 self . ch . is_none ( )
131131 }
132132
133- fn fail_unterminated_raw_string ( & self , pos : BytePos , hash_count : u16 ) {
133+ fn fail_unterminated_raw_string ( & self , pos : BytePos , hash_count : u16 ) -> ! {
134134 let mut err = self . struct_span_fatal ( pos, pos, "unterminated raw string" ) ;
135135 err. span_label ( self . mk_sp ( pos, pos) , "unterminated raw string" ) ;
136136
@@ -292,15 +292,6 @@ impl<'a> StringReader<'a> {
292292 self . sess . span_diagnostic . struct_span_fatal ( self . mk_sp ( from_pos, to_pos) , & m[ ..] )
293293 }
294294
295- /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
296- /// escaped character to the error message
297- fn err_span_char ( & self , from_pos : BytePos , to_pos : BytePos , m : & str , c : char ) {
298- let mut m = m. to_string ( ) ;
299- m. push_str ( ": " ) ;
300- push_escaped_char ( & mut m, c) ;
301- self . err_span_ ( from_pos, to_pos, & m[ ..] ) ;
302- }
303-
304295 /// Advance peek_token to refer to the next token, and
305296 /// possibly update the interner.
306297 fn advance_token ( & mut self ) -> Result < ( ) , ( ) > {
@@ -1070,7 +1061,13 @@ impl<'a> StringReader<'a> {
10701061 self . validate_byte_str_escape ( start_with_quote) ;
10711062 ( token:: ByteStr , symbol)
10721063 } ,
1073- Some ( 'r' ) => self . scan_raw_byte_string ( ) ,
1064+ Some ( 'r' ) => {
1065+ let ( start, end, hash_count) = self . scan_raw_string ( ) ;
1066+ let symbol = self . name_from_to ( start, end) ;
1067+ self . validate_raw_byte_str_escape ( start, end) ;
1068+
1069+ ( token:: ByteStrRaw ( hash_count) , symbol)
1070+ }
10741071 _ => unreachable ! ( ) , // Should have been a token::Ident above.
10751072 } ;
10761073 let suffix = self . scan_optional_raw_name ( ) ;
@@ -1086,79 +1083,9 @@ impl<'a> StringReader<'a> {
10861083 Ok ( TokenKind :: lit ( token:: Str , symbol, suffix) )
10871084 }
10881085 'r' => {
1089- let start_bpos = self . pos ;
1090- self . bump ( ) ;
1091- let mut hash_count: u16 = 0 ;
1092- while self . ch_is ( '#' ) {
1093- if hash_count == 65535 {
1094- let bpos = self . next_pos ;
1095- self . fatal_span_ ( start_bpos,
1096- bpos,
1097- "too many `#` symbols: raw strings may be \
1098- delimited by up to 65535 `#` symbols") . raise ( ) ;
1099- }
1100- self . bump ( ) ;
1101- hash_count += 1 ;
1102- }
1103-
1104- if self . is_eof ( ) {
1105- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1106- } else if !self . ch_is ( '"' ) {
1107- let last_bpos = self . pos ;
1108- let curr_char = self . ch . unwrap ( ) ;
1109- self . fatal_span_char ( start_bpos,
1110- last_bpos,
1111- "found invalid character; only `#` is allowed \
1112- in raw string delimitation",
1113- curr_char) . raise ( ) ;
1114- }
1115- self . bump ( ) ;
1116- let content_start_bpos = self . pos ;
1117- let mut content_end_bpos;
1118- let mut valid = true ;
1119- ' outer: loop {
1120- if self . is_eof ( ) {
1121- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1122- }
1123- // if self.ch_is('"') {
1124- // content_end_bpos = self.pos;
1125- // for _ in 0..hash_count {
1126- // self.bump();
1127- // if !self.ch_is('#') {
1128- // continue 'outer;
1129- let c = self . ch . unwrap ( ) ;
1130- match c {
1131- '"' => {
1132- content_end_bpos = self . pos ;
1133- for _ in 0 ..hash_count {
1134- self . bump ( ) ;
1135- if !self . ch_is ( '#' ) {
1136- continue ' outer;
1137- }
1138- }
1139- break ;
1140- }
1141- '\r' => {
1142- if !self . nextch_is ( '\n' ) {
1143- let last_bpos = self . pos ;
1144- self . err_span_ ( start_bpos,
1145- last_bpos,
1146- "bare CR not allowed in raw string, use \\ r \
1147- instead") ;
1148- valid = false ;
1149- }
1150- }
1151- _ => ( ) ,
1152- }
1153- self . bump ( ) ;
1154- }
1155-
1156- self . bump ( ) ;
1157- let symbol = if valid {
1158- self . name_from_to ( content_start_bpos, content_end_bpos)
1159- } else {
1160- Symbol :: intern ( "??" )
1161- } ;
1086+ let ( start, end, hash_count) = self . scan_raw_string ( ) ;
1087+ let symbol = self . name_from_to ( start, end) ;
1088+ self . validate_raw_str_escape ( start, end) ;
11621089 let suffix = self . scan_optional_raw_name ( ) ;
11631090
11641091 Ok ( TokenKind :: lit ( token:: StrRaw ( hash_count) , symbol, suffix) )
@@ -1315,16 +1242,18 @@ impl<'a> StringReader<'a> {
13151242 id
13161243 }
13171244
1318- fn scan_raw_byte_string ( & mut self ) -> ( token:: LitKind , Symbol ) {
1245+ /// Scans a raw (byte) string, returning the byte position range of `<literal>`
1246+ /// (excluding the surrounding quotes) along with the `#` character count in `(b)r##..."<literal>"##...`.
1247+ fn scan_raw_string ( & mut self ) -> ( BytePos , BytePos , u16 ) {
13191248 let start_bpos = self . pos ;
13201249 self . bump ( ) ;
1321- let mut hash_count = 0 ;
1250+ let mut hash_count: u16 = 0 ;
13221251 while self . ch_is ( '#' ) {
13231252 if hash_count == 65535 {
13241253 let bpos = self . next_pos ;
13251254 self . fatal_span_ ( start_bpos,
13261255 bpos,
1327- "too many `#` symbols: raw byte strings may be \
1256+ "too many `#` symbols: raw strings may be \
13281257 delimited by up to 65535 `#` symbols") . raise ( ) ;
13291258 }
13301259 self . bump ( ) ;
@@ -1334,13 +1263,13 @@ impl<'a> StringReader<'a> {
13341263 if self . is_eof ( ) {
13351264 self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
13361265 } else if !self . ch_is ( '"' ) {
1337- let pos = self . pos ;
1338- let ch = self . ch . unwrap ( ) ;
1266+ let last_bpos = self . pos ;
1267+ let curr_char = self . ch . unwrap ( ) ;
13391268 self . fatal_span_char ( start_bpos,
1340- pos ,
1341- "found invalid character; only `#` is allowed in raw \
1342- string delimitation",
1343- ch ) . raise ( ) ;
1269+ last_bpos ,
1270+ "found invalid character; only `#` is allowed \
1271+ in raw string delimitation",
1272+ curr_char ) . raise ( ) ;
13441273 }
13451274 self . bump ( ) ;
13461275 let content_start_bpos = self . pos ;
@@ -1360,19 +1289,14 @@ impl<'a> StringReader<'a> {
13601289 }
13611290 break ;
13621291 }
1363- Some ( c) => {
1364- if c > '\x7F' {
1365- let pos = self . pos ;
1366- self . err_span_char ( pos, pos, "raw byte string must be ASCII" , c) ;
1367- }
1368- }
1292+ _ => ( ) ,
13691293 }
13701294 self . bump ( ) ;
13711295 }
13721296
13731297 self . bump ( ) ;
13741298
1375- ( token :: ByteStrRaw ( hash_count ) , self . name_from_to ( content_start_bpos, content_end_bpos) )
1299+ ( content_start_bpos, content_end_bpos, hash_count )
13761300 }
13771301
13781302 fn validate_char_escape ( & self , start_with_quote : BytePos ) {
@@ -1422,6 +1346,40 @@ impl<'a> StringReader<'a> {
14221346 } ) ;
14231347 }
14241348
/// Validates the contents of an already-scanned raw string literal.
///
/// Hands the source text between `content_start` and `content_end` to
/// `unescape::unescape_raw_str` and forwards every `Err` it yields to
/// `emit_unescape_error`, together with `self.sess.span_diagnostic` and
/// `unescape::Mode::Str`. `Ok` results are ignored and nothing is returned.
1349+ fn validate_raw_str_escape ( & self , content_start : BytePos , content_end : BytePos ) {
1350+ self . with_str_from_to ( content_start, content_end, |lit : & str | {
1351+ unescape:: unescape_raw_str ( lit, & mut |range, c| {
1352+ if let Err ( err) = c {
1353+ emit_unescape_error (
1354+ & self . sess . span_diagnostic ,
1355+ lit,
// The span is one byte wider than the content range on each side;
// presumably so that it also covers the surrounding `"` characters --
// TODO confirm against what `emit_unescape_error` expects.
1356+ self . mk_sp ( content_start - BytePos ( 1 ) , content_end + BytePos ( 1 ) ) ,
1357+ unescape:: Mode :: Str ,
1358+ range,
1359+ err,
1360+ )
1361+ }
1362+ } )
1363+ } ) ;
1364+ }
1365+
/// Validates the contents of an already-scanned raw byte string literal.
///
/// Hands the source text between `content_start` and `content_end` to
/// `unescape::unescape_raw_byte_str` and forwards every `Err` it yields to
/// `emit_unescape_error`, together with `self.sess.span_diagnostic` and
/// `unescape::Mode::ByteStr`. `Ok` results are ignored and nothing is returned.
1366+ fn validate_raw_byte_str_escape ( & self , content_start : BytePos , content_end : BytePos ) {
1367+ self . with_str_from_to ( content_start, content_end, |lit : & str | {
1368+ unescape:: unescape_raw_byte_str ( lit, & mut |range, c| {
1369+ if let Err ( err) = c {
1370+ emit_unescape_error (
1371+ & self . sess . span_diagnostic ,
1372+ lit,
// The span is one byte wider than the content range on each side;
// presumably so that it also covers the surrounding `"` characters --
// TODO confirm against what `emit_unescape_error` expects.
1373+ self . mk_sp ( content_start - BytePos ( 1 ) , content_end + BytePos ( 1 ) ) ,
1374+ unescape:: Mode :: ByteStr ,
1375+ range,
1376+ err,
1377+ )
1378+ }
1379+ } )
1380+ } ) ;
1381+ }
1382+
14251383 fn validate_byte_str_escape ( & self , start_with_quote : BytePos ) {
14261384 self . with_str_from_to ( start_with_quote + BytePos ( 1 ) , self . pos - BytePos ( 1 ) , |lit| {
14271385 unescape:: unescape_byte_str ( lit, & mut |range, c| {
0 commit comments