@@ -11,6 +11,7 @@ pub enum Token<'a> {
     Exclamation,
     Question,
     Colon,
+    EndOfLine, // one per literal '\n', plus a single trailing one at end of input
     Quote(&'a str),
     Word(&'a str),
 }
@@ -24,6 +25,7 @@ impl fmt::Display for Token<'_> {
             Token::Exclamation => write!(f, "!"),
             Token::Question => write!(f, "?"),
             Token::Colon => write!(f, ":"),
+            Token::EndOfLine => Ok(()), // a line break has no printable representation
             Token::Quote(body) => write!(f, r#""{}""#, body),
             Token::Word(word) => write!(f, "{}", word),
         }
@@ -34,6 +36,7 @@ impl fmt::Display for Token<'_> {
 pub struct Tokenizer<'a> {
     input: &'a str,
     chars: Peekable<CharIndices<'a>>,
+    end_of_input_emitted: bool, // set once the synthesized trailing EndOfLine is returned
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -74,6 +77,7 @@ impl<'a> Tokenizer<'a> {
         Tokenizer {
             input,
             chars: input.char_indices().peekable(),
+            end_of_input_emitted: false,
         }
     }
 
@@ -86,7 +90,10 @@ impl<'a> Tokenizer<'a> {
     }
 
     fn consume_whitespace(&mut self) {
-        while self.cur().map_or(false, |c| c.1.is_whitespace()) {
+        // Stop at '\n' so newlines survive to become tokens instead of being skipped.
+        while self
+            .cur()
+            .map_or(false, |c| c.1 != '\n' && c.1.is_whitespace())
+        {
             self.advance();
         }
     }
@@ -100,6 +107,7 @@ impl<'a> Tokenizer<'a> {
             '!' => Some(Token::Exclamation),
             '?' => Some(Token::Question),
             ';' => Some(Token::Semi),
+            '\n' => Some(Token::EndOfLine), // newlines are now punctuation-like tokens
             _ => None,
         }
     }
@@ -162,7 +170,12 @@ impl<'a> Tokenizer<'a> {
     pub fn next_token(&mut self) -> Result<Option<Token<'a>>, Error<'a>> {
         self.consume_whitespace();
         if self.at_end() {
-            return Ok(None);
+            if self.end_of_input_emitted {
+                return Ok(None);
+            } else {
+                self.end_of_input_emitted = true; // synthesize exactly one trailing EndOfLine
+                return Ok(Some(Token::EndOfLine));
+            }
         }
         if let Some(punct) = self.consume_punct() {
             return Ok(Some(punct));
         }
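
Taken together, callers now receive EndOfLine from two places: consume_punct yields one for every literal '\n', and next_token synthesizes a final one when the input runs out, with end_of_input_emitted guaranteeing it fires only once. A minimal sketch of the resulting call-by-call behavior, assuming the constructor shown above is named Tokenizer::new (the diff does not show its name):

    // Sketch: expected results, call by call, for the input "hi\nbye".
    let mut t = Tokenizer::new("hi\nbye");
    assert_eq!(t.next_token().unwrap(), Some(Token::Word("hi")));
    assert_eq!(t.next_token().unwrap(), Some(Token::EndOfLine)); // the literal '\n'
    assert_eq!(t.next_token().unwrap(), Some(Token::Word("bye")));
    assert_eq!(t.next_token().unwrap(), Some(Token::EndOfLine)); // synthesized at end of input
    assert_eq!(t.next_token().unwrap(), None); // the flag suppresses any repeat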
@@ -205,8 +218,17 @@ fn tokenize<'a>(input: &'a str) -> Result<Vec<Token<'a>>, Error<'a>> {
 #[test]
 fn tokenize_1() {
     assert_eq!(
-        tokenize("foo\t \r \n bar\n baz").unwrap(),
-        [Token::Word("foo"), Token::Word("bar"), Token::Word("baz"),]
+        tokenize("foo\t \r \n \n bar\n baz\n ").unwrap(),
+        [
+            Token::Word("foo"),
+            Token::EndOfLine,
+            Token::EndOfLine,
+            Token::Word("bar"),
+            Token::EndOfLine,
+            Token::Word("baz"),
+            Token::EndOfLine,
+            Token::EndOfLine,
+        ]
     );
 }
 
@@ -221,7 +243,8 @@ fn tokenize_2() {
             Token::Dot,
             Token::Comma,
             Token::Dot,
-            Token::Comma
+            Token::Comma,
+            Token::EndOfLine,
         ]
     );
 }
@@ -234,7 +257,8 @@ fn tokenize_whitespace_dots() {
             Token::Word("baz"),
             Token::Dot,
             Token::Comma,
-            Token::Word("bar")
+            Token::Word("bar"),
+            Token::EndOfLine,
         ]
     );
 }
@@ -248,6 +272,7 @@ fn tokenize_3() {
             Token::Comma,
             Token::Word("and"),
             Token::Word("-baz"),
+            Token::EndOfLine,
         ]
     );
 }
@@ -256,13 +281,21 @@ fn tokenize_3() {
 fn tokenize_4() {
     assert_eq!(
         tokenize(", , b").unwrap(),
-        [Token::Comma, Token::Comma, Token::Word("b")]
+        [
+            Token::Comma,
+            Token::Comma,
+            Token::Word("b"),
+            Token::EndOfLine,
+        ]
     );
 }
 
 #[test]
 fn tokenize_5() {
-    assert_eq!(tokenize(r#""testing""#).unwrap(), [Token::Quote("testing")]);
+    assert_eq!(
+        tokenize(r#""testing""#).unwrap(),
+        [Token::Quote("testing"), Token::EndOfLine,]
+    );
 }
 
 #[test]
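
Every test now expects at least one trailing EndOfLine, and tokenize_1 shows that input ending in '\n' produces two in a row: the literal newline plus the synthesized end-of-input token, leaving an empty final line. One way a consumer might exploit this shape, as a sketch assuming the Tokenizer, Token, and Error definitions from this diff (the helper lines_of is hypothetical, not part of the commit):

    // Hypothetical helper: split the token stream into lines at each EndOfLine.
    fn lines_of<'a>(input: &'a str) -> Result<Vec<Vec<Token<'a>>>, Error<'a>> {
        let mut tokenizer = Tokenizer::new(input); // assumed constructor name
        let mut lines = Vec::new();
        let mut current = Vec::new();
        while let Some(token) = tokenizer.next_token()? {
            if token == Token::EndOfLine {
                // The synthesized trailing EndOfLine closes the last line even
                // when the input does not end with '\n'.
                lines.push(std::mem::take(&mut current));
            } else {
                current.push(token);
            }
        }
        Ok(lines)
    }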