Skip to content

Commit face004

Browse files
committed
Return Span-annotated TokenOccurrence from tokenize()
1 parent 5636dc3 commit face004

File tree

2 files changed

+46
-5
lines changed

2 files changed

+46
-5
lines changed

src/parser.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ pub struct Parser {
7777

7878
impl Parser {
7979
/// Parse the specified tokens
80-
pub fn new(tokens: Vec<Token>) -> Self {
80+
pub fn new(token_occurences: Vec<TokenOccurrence>) -> Self {
81+
let tokens = token_occurences.into_iter().map(|to| to.token).collect();
8182
Parser { tokens, index: 0 }
8283
}
8384

src/tokenizer.rs

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@ use super::dialect::keywords::ALL_KEYWORDS;
2323
use super::dialect::Dialect;
2424
use std::fmt;
2525

26+
/// A line/column position in the tokenized source text.
///
/// Two `u64`s, so it is cheap to copy; the derived ordering is
/// lexicographic on (line, column), i.e. document order.
///
/// NOTE(review): whether numbering is 0- or 1-based depends on the
/// tokenizer's initial `line`/`col` values — confirm before relying
/// on absolute values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LineColumn {
    /// Line number within the source text.
    pub line: u64,
    /// Column number within the line.
    pub column: u64,
}
31+
32+
/// The region of source text covered by a single token.
#[derive(Debug, Clone, PartialEq)]
pub struct Span {
    /// Position at which the token starts.
    pub start: LineColumn,
    /// Position at which the token ends, as tracked by the tokenizer's
    /// line/col counters after the token is consumed.
    pub end: LineColumn,
}
37+
38+
/// A token together with the source span it was read from.
///
/// Produced by `Tokenizer::tokenize`; wraps a plain `Token` so callers
/// can report source positions alongside the token itself.
#[derive(Debug, Clone, PartialEq)]
pub struct TokenOccurrence {
    /// The lexed token.
    pub token: Token,
    /// The source region this token occupies.
    pub span: Span,
}
43+
44+
impl fmt::Display for TokenOccurrence {
45+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46+
self.token.fmt(f)
47+
}
48+
}
49+
2650
/// SQL Token enumeration
2751
#[derive(Debug, Clone, PartialEq)]
2852
pub enum Token {
@@ -232,12 +256,17 @@ impl<'a> Tokenizer<'a> {
232256
}
233257

234258
/// Tokenize the statement and produce a vector of tokens
235-
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
259+
pub fn tokenize(&mut self) -> Result<Vec<TokenOccurrence>, TokenizerError> {
236260
let mut peekable = self.query.chars().peekable();
237261

238-
let mut tokens: Vec<Token> = vec![];
262+
let mut tokens: Vec<TokenOccurrence> = vec![];
239263

240264
while let Some(token) = self.next_token(&mut peekable)? {
265+
let token_start = LineColumn {
266+
line: self.line,
267+
column: self.col,
268+
};
269+
241270
match &token {
242271
Token::Whitespace(Whitespace::Newline) => {
243272
self.line += 1;
@@ -252,8 +281,18 @@ impl<'a> Tokenizer<'a> {
252281
_ => self.col += 1,
253282
}
254283

255-
tokens.push(token);
284+
tokens.push(TokenOccurrence {
285+
token,
286+
span: Span {
287+
start: token_start,
288+
end: LineColumn {
289+
line: self.line,
290+
column: self.col,
291+
},
292+
},
293+
});
256294
}
295+
257296
Ok(tokens)
258297
}
259298

@@ -805,7 +844,8 @@ mod tests {
805844
compare(expected, tokens);
806845
}
807846

808-
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
847+
fn compare(expected: Vec<Token>, actual: Vec<TokenOccurrence>) {
848+
let actual: Vec<_> = actual.into_iter().map(|to| to.token).collect();
809849
//println!("------------------------------");
810850
//println!("tokens = {:?}", actual);
811851
//println!("expected = {:?}", expected);

0 commit comments

Comments
 (0)