|
4 | 4 | ragg/support |
5 | 5 | racket/set |
6 | 6 | racket/port |
| 7 | + racket/list |
7 | 8 | "grammar.rkt") |
8 | 9 | (provide tokenize) |
9 | 10 |
|
|
27 | 28 | operator-chars |
28 | 29 | (union "+" "-" "*" "/" "<=" ">=" "==" "<>" "<" ">")) |
29 | 30 |
|
30 | | - |
;; Return the source position n characters past pos, staying on the
;; same line.  Used to split a single lexeme (e.g. "((") into two
;; adjacent tokens with accurate positions.
(define (get-middle-pos n pos)
  (define base-offset (position-offset pos))
  (define base-col    (position-col pos))
  (position (+ base-offset n)
            (position-line pos)
            (+ base-col n)))
| 35 | + |
31 | 36 | (define (tokenize ip) |
32 | 37 | (port-count-lines! ip) |
33 | | - (define (push-back-paren) |
34 | | - (set! ip (input-port-append #t (open-input-string " (") |
35 | | - ip))) |
| 38 | + (define extra-token #f) |
36 | 39 | (define my-lexer |
37 | 40 | (lexer-src-pos |
38 | 41 | ;; open parenthesis: preceded by space and not |
|
41 | 44 | ;; doing this at the tokenizer level, we don't need to deal |
42 | 45 | ;; with whitespace in the grammar. |
43 | 46 | ["((" |
44 | | - (begin |
45 | | - (push-back-paren) |
46 | | - (token PARENNOSPACE "("))] |
| 47 | + (let [(middle-pos (get-middle-pos 1 start-pos))] |
| 48 | + (return-without-pos |
| 49 | + (list (position-token (token PARENNOSPACE "(") start-pos middle-pos) |
| 50 | + (position-token (token PARENSPACE "(") middle-pos end-pos))))] |
47 | 51 | [(concatenation operator-chars "(") |
48 | | - (let [(op (substring lexeme 0 |
49 | | - (- (string-length lexeme) 1)))] |
50 | | - (push-back-paren) |
51 | | - (token op op))] |
| 52 | + (let* [(op (substring lexeme 0 |
| 53 | + (- (string-length lexeme) 1))) |
| 54 | + (op-len (string-length op)) |
| 55 | + (middle-pos (get-middle-pos op-len start-pos))] |
| 56 | + (return-without-pos |
| 57 | + (list (position-token (token op op) start-pos middle-pos) |
| 58 | + (position-token (token PARENSPACE "(") middle-pos end-pos))))] |
52 | 59 | [(concatenation whitespace "(") |
53 | 60 | (token PARENSPACE "(")] |
54 | 61 | ["(" |
|
112 | 119 | [(eof) |
113 | 120 | (void)] |
114 | 121 | )) |
115 | | - (define (next-token) (my-lexer ip)) |
| 122 | + (define (next-token) |
| 123 | + (if extra-token |
| 124 | + (let [(rv extra-token)] |
| 125 | + (set! extra-token #f) |
| 126 | + rv) |
| 127 | + (let [(tokens (my-lexer ip))] |
| 128 | + (cond |
| 129 | + [(list? tokens) (set! extra-token (second tokens)) |
| 130 | + (first tokens)] |
| 131 | + [else tokens])))) |
116 | 132 | next-token) |
0 commit comments