|
15 | 15 | #endif
|
16 | 16 | {-# OPTIONS_GHC -fno-warn-unused-imports #-}
|
17 | 17 | module Distribution.Fields.Lexer
|
18 |
| - (ltest, lexString, lexByteString, lexToken, Token(..), LToken(..) |
| 18 | + (ltest, lexToken, Token(..), LToken(..) |
19 | 19 | ,bol_section, in_section, in_field_layout, in_field_braces
|
20 | 20 | ,mkLexState) where
|
21 | 21 |
|
@@ -82,102 +82,85 @@ tokens :-
|
82 | 82 | }
|
83 | 83 |
|
84 | 84 | <bol_section, bol_field_layout, bol_field_braces> {
|
85 |
| - @nbspspacetab* @nl { \pos len inp -> do |
86 |
| - _ <- checkWhitespace pos len inp |
87 |
| - adjustPos retPos |
88 |
| - toki Whitespace pos len inp } |
89 |
| - -- FIXME: no @nl here to allow for comments on last line of the file with no trailing \n |
90 |
| - -- FIXME: TODO: check the lack of @nl works here including counting line numbers |
91 |
| - $spacetab* "--" $comment* { toki Comment } |
| 85 | + @nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken } |
| 86 | + -- no @nl here to allow for comments on last line of the file with no trailing \n |
| 87 | + $spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here |
| 88 | + -- including counting line numbers |
92 | 89 | }
|
93 | 90 |
|
94 | 91 | <bol_section> {
|
95 |
| - @nbspspacetab* { \pos len inp -> do |
96 |
| - len' <- checkLeadingWhitespace pos len inp |
97 |
| - -- len' is character whitespace length (counting nbsp as one) |
98 |
| - if B.length inp == len |
99 |
| - then return (L pos EOF) |
100 |
| - else do |
101 |
| - -- Small hack: if char and byte length mismatch |
102 |
| - -- subtract the difference, so lexToken will count position correctly. |
103 |
| - -- Proper (and slower) fix is to count utf8 length in lexToken |
104 |
| - when (len' /= len) $ adjustPos (incPos (len' - len)) |
105 |
| - setStartCode in_section |
106 |
| - return (L pos (Indent len')) } |
| 92 | + @nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' -> |
| 93 | + -- len' is character whitespace length (counting nbsp as one) |
| 94 | + if B.length inp == len |
| 95 | + then return (L pos EOF) |
| 96 | + else do |
| 97 | + -- Small hack: if char and byte length mismatch |
| 98 | + -- subtract the difference, so lexToken will count position correctly. |
| 99 | + -- Proper (and slower) fix is to count utf8 length in lexToken |
| 100 | + when (len' /= len) $ adjustPos (incPos (len' - len)) |
| 101 | + setStartCode in_section |
| 102 | + return (L pos (Indent len')) } |
107 | 103 | $spacetab* \{ { tok OpenBrace }
|
108 | 104 | $spacetab* \} { tok CloseBrace }
|
109 | 105 | }
|
110 | 106 |
|
111 | 107 | <in_section> {
|
112 |
| - --TODO: don't allow tab as leading space |
113 |
| - $spacetab+ { toki Whitespace } |
114 |
| - |
115 |
| - "--" $comment* { toki Comment } |
116 |
| - |
117 |
| - @name { toki TokSym } |
118 |
| - @string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) } |
119 |
| - @oplike { toki TokOther } |
120 |
| - $paren { toki TokOther } |
121 |
| - \: { tok Colon } |
122 |
| - \{ { tok OpenBrace } |
123 |
| - \} { tok CloseBrace } |
124 |
| - @nl { \pos len inp -> do |
125 |
| - adjustPos retPos |
126 |
| - setStartCode bol_section |
127 |
| - toki Whitespace pos len inp } |
| 108 | + $spacetab+ ; --TODO: don't allow tab as leading space |
| 109 | + |
| 110 | + "--" $comment* ; -- TODO capture comments instead of deleting them |
| 111 | + |
| 112 | + @name { toki TokSym } |
| 113 | + @string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) } |
| 114 | + @oplike { toki TokOther } |
| 115 | + $paren { toki TokOther } |
| 116 | + \: { tok Colon } |
| 117 | + \{ { tok OpenBrace } |
| 118 | + \} { tok CloseBrace } |
| 119 | + @nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_section >> lexToken } |
128 | 120 | }
|
129 | 121 |
|
130 | 122 | <bol_field_layout> {
|
131 |
| - @nbspspacetab* { \pos len inp -> do |
132 |
| - len' <- checkLeadingWhitespace pos len inp |
133 |
| - if B.length inp == len |
134 |
| - then return (L pos EOF) |
135 |
| - else do |
136 |
| - -- Small hack: if char and byte length mismatch |
137 |
| - -- subtract the difference, so lexToken will count position correctly. |
138 |
| - -- Proper (and slower) fix is to count utf8 length in lexToken |
139 |
| - when (len' /= len) $ adjustPos (incPos (len' - len)) |
140 |
| - setStartCode in_field_layout |
141 |
| - return (L pos (Indent len')) } |
| 123 | + @nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' -> |
| 124 | + if B.length inp == len |
| 125 | + then return (L pos EOF) |
| 126 | + else do |
| 127 | + -- Small hack: if char and byte length mismatch |
| 128 | + -- subtract the difference, so lexToken will count position correctly. |
| 129 | + -- Proper (and slower) fix is to count utf8 length in lexToken |
| 130 | + when (len' /= len) $ adjustPos (incPos (len' - len)) |
| 131 | + setStartCode in_field_layout |
| 132 | + return (L pos (Indent len')) } |
142 | 133 | }
|
143 | 134 |
|
144 | 135 | <in_field_layout> {
|
145 |
| - $spacetab+ { toki Whitespace } |
146 |
| - $field_layout' $field_layout* { toki TokFieldLine } |
147 |
| - @nl { \pos len inp -> do |
148 |
| - adjustPos retPos |
149 |
| - setStartCode bol_field_layout |
150 |
| - toki Whitespace pos len inp } |
| 136 | + $spacetab+; |
| 137 | + $field_layout' $field_layout* { toki TokFieldLine } |
| 138 | + @nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_field_layout >> lexToken } |
151 | 139 | }
|
152 | 140 |
|
153 | 141 | <bol_field_braces> {
|
154 |
| - () { \_ _ _ -> setStartCode in_field_braces >> lexToken } |
| 142 | + () { \_ _ _ -> setStartCode in_field_braces >> lexToken } |
155 | 143 | }
|
156 | 144 |
|
157 | 145 | <in_field_braces> {
|
158 |
| - $spacetab+ { toki Whitespace } |
| 146 | + $spacetab+; |
159 | 147 | $field_braces' $field_braces* { toki TokFieldLine }
|
160 |
| - \{ { tok OpenBrace } |
161 |
| - \} { tok CloseBrace } |
162 |
| - @nl { \pos len inp -> do |
163 |
| - adjustPos retPos |
164 |
| - setStartCode bol_field_braces |
165 |
| - toki Whitespace pos len inp } |
| 148 | + \{ { tok OpenBrace } |
| 149 | + \} { tok CloseBrace } |
| 150 | + @nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_field_braces >> lexToken } |
166 | 151 | }
|
167 | 152 |
|
168 | 153 | {
|
169 | 154 |
|
170 | 155 | -- | Tokens of outer cabal file structure. Field values are treated opaquely.
|
171 |
| -data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or operator |
172 |
| - | TokStr !ByteString -- ^ String in quotes |
173 |
| - | TokOther !ByteString -- ^ Operators and parens |
174 |
| - | Indent !Int -- ^ Indentation token |
| 156 | +data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or operator |
| 157 | + | TokStr !ByteString -- ^ String in quotes |
| 158 | + | TokOther !ByteString -- ^ Operators and parens |
| 159 | + | Indent !Int -- ^ Indentation token |
175 | 160 | | TokFieldLine !ByteString -- ^ Lines after @:@
|
176 | 161 | | Colon
|
177 | 162 | | OpenBrace
|
178 | 163 | | CloseBrace
|
179 |
| - | Whitespace !ByteString |
180 |
| - | Comment !ByteString |
181 | 164 | | EOF
|
182 | 165 | | LexicalError InputStream --TODO: add separate string lexical error
|
183 | 166 | deriving Show
|
@@ -247,6 +230,7 @@ lexToken = do
|
247 | 230 | setInput inp'
|
248 | 231 | let !len_bytes = B.length inp - B.length inp'
|
249 | 232 | t <- action pos len_bytes inp
|
| 233 | + --traceShow t $ return tok |
250 | 234 | return t
|
251 | 235 |
|
252 | 236 |
|
@@ -275,29 +259,11 @@ lexAll = do
|
275 | 259 | _ -> do ts <- lexAll
|
276 | 260 | return (t : ts)
|
277 | 261 |
|
278 |
| --- FIXME: for debugging |
279 |
| -lexAll' :: Lex [(Int, LToken)] |
280 |
| -lexAll' = do |
281 |
| - t <- lexToken |
282 |
| - c <- getStartCode |
283 |
| - case t of |
284 |
| - L _ EOF -> return [(c, t)] |
285 |
| - _ -> do ts <- lexAll' |
286 |
| - return ((c, t) : ts) |
287 |
| - |
288 | 262 | ltest :: Int -> String -> Prelude.IO ()
|
289 | 263 | ltest code s =
|
290 | 264 | let (ws, xs) = execLexer (setStartCode code >> lexAll) (B.Char8.pack s)
|
291 | 265 | in traverse_ print ws >> traverse_ print xs
|
292 | 266 |
|
293 |
| -lexString :: String -> ([LexWarning], [LToken]) |
294 |
| -lexString = execLexer lexAll . B.Char8.pack |
295 |
| - |
296 |
| -lexByteString :: ByteString -> ([LexWarning], [LToken]) |
297 |
| -lexByteString = execLexer lexAll |
298 |
| - |
299 |
| -lexByteString' :: ByteString -> ([LexWarning], [(Int, LToken)]) |
300 |
| -lexByteString' = execLexer lexAll' |
301 | 267 |
|
302 | 268 | mkLexState :: ByteString -> LexState
|
303 | 269 | mkLexState input = LexState
|
|
0 commit comments