Skip to content

Commit c5780e0

Browse files
committed
Revert "Add comments and whitespace tokens to the lexer"
This reverts commit d752e49.
1 parent 15c2aea commit c5780e0

File tree

3 files changed

+110
-209
lines changed

3 files changed

+110
-209
lines changed

Cabal-syntax/src/Distribution/Fields/Lexer.x

Lines changed: 51 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#endif
1616
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
1717
module Distribution.Fields.Lexer
18-
(ltest, lexString, lexByteString, lexToken, Token(..), LToken(..)
18+
(ltest, lexToken, Token(..), LToken(..)
1919
,bol_section, in_section, in_field_layout, in_field_braces
2020
,mkLexState) where
2121

@@ -82,102 +82,85 @@ tokens :-
8282
}
8383

8484
<bol_section, bol_field_layout, bol_field_braces> {
85-
@nbspspacetab* @nl { \pos len inp -> do
86-
_ <- checkWhitespace pos len inp
87-
adjustPos retPos
88-
toki Whitespace pos len inp }
89-
-- FIXME: no @nl here to allow for comments on last line of the file with no trailing \n
90-
-- FIXME: TODO: check the lack of @nl works here including counting line numbers
91-
$spacetab* "--" $comment* { toki Comment }
85+
@nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken }
86+
-- no @nl here to allow for comments on last line of the file with no trailing \n
87+
$spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here
88+
-- including counting line numbers
9289
}
9390

9491
<bol_section> {
95-
@nbspspacetab* { \pos len inp -> do
96-
len' <- checkLeadingWhitespace pos len inp
97-
-- len' is character whitespace length (counting nbsp as one)
98-
if B.length inp == len
99-
then return (L pos EOF)
100-
else do
101-
-- Small hack: if char and byte length mismatch
102-
-- subtract the difference, so lexToken will count position correctly.
103-
-- Proper (and slower) fix is to count utf8 length in lexToken
104-
when (len' /= len) $ adjustPos (incPos (len' - len))
105-
setStartCode in_section
106-
return (L pos (Indent len')) }
92+
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
93+
-- len' is character whitespace length (counting nbsp as one)
94+
if B.length inp == len
95+
then return (L pos EOF)
96+
else do
97+
-- Small hack: if char and byte length mismatch
98+
-- subtract the difference, so lexToken will count position correctly.
99+
-- Proper (and slower) fix is to count utf8 length in lexToken
100+
when (len' /= len) $ adjustPos (incPos (len' - len))
101+
setStartCode in_section
102+
return (L pos (Indent len')) }
107103
$spacetab* \{ { tok OpenBrace }
108104
$spacetab* \} { tok CloseBrace }
109105
}
110106

111107
<in_section> {
112-
--TODO: don't allow tab as leading space
113-
$spacetab+ { toki Whitespace }
114-
115-
"--" $comment* { toki Comment }
116-
117-
@name { toki TokSym }
118-
@string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) }
119-
@oplike { toki TokOther }
120-
$paren { toki TokOther }
121-
\: { tok Colon }
122-
\{ { tok OpenBrace }
123-
\} { tok CloseBrace }
124-
@nl { \pos len inp -> do
125-
adjustPos retPos
126-
setStartCode bol_section
127-
toki Whitespace pos len inp }
108+
$spacetab+ ; --TODO: don't allow tab as leading space
109+
110+
"--" $comment* ; -- TODO capture comments instead of deleting them
111+
112+
@name { toki TokSym }
113+
@string { \pos len inp -> return $! L pos (TokStr (B.take (len - 2) (B.tail inp))) }
114+
@oplike { toki TokOther }
115+
$paren { toki TokOther }
116+
\: { tok Colon }
117+
\{ { tok OpenBrace }
118+
\} { tok CloseBrace }
119+
@nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_section >> lexToken }
128120
}
129121

130122
<bol_field_layout> {
131-
@nbspspacetab* { \pos len inp -> do
132-
len' <- checkLeadingWhitespace pos len inp
133-
if B.length inp == len
134-
then return (L pos EOF)
135-
else do
136-
-- Small hack: if char and byte length mismatch
137-
-- subtract the difference, so lexToken will count position correctly.
138-
-- Proper (and slower) fix is to count utf8 length in lexToken
139-
when (len' /= len) $ adjustPos (incPos (len' - len))
140-
setStartCode in_field_layout
141-
return (L pos (Indent len')) }
123+
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
124+
if B.length inp == len
125+
then return (L pos EOF)
126+
else do
127+
-- Small hack: if char and byte length mismatch
128+
-- subtract the difference, so lexToken will count position correctly.
129+
-- Proper (and slower) fix is to count utf8 length in lexToken
130+
when (len' /= len) $ adjustPos (incPos (len' - len))
131+
setStartCode in_field_layout
132+
return (L pos (Indent len')) }
142133
}
143134

144135
<in_field_layout> {
145-
$spacetab+ { toki Whitespace }
146-
$field_layout' $field_layout* { toki TokFieldLine }
147-
@nl { \pos len inp -> do
148-
adjustPos retPos
149-
setStartCode bol_field_layout
150-
toki Whitespace pos len inp }
136+
$spacetab+;
137+
$field_layout' $field_layout* { toki TokFieldLine }
138+
@nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_field_layout >> lexToken }
151139
}
152140

153141
<bol_field_braces> {
154-
() { \_ _ _ -> setStartCode in_field_braces >> lexToken }
142+
() { \_ _ _ -> setStartCode in_field_braces >> lexToken }
155143
}
156144

157145
<in_field_braces> {
158-
$spacetab+ { toki Whitespace }
146+
$spacetab+;
159147
$field_braces' $field_braces* { toki TokFieldLine }
160-
\{ { tok OpenBrace }
161-
\} { tok CloseBrace }
162-
@nl { \pos len inp -> do
163-
adjustPos retPos
164-
setStartCode bol_field_braces
165-
toki Whitespace pos len inp }
148+
\{ { tok OpenBrace }
149+
\} { tok CloseBrace }
150+
@nl { \_ _ _ -> adjustPos retPos >> setStartCode bol_field_braces >> lexToken }
166151
}
167152

168153
{
169154

170155
-- | Tokens of outer cabal file structure. Field values are treated opaquely.
171-
data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or operator
172-
| TokStr !ByteString -- ^ String in quotes
173-
| TokOther !ByteString -- ^ Operators and parens
174-
| Indent !Int -- ^ Indentation token
156+
data Token = TokSym !ByteString -- ^ Haskell-like identifier, number or operator
157+
| TokStr !ByteString -- ^ String in quotes
158+
| TokOther !ByteString -- ^ Operators and parens
159+
| Indent !Int -- ^ Indentation token
175160
| TokFieldLine !ByteString -- ^ Lines after @:@
176161
| Colon
177162
| OpenBrace
178163
| CloseBrace
179-
| Whitespace !ByteString
180-
| Comment !ByteString
181164
| EOF
182165
| LexicalError InputStream --TODO: add separate string lexical error
183166
deriving Show
@@ -247,6 +230,7 @@ lexToken = do
247230
setInput inp'
248231
let !len_bytes = B.length inp - B.length inp'
249232
t <- action pos len_bytes inp
233+
--traceShow t $ return tok
250234
return t
251235

252236

@@ -275,29 +259,11 @@ lexAll = do
275259
_ -> do ts <- lexAll
276260
return (t : ts)
277261

278-
-- FIXME: for debugging
279-
lexAll' :: Lex [(Int, LToken)]
280-
lexAll' = do
281-
t <- lexToken
282-
c <- getStartCode
283-
case t of
284-
L _ EOF -> return [(c, t)]
285-
_ -> do ts <- lexAll'
286-
return ((c, t) : ts)
287-
288262
ltest :: Int -> String -> Prelude.IO ()
289263
ltest code s =
290264
let (ws, xs) = execLexer (setStartCode code >> lexAll) (B.Char8.pack s)
291265
in traverse_ print ws >> traverse_ print xs
292266

293-
lexString :: String -> ([LexWarning], [LToken])
294-
lexString = execLexer lexAll . B.Char8.pack
295-
296-
lexByteString :: ByteString -> ([LexWarning], [LToken])
297-
lexByteString = execLexer lexAll
298-
299-
lexByteString' :: ByteString -> ([LexWarning], [(Int, LToken)])
300-
lexByteString' = execLexer lexAll'
301267

302268
mkLexState :: ByteString -> LexState
303269
mkLexState input = LexState

0 commit comments

Comments
 (0)