@@ -69,21 +69,24 @@ module Parsing.String.Replace
69
69
70
70
import Prelude
71
71
72
- import Control.Monad.Rec.Class (class MonadRec )
72
+ import Control.Monad.Rec.Class (class MonadRec , Step (..), tailRecM )
73
73
import Data.Either (Either (..), hush )
74
- import Data.List (List (..), (:))
75
- import Data.List.NonEmpty (NonEmptyList (..), fold1 , singleton )
76
- import Data.Maybe (Maybe )
74
+ import Data.List (List (..), foldl , uncons , (:))
75
+ import Data.List.NonEmpty (NonEmptyList (..), fold1 , fromList , singleton )
76
+ import Data.Maybe (Maybe (..), fromJust )
77
77
import Data.Newtype (unwrap , wrap )
78
78
import Data.NonEmpty ((:|))
79
- import Data.String (null )
79
+ import Data.String (CodePoint , null )
80
+ import Data.String as CodePoint
81
+ import Data.String as String
80
82
import Data.String.CodeUnits as CodeUnits
81
83
import Data.Traversable (for )
82
84
import Data.Tuple (Tuple (..))
83
- import Data.Tuple.Nested (T3 , (/\))
84
- import Parsing (ParseState (..), Parser , ParserT , getParserT , runParserT )
85
- import Parsing.Combinators (many , try )
86
- import Parsing.String (anyCodePoint , anyTill )
85
+ import Data.Tuple.Nested (T3 (..), Tuple3 (..), (/\))
86
+ import Parsing (ParseState (..), Parser , ParserT , Position (..), getParserT , position , runParserT )
87
+ import Parsing.Combinators (many , optionMaybe , try )
88
+ import Parsing.String (anyCodePoint , anyTill , rest )
89
+ import Partial.Unsafe (unsafePartial )
87
90
88
91
-- | Monad transformer version of `breakCap`. The `sep` parser will run
89
92
-- | in the monad context.
@@ -208,50 +211,134 @@ breakCap input sep = unwrap $ breakCapT input sep
208
211
-- | ```purescript
209
212
-- | [Right ('A' /\ 1), Left " ", Right ('B' /\ 2)] /\ 2
210
213
-- | ```
214
+
215
+
216
+ -- splitCapT
217
+ -- :: forall m a
218
+ -- . Monad m
219
+ -- => MonadRec m
220
+ -- => String
221
+ -- -> ParserT String m a
222
+ -- -> m (NonEmptyList (Either String a))
223
+ -- splitCapT input sep =
224
+ -- -- We have to make sure we reasonably handle two cases:
225
+ -- -- 1. The sep parser fails but consumes input
226
+ -- -- 2. The sep parser succeeds but does not consume any input
227
+ -- runParse >>= case _ of
228
+ -- Left _ -> pure $ singleton $ Left input
229
+ -- Right (Tuple xs remain) -> do
230
+ -- let final = if null remain then Nil else (Left remain) : Nil
231
+ -- let
232
+ -- go Nil = final
233
+ -- go ((Tuple "" result) : rest) =
234
+ -- Right result : go rest
235
+ -- go ((Tuple prefix result) : rest) =
236
+ -- Left prefix : Right result : go rest
237
+ -- case xs of
238
+ -- Nil -> pure $ singleton $ Left input
239
+ -- Cons (Tuple "" result) rest -> do
240
+ -- let rest' = go rest
241
+ -- pure $ NonEmptyList $ Right result :| rest'
242
+ -- Cons (Tuple prefix result) rest -> do
243
+ -- let rest' = go rest
244
+ -- pure $ NonEmptyList $ Left prefix :| Right result : rest'
245
+ -- where
246
+ -- runParse = runParserT input $ do
247
+ -- -- TODO forceProgress on every failed parse? can we do better?
248
+ -- -- xs <- many (anyTill $ forceProgress sep)
249
+ -- -- xs <- many (try $ forceProgress $ anyTill sep)
250
+ -- xs <- many (try anyTill sep)
251
+ -- ParseState remain _ _ <- getParserT
252
+ -- pure $ Tuple xs remain
253
+ -- -- If the parser succeeds but consumes no input, then force it to skip
254
+ -- -- ahead one char.
255
+ -- forceProgress p = do
256
+ -- ParseState remain0 _ _ <- getParserT
257
+ -- x <- p
258
+ -- ParseState remain1 _ _ <- getParserT
259
+ -- if CodeUnits.length remain0 == CodeUnits.length remain1 then -- p succeeded but consumed nothing
260
+ --
261
+ -- anyCodePoint *> pure x
262
+ -- else
263
+ -- pure x
264
+
265
+
266
+
211
267
splitCapT
212
268
:: forall m a
213
269
. Monad m
214
270
=> MonadRec m
215
271
=> String
216
272
-> ParserT String m a
217
273
-> m (NonEmptyList (Either String a ))
218
- splitCapT input sep =
219
- runParse >>= case _ of
220
- Left _ -> pure $ singleton $ Left input
221
- Right (Tuple xs remain) -> do
222
- let final = if null remain then Nil else (Left remain) : Nil
223
- let
224
- go Nil = final
225
- go ((Tuple " " result) : rest) =
226
- Right result : go rest
227
- go ((Tuple prefix result) : rest) =
228
- Left prefix : Right result : go rest
229
- case xs of
230
- Nil -> pure $ singleton $ Left input
231
- Cons (Tuple " " result) rest -> do
232
- let rest' = go rest
233
- pure $ NonEmptyList $ Right result :| rest'
234
- Cons (Tuple prefix result) rest -> do
235
- let rest' = go rest
236
- pure $ NonEmptyList $ Left prefix :| Right result : rest'
274
+ splitCapT input sep = do
275
+ -- We have to make sure we reasonably handle two cases:
276
+ -- 1. The sep parser fails but consumes input
277
+ -- 2. The sep parser succeeds but does not consume any input
278
+ runParserT input (Tuple <$> (splitCapCombinator sep) <*> rest) >>= case _ of
279
+ Left _ -> pure (singleton (Left input))
280
+ Right (Tuple (Tuple rlist _) remain) -> case uncons rlist of
281
+ Nothing -> pure (singleton (Left input))
282
+ Just {head: Tuple hunmatched hmatched ,tail} ->
283
+ let term =
284
+ if String .null remain then
285
+ if String .null hunmatched then
286
+ (Right hmatched : Nil )
287
+ else
288
+ (Left hunmatched : Right hmatched : Nil )
289
+ else
290
+ if String .null hunmatched then
291
+ (Right hmatched : Left remain : Nil )
292
+ else
293
+ (Left hunmatched : Right hmatched : Left remain : Nil )
294
+ list = foldl (\ls (Tuple unmatched matched) ->
295
+ if String .null unmatched then
296
+ (Right matched : ls)
297
+ else
298
+ (Left unmatched : Right matched : ls)
299
+ ) term tail
300
+ in
301
+ pure $ unsafePartial $ fromJust $ fromList list
302
+
303
+ -- | Internal splitCap helper combinator. Returns
304
+ -- | - the reversed `many anyTill` List
305
+ -- | - the length of the nonempty tuple elements in the List.
306
+ splitCapCombinator
307
+ :: forall m a
308
+ . Monad m
309
+ => ParserT String m a
310
+ -> ParserT String m (Tuple (List (Tuple String a )) Int )
311
+ splitCapCombinator sep = tailRecM accum {lastPosIndex:0 ,carry:Nothing ,list:Nil ,arraySize:0 }
237
312
where
238
- runParse = runParserT input $ do
239
- -- TODO forceProgress on every failed parse? can we do better?
240
- -- xs <- many (anyTill $ forceProgress sep)
241
- xs <- many (try $ forceProgress $ anyTill sep)
242
- ParseState remain _ _ <- getParserT
243
- pure $ Tuple xs remain
244
- -- If the parser succeeds but consumes no input, then force it to skip
245
- -- ahead one char.
246
- forceProgress p = do
247
- ParseState remain0 _ _ <- getParserT
248
- x <- p
249
- ParseState remain1 _ _ <- getParserT
250
- if CodeUnits .length remain0 == CodeUnits .length remain1 then -- p succeeded but consumed nothing
313
+ accum :: { lastPosIndex :: Int ,carry :: Maybe CodePoint ,list :: List (Tuple String a ),arraySize :: Int } -> ParserT String m (Step { lastPosIndex :: Int ,carry :: Maybe CodePoint ,list :: List (Tuple String a ),arraySize :: Int } (Tuple (List (Tuple String a )) Int ))
314
+ accum {lastPosIndex,carry,list,arraySize} = do
315
+ optionMaybe (try $ anyTill sep) >>= case _ of
316
+ Just (Tuple unmatched matched) -> do
317
+ let
318
+ carry_unmatched = case carry of
319
+ Nothing -> unmatched
320
+ Just cp -> CodePoint .singleton cp <> unmatched
321
+ Position {index:posIndex} <- position
322
+ if posIndex == lastPosIndex then do
323
+ -- sep succeeded but consumed no input, so advance by one codepoint and
324
+ -- carry the codepoint over to cons to the next unmatched
325
+ carryNext <- anyCodePoint
326
+ pure $ Loop
327
+ { lastPosIndex: posIndex+1
328
+ , carry: Just carryNext
329
+ , list: (Tuple carry_unmatched matched : list)
330
+ , arraySize: if String .null unmatched then arraySize + 1 else arraySize + 2
331
+ }
332
+ else
333
+ pure $ Loop
334
+ { lastPosIndex: posIndex
335
+ , carry: Nothing
336
+ , list: (Tuple carry_unmatched matched : list)
337
+ , arraySize: if String .null unmatched then arraySize + 1 else arraySize + 2
338
+ }
339
+ Nothing -> -- no pattern found, so we're done
340
+ pure (Done (Tuple list arraySize))
251
341
252
- anyCodePoint *> pure x
253
- else
254
- pure x
255
342
256
343
-- | #### Split on and capture all patterns
257
344
-- |
@@ -385,6 +472,7 @@ streamEditT
385
472
streamEditT input sep editor = do
386
473
sections <- splitCapT input sep
387
474
-- TODO Is this fold1 efficient like mconcat?
475
+ -- Maybe we should use joinWith?
388
476
map fold1 $ for sections $ case _ of
389
477
Left l -> pure l
390
478
Right r -> editor r
0 commit comments