Skip to content

Commit 96ad113

Browse files
committed
WIP splitCap rewrite
1 parent 2e5f186 commit 96ad113

File tree

2 files changed

+134
-46
lines changed

2 files changed

+134
-46
lines changed

src/Parsing/String/Replace.purs

Lines changed: 132 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,24 @@ module Parsing.String.Replace
6969

7070
import Prelude
7171

72-
import Control.Monad.Rec.Class (class MonadRec)
72+
import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM)
7373
import Data.Either (Either(..), hush)
74-
import Data.List (List(..), (:))
75-
import Data.List.NonEmpty (NonEmptyList(..), fold1, singleton)
76-
import Data.Maybe (Maybe)
74+
import Data.List (List(..), foldl, uncons, (:))
75+
import Data.List.NonEmpty (NonEmptyList(..), fold1, fromList, singleton)
76+
import Data.Maybe (Maybe(..), fromJust)
7777
import Data.Newtype (unwrap, wrap)
7878
import Data.NonEmpty ((:|))
79-
import Data.String (null)
79+
import Data.String (CodePoint, null)
80+
import Data.String as CodePoint
81+
import Data.String as String
8082
import Data.String.CodeUnits as CodeUnits
8183
import Data.Traversable (for)
8284
import Data.Tuple (Tuple(..))
83-
import Data.Tuple.Nested (T3, (/\))
84-
import Parsing (ParseState(..), Parser, ParserT, getParserT, runParserT)
85-
import Parsing.Combinators (many, try)
86-
import Parsing.String (anyCodePoint, anyTill)
85+
import Data.Tuple.Nested (T3(..), Tuple3(..), (/\))
86+
import Parsing (ParseState(..), Parser, ParserT, Position(..), getParserT, position, runParserT)
87+
import Parsing.Combinators (many, optionMaybe, try)
88+
import Parsing.String (anyCodePoint, anyTill, rest)
89+
import Partial.Unsafe (unsafePartial)
8790

8891
-- | Monad transformer version of `breakCap`. The `sep` parser will run
8992
-- | in the monad context.
@@ -208,50 +211,134 @@ breakCap input sep = unwrap $ breakCapT input sep
208211
-- | ```purescript
209212
-- | [Right ('A' /\ 1), Left " ", Right ('B' /\ 2)] /\ 2
210213
-- | ```
214+
215+
216+
-- splitCapT
217+
-- :: forall m a
218+
-- . Monad m
219+
-- => MonadRec m
220+
-- => String
221+
-- -> ParserT String m a
222+
-- -> m (NonEmptyList (Either String a))
223+
-- splitCapT input sep =
224+
-- -- We have to make sure we reasonably handle two cases:
225+
-- -- 1. The sep parser fails but consumes input
226+
-- -- 2. The sep parser succeeds but does not consume any input
227+
-- runParse >>= case _ of
228+
-- Left _ -> pure $ singleton $ Left input
229+
-- Right (Tuple xs remain) -> do
230+
-- let final = if null remain then Nil else (Left remain) : Nil
231+
-- let
232+
-- go Nil = final
233+
-- go ((Tuple "" result) : rest) =
234+
-- Right result : go rest
235+
-- go ((Tuple prefix result) : rest) =
236+
-- Left prefix : Right result : go rest
237+
-- case xs of
238+
-- Nil -> pure $ singleton $ Left input
239+
-- Cons (Tuple "" result) rest -> do
240+
-- let rest' = go rest
241+
-- pure $ NonEmptyList $ Right result :| rest'
242+
-- Cons (Tuple prefix result) rest -> do
243+
-- let rest' = go rest
244+
-- pure $ NonEmptyList $ Left prefix :| Right result : rest'
245+
-- where
246+
-- runParse = runParserT input $ do
247+
-- -- TODO forceProgress on every failed parse? can we do better?
248+
-- -- xs <- many (anyTill $ forceProgress sep)
249+
-- -- xs <- many (try $ forceProgress $ anyTill sep)
250+
-- xs <- many (try anyTill sep)
251+
-- ParseState remain _ _ <- getParserT
252+
-- pure $ Tuple xs remain
253+
-- -- If the parser succeeds but consumes no input, then force it to skip
254+
-- -- ahead one char.
255+
-- forceProgress p = do
256+
-- ParseState remain0 _ _ <- getParserT
257+
-- x <- p
258+
-- ParseState remain1 _ _ <- getParserT
259+
-- if CodeUnits.length remain0 == CodeUnits.length remain1 then -- p succeeded but consumed nothing
260+
--
261+
-- anyCodePoint *> pure x
262+
-- else
263+
-- pure x
264+
265+
266+
211267
splitCapT
212268
:: forall m a
213269
. Monad m
214270
=> MonadRec m
215271
=> String
216272
-> ParserT String m a
217273
-> m (NonEmptyList (Either String a))
218-
splitCapT input sep =
219-
runParse >>= case _ of
220-
Left _ -> pure $ singleton $ Left input
221-
Right (Tuple xs remain) -> do
222-
let final = if null remain then Nil else (Left remain) : Nil
223-
let
224-
go Nil = final
225-
go ((Tuple "" result) : rest) =
226-
Right result : go rest
227-
go ((Tuple prefix result) : rest) =
228-
Left prefix : Right result : go rest
229-
case xs of
230-
Nil -> pure $ singleton $ Left input
231-
Cons (Tuple "" result) rest -> do
232-
let rest' = go rest
233-
pure $ NonEmptyList $ Right result :| rest'
234-
Cons (Tuple prefix result) rest -> do
235-
let rest' = go rest
236-
pure $ NonEmptyList $ Left prefix :| Right result : rest'
274+
splitCapT input sep = do
275+
-- We have to make sure we reasonably handle two cases:
276+
-- 1. The sep parser fails but consumes input
277+
-- 2. The sep parser succeeds but does not consume any input
278+
runParserT input (Tuple <$> (splitCapCombinator sep) <*> rest) >>= case _ of
279+
Left _ -> pure (singleton (Left input))
280+
Right (Tuple (Tuple rlist _) remain) -> case uncons rlist of
281+
Nothing -> pure (singleton (Left input))
282+
Just {head: Tuple hunmatched hmatched ,tail} ->
283+
let term =
284+
if String.null remain then
285+
if String.null hunmatched then
286+
(Right hmatched : Nil)
287+
else
288+
(Left hunmatched : Right hmatched : Nil)
289+
else
290+
if String.null hunmatched then
291+
(Right hmatched : Left remain : Nil)
292+
else
293+
(Left hunmatched : Right hmatched : Left remain : Nil)
294+
list = foldl (\ls (Tuple unmatched matched) ->
295+
if String.null unmatched then
296+
(Right matched : ls)
297+
else
298+
(Left unmatched : Right matched : ls)
299+
) term tail
300+
in
301+
pure $ unsafePartial $ fromJust $ fromList list
302+
303+
-- | Internal splitCap helper combinator. Returns
304+
-- | - the reversed `many anyTill` List
305+
-- | - the length of the nonempty tuple elements in the List.
306+
splitCapCombinator
307+
:: forall m a
308+
. Monad m
309+
=> ParserT String m a
310+
-> ParserT String m (Tuple (List (Tuple String a)) Int)
311+
splitCapCombinator sep = tailRecM accum {lastPosIndex:0,carry:Nothing,list:Nil,arraySize:0}
237312
where
238-
runParse = runParserT input $ do
239-
-- TODO forceProgress on every failed parse? can we do better?
240-
-- xs <- many (anyTill $ forceProgress sep)
241-
xs <- many (try $ forceProgress $ anyTill sep)
242-
ParseState remain _ _ <- getParserT
243-
pure $ Tuple xs remain
244-
-- If the parser succeeds but consumes no input, then force it to skip
245-
-- ahead one char.
246-
forceProgress p = do
247-
ParseState remain0 _ _ <- getParserT
248-
x <- p
249-
ParseState remain1 _ _ <- getParserT
250-
if CodeUnits.length remain0 == CodeUnits.length remain1 then -- p succeeded but consumed nothing
313+
accum :: {lastPosIndex::Int,carry::Maybe CodePoint,list::List (Tuple String a),arraySize::Int} -> ParserT String m (Step {lastPosIndex::Int,carry::Maybe CodePoint,list::List (Tuple String a),arraySize::Int} (Tuple (List (Tuple String a)) Int))
314+
accum {lastPosIndex,carry,list,arraySize} = do
315+
optionMaybe (try $ anyTill sep) >>= case _ of
316+
Just (Tuple unmatched matched) -> do
317+
let
318+
carry_unmatched = case carry of
319+
Nothing -> unmatched
320+
Just cp -> CodePoint.singleton cp <> unmatched
321+
Position {index:posIndex} <- position
322+
if posIndex == lastPosIndex then do
323+
-- sep succeeded but consumed no input, so advance by one codepoint and
324+
-- carry the codepoint over to cons to the next unmatched
325+
carryNext <- anyCodePoint
326+
pure $ Loop
327+
{ lastPosIndex: posIndex+1
328+
, carry: Just carryNext
329+
, list: (Tuple carry_unmatched matched : list)
330+
, arraySize: if String.null unmatched then arraySize + 1 else arraySize + 2
331+
}
332+
else
333+
pure $ Loop
334+
{ lastPosIndex: posIndex
335+
, carry: Nothing
336+
, list: (Tuple carry_unmatched matched : list)
337+
, arraySize: if String.null unmatched then arraySize + 1 else arraySize + 2
338+
}
339+
Nothing -> -- no pattern found, so we're done
340+
pure (Done (Tuple list arraySize))
251341

252-
anyCodePoint *> pure x
253-
else
254-
pure x
255342

256343
-- | #### Split on and capture all patterns
257344
-- |
@@ -385,6 +472,7 @@ streamEditT
385472
streamEditT input sep editor = do
386473
sections <- splitCapT input sep
387474
-- TODO Is this fold1 efficient like mconcat?
475+
-- Maybe we should use joinWith?
388476
map fold1 $ for sections $ case _ of
389477
Left l -> pure l
390478
Right r -> editor r

test/Main.purs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -926,11 +926,11 @@ main = do
926926
}
927927
assertEqual' "splitCap8"
928928
{ actual: splitCap "aa" (lookAhead $ string "a")
929-
, expected: NonEmptyList $ Right "a" :| Right "a" : Nil
929+
, expected: NonEmptyList $ Right "a" :| Left "a" : Right "a" : Left "a" : Nil
930930
}
931931
assertEqual' "splitCap8B"
932932
{ actual: splitCap "BaBa" (lookAhead $ string "B")
933-
, expected: NonEmptyList $ Right "B" :| Left "a" : Right "B" : Right "B" : Left "a" : Nil
933+
, expected: NonEmptyList $ Right "B" :| Left "Ba" : Right "B" : Left "Ba" : Nil
934934
}
935935
assertEqual' "splitCap9" $
936936
let

0 commit comments

Comments
 (0)