From 50f128bb6a5646c8d5c429435684bf9f319a4d6e Mon Sep 17 00:00:00 2001 From: Fyodor Soikin Date: Wed, 5 Jan 2022 11:37:12 -0500 Subject: [PATCH 1/4] CodePoint versions of oneOf and noneOf --- src/Text/Parsing/Parser/String.purs | 12 +++++++++++- test/Main.purs | 8 +++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index f15048f..9f0b28d 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -25,7 +25,9 @@ module Text.Parsing.Parser.String , whiteSpace , skipSpaces , oneOf + , oneOfCodePoints , noneOf + , noneOfCodePoints , match ) where @@ -121,6 +123,14 @@ oneOf ss = satisfy (flip elem ss) ("one of " <> show ss) noneOf :: forall m. Monad m => Array Char -> ParserT String m Char noneOf ss = satisfy (flip notElem ss) ("none of " <> show ss) +-- | Match one of the Unicode characters in the array. +oneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint +oneOfCodePoints ss = satisfyCodePoint (flip elem ss) ("one of " <> show ss) + +-- | Match any Unicode character not in the array. +noneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint +noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) ("none of " <> show ss) + -- | Updates a `Position` by adding the columns and lines in `String`. updatePosString :: Position -> String -> Position updatePosString pos str = case uncons str of @@ -154,4 +164,4 @@ match p = do -- | This will break at runtime if the definition of CodePoint ever changes -- | to something other than `newtype CodePoint = CodePoint Int`. unCodePoint :: CodePoint -> Int -unCodePoint = unsafeCoerce \ No newline at end of file +unCodePoint = unsafeCoerce diff --git a/test/Main.purs b/test/Main.purs index 3281e15..d5cf642 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -5,6 +5,7 @@ import Prelude hiding (between, when) import Control.Alt ((<|>)) import Control.Lazy (fix) import Data.Array (some) +import Data.Array as Array import Data.Either (Either(..)) import Data.List (List(..), fromFoldable, many) import Data.List.NonEmpty (cons, cons') @@ -21,7 +22,7 @@ import Text.Parsing.Parser.Combinators (between, chainl, endBy1, optionMaybe, se import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser) import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle) import Text.Parsing.Parser.Pos (Position(..), initialPos) -import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, satisfy, string, whiteSpace) +import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, satisfy, string, whiteSpace) import Text.Parsing.Parser.Token (TokenParser, letter, makeTokenParser, match, token, when) parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a @@ -465,6 +466,11 @@ main = do sixteenth <- string "𝅘𝅥𝅯" <|> (singleton <$> char 'x') pure $ [ SCP.singleton quarter, eighth, letterx, sixteenth ] + parseTest "🤔💯✅🤔💯" [ "🤔💯", "✅🤔💯" ] do + none <- Array.many $ noneOfCodePoints $ SCP.toCodePointArray "❓✅" + one <- Array.many $ oneOfCodePoints $ SCP.toCodePointArray "🤔💯✅" + pure $ SCP.fromCodePointArray <$> [ none, one ] + parseTest "aa bb" [ "aa", " ", "bb" ] do aa <- SCU.fromCharArray <$> some letter w <- whiteSpace From eb8ce95f0cd025ead6eefc6587f1ef1ecd185dca Mon Sep 17 00:00:00 2001 From: Fyodor Soikin Date: Wed, 5 Jan 2022 11:44:08 -0500 Subject: [PATCH 2/4] Changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e53ebca..053707f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ Breaking changes: New features: +- Added primitive parsers `oneOfCodePoints` and `noneOfCodePoints` - `CodePoint` + versions of `oneOf` and `noneOf` respectively. (#127 by @fsoikin) + Bugfixes: Other improvements: From f105646d95bfb0101947dfa3bc305815a26fef62 Mon Sep 17 00:00:00 2001 From: Fyodor Soikin Date: Wed, 5 Jan 2022 11:56:04 -0500 Subject: [PATCH 3/4] Better error messages --- src/Text/Parsing/Parser/String.purs | 8 +++++--- test/Main.purs | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 9f0b28d..4736cbd 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -33,13 +33,15 @@ module Text.Parsing.Parser.String import Prelude hiding (between) +import Control.Alt ((<|>)) +import Control.Lazy (defer) import Control.Monad.State (get, put) import Data.Array (notElem) import Data.Char (fromCharCode) import Data.CodePoint.Unicode (isSpace) import Data.Foldable (elem) import Data.Maybe (Maybe(..)) -import Data.String (CodePoint, Pattern(..), null, stripPrefix, uncons) +import Data.String (CodePoint, Pattern(..), null, singleton, stripPrefix, uncons) import Data.String.CodeUnits as SCU import Data.Tuple (Tuple(..), fst) import Text.Parsing.Parser (ParseState(..), ParserT, fail) @@ -125,11 +127,11 @@ noneOf ss = satisfy (flip notElem ss) ("none of " <> show ss) -- | Match one of the Unicode characters in the array. oneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint -oneOfCodePoints ss = satisfyCodePoint (flip elem ss) ("one of " <> show ss) +oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <|> defer \_ -> fail ("Expected one of " <> show (singleton <$> ss)) -- | Match any Unicode character not in the array. noneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint -noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) ("none of " <> show ss) +noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <|> defer \_ -> fail ("Expected none of " <> show (singleton <$> ss)) -- | Updates a `Position` by adding the columns and lines in `String`. updatePosString :: Position -> String -> Position diff --git a/test/Main.purs b/test/Main.purs index d5cf642..c9276ca 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -17,7 +17,7 @@ import Data.Tuple (Tuple(..)) import Effect (Effect) import Effect.Console (logShow) import Test.Assert (assert') -import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorPosition, region, runParser) +import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorMessage, parseErrorPosition, region, runParser) import Text.Parsing.Parser.Combinators (between, chainl, endBy1, optionMaybe, sepBy1, try) import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser) import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle) @@ -50,6 +50,14 @@ parseErrorTestPosition p input expected = case runParser input p of assert' ("expected: " <> show expected <> ", pos: " <> show pos) (expected == pos) logShow expected +parseErrorTestMessage :: forall s a. Show a => Parser s a -> s -> String -> Effect Unit +parseErrorTestMessage p input expected = case runParser input p of + Right x -> assert' ("ParseError expected '" <> expected <> "' but parsed " <> show x) false + Left err -> do + let msg = parseErrorMessage err + assert' ("expected: " <> expected <> ", message: " <> msg) (expected == msg) + logShow expected + opTest :: Parser String String opTest = chainl (singleton <$> anyChar) (char '+' $> append) "" @@ -471,6 +479,16 @@ main = do one <- Array.many $ oneOfCodePoints $ SCP.toCodePointArray "🤔💯✅" pure $ SCP.fromCodePointArray <$> [ none, one ] + parseErrorTestMessage + (noneOfCodePoints $ SCP.toCodePointArray "❓✅") + "❓" + "Expected none of [\"❓\",\"✅\"]" + + parseErrorTestMessage + (oneOfCodePoints $ SCP.toCodePointArray "❓✅") + "abc" + "Expected one of [\"❓\",\"✅\"]" + parseTest "aa bb" [ "aa", " ", "bb" ] do aa <- SCU.fromCharArray <$> some letter w <- whiteSpace From 1be0aa13c3c470916d4468074bfcb8bfeb37c3c3 Mon Sep 17 00:00:00 2001 From: Fyodor Soikin Date: Wed, 5 Jan 2022 11:57:06 -0500 Subject: [PATCH 4/4] Parens --- src/Text/Parsing/Parser/String.purs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 4736cbd..b12f476 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -127,11 +127,11 @@ noneOf ss = satisfy (flip notElem ss) ("none of " <> show ss) -- | Match one of the Unicode characters in the array. oneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint -oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <|> defer \_ -> fail ("Expected one of " <> show (singleton <$> ss)) +oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <|> defer \_ -> fail $ "Expected one of " <> show (singleton <$> ss) -- | Match any Unicode character not in the array. noneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint -noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <|> defer \_ -> fail ("Expected none of " <> show (singleton <$> ss)) +noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <|> defer \_ -> fail $ "Expected none of " <> show (singleton <$> ss) -- | Updates a `Position` by adding the columns and lines in `String`. updatePosString :: Position -> String -> Position