Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Breaking changes:
- Fix semantics of endBy and sepEndBy parser combinators (#84 by @chtenb)

New features:
- Introduce code point parsers `anyCodePoint`, `codePoint'` and `satisfyCodePoint` (#88 by @chtenb)

Bugfixes:
- Do not export `chainl'` and `chainr'` helper functions (#84 by @chtenb)
Expand Down
1 change: 1 addition & 0 deletions spago.dhall
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
, "maybe"
, "minibench"
, "nonempty"
, "partial"
, "prelude"
, "psci-support"
, "strings"
Expand Down
37 changes: 29 additions & 8 deletions src/Text/Parsing/StringParser/CodePoints.purs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
module Text.Parsing.StringParser.CodePoints
( eof
, anyChar
, anyCodePoint
, anyDigit
, string
, satisfy
, satisfyCodePoint
, char
, codePoint
, whiteSpace
, skipSpaces
, oneOf
Expand All @@ -31,13 +34,14 @@ import Data.Either (Either(..))
import Data.Enum (fromEnum)
import Data.Foldable (class Foldable, foldMap, elem, notElem)
import Data.Maybe (Maybe(..))
import Data.String (CodePoint)
import Data.String.CodePoints as SCP
import Data.String.CodeUnits as SCU
import Data.String.Regex as Regex
import Data.String.Regex.Flags (noFlags)
import Text.Parsing.StringParser (Parser(..), try, fail)
import Text.Parsing.StringParser.Combinators (many, (<?>))
import Text.Parsing.StringParser.CodeUnits as CodeUnitsParser
import Text.Parsing.StringParser.Combinators (many, (<?>))

-- | Match the end of the file.
eof :: Parser Unit
Expand All @@ -46,17 +50,23 @@ eof = Parser \s ->
{ substring, position } | 0 < SCP.length substring -> Left { pos: position, error: "Expected EOF" }
_ -> Right { result: unit, suffix: s }

-- | Match any character.
-- | Match any character from the Basic Multilingual Plane.
anyChar :: Parser Char
anyChar = Parser \{ substring, position } ->
case SCP.codePointAt 0 substring of
Just cp -> case toChar cp of
Just chr -> Right { result: chr, suffix: { substring: SCP.drop 1 substring, position: position + 1 } }
Nothing -> Left { pos: position, error: "CodePoint " <> show cp <> " is not a character" }
Nothing -> Left { pos: position, error: "Unexpected EOF" }
anyChar = do
cp <- anyCodePoint
case toChar cp of
Just chr -> pure chr
Nothing -> fail $ "Code point " <> show cp <> " is not a character"
where
toChar = fromCharCode <<< fromEnum
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed and uncommented the test I added for that earlier :)


-- | Match any code point.
anyCodePoint :: Parser CodePoint
anyCodePoint = Parser \{ substring, position } ->
case SCP.codePointAt 0 substring of
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's better indeed

Just cp -> Right { result: cp, suffix: { substring: SCP.drop 1 substring, position: position + 1 } }
Nothing -> Left { pos: position, error: "Unexpected EOF" }

-- | Match any digit.
anyDigit :: Parser Char
anyDigit = try do
Expand All @@ -81,10 +91,21 @@ satisfy f = try do
if f c then pure c
else fail $ "Character " <> show c <> " did not satisfy predicate"

-- | Match a code point satisfying the given predicate.
satisfyCodePoint :: (CodePoint -> Boolean) -> Parser CodePoint
satisfyCodePoint f = try do
cp <- anyCodePoint
if f cp then pure cp
else fail $ "Code point " <> show cp <> " did not satisfy predicate"

-- | Match the specified character.
char :: Char -> Parser Char
char c = satisfy (_ == c) <?> "Could not match character " <> show c

-- | Match the specified code point.
codePoint :: CodePoint -> Parser CodePoint
codePoint c = satisfyCodePoint (_ == c) <?> "Could not match code point " <> show c

-- | Match many whitespace characters.
whiteSpace :: Parser String
whiteSpace = do
Expand Down
20 changes: 17 additions & 3 deletions test/BasicSpecs.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,44 @@ module Test.BasicSpecs where

import Prelude hiding (between)

import Test.Utils (AnyParser(..), mkAnyParser)
import Control.Alt ((<|>))
import Control.Monad.Writer (Writer, execWriter, tell)
import Data.Either (isRight)
import Data.List (List)
import Data.List as List
import Data.Maybe (fromJust)
import Data.String (CodePoint, codePointAt)
import Data.Traversable (traverse)
import Effect (Effect)
import Effect.Class.Console (log)
import Partial.Unsafe (unsafePartial)
import Test.Assert (assert')
import Test.Utils (AnyParser(..), mkAnyParser)
import Text.Parsing.StringParser (Parser, runParser, try)
import Text.Parsing.StringParser.CodePoints (anyChar, anyDigit, anyLetter, char, eof, skipSpaces, string)
import Text.Parsing.StringParser.CodePoints (anyChar, anyCodePoint, anyDigit, anyLetter, char, codePoint, eof, skipSpaces, string)
import Text.Parsing.StringParser.Combinators (between, chainl, chainl1, endBy, endBy1, lookAhead, many, many1, manyTill, sepBy, sepBy1, sepEndBy, sepEndBy1)

type TestInputs = { successes :: Array String, failures :: Array String }
type TestCase = { name :: String, parser :: AnyParser, inputs :: TestInputs }

codePointLiteral :: String -> CodePoint
codePointLiteral s = unsafePartial $ fromJust $ codePointAt 0 s

testCases :: Array TestCase
testCases =
[ { name: "anyChar"
, parser: mkAnyParser anyChar
-- TODO: test "🙂" which should fail
-- this is an open upstream issue https://github.com/purescript/purescript-strings/issues/153
, inputs: { successes: [ "a", "%" ], failures: [ "" ] }
, inputs: { successes: [ "a", "%" ], failures: [ "", "aa" ] }
}
, { name: "anyCodePoint"
, parser: mkAnyParser anyCodePoint
, inputs: { successes: [ "a", "%", "🙂" ], failures: [ "", "aa" ] }
}
, { name: "codePoint"
, parser: mkAnyParser $ codePoint $ codePointLiteral "🙂"
, inputs: { successes: [ "🙂" ], failures: [ "", "a", "aa" ] }
}
, { name: "anyLetter"
, parser: mkAnyParser anyLetter
Expand Down