diff --git a/CHANGELOG.md b/CHANGELOG.md index c30bae1..15dbec0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ New features: - `Parser.Combinators.many1Till_` (#143 by @jamesbrock) - `Parser.Combinators.manyTillRec_` (#143 by @jamesbrock) - `Parser.Combinators.many1TillRec_` (#143 by @jamesbrock) +- `Parser.String.Basic.number` (#142 by @jamesbrock) +- `Parser.String.Basic.intDecimal` (#142 by @jamesbrock) Bugfixes: @@ -23,8 +25,11 @@ Bugfixes: Other improvements: +- Moved the `Parser.Token` parsers `digit`, `hexDigit`, `octDigit`, `upper`, + `space`, `letter`, `alphaNum` into the new module `Parser.String.Basic`. (#142 by @jamesdbrock) - Documentation. (#140 by @jamesdbrock) - Documentation. (#143 by @jamesdbrock) +- Documentation. (#142 by @jamesdbrock) ## [v8.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.1.0) - 2022-01-10 diff --git a/README.md b/README.md index 9293e59..ef52ec5 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ and then the parser will succeed and return `Right true`. ### More parsers -There are other `String` parsers in the module `Text.Parsing.Parser.Token`, for example the parser `letter :: Parser String Char` which will accept any single alphabetic letter. +There are other `String` parsers in the module `Text.Parsing.Parser.String.Basic`, for example the parser `letter :: Parser String Char` which will accept any single alphabetic letter. ### Parser combinators diff --git a/bench/Main.purs b/bench/Main.purs index c96af76..f741b87 100644 --- a/bench/Main.purs +++ b/bench/Main.purs @@ -55,7 +55,7 @@ import Effect.Unsafe (unsafePerformEffect) import Performance.Minibench (benchWith) import Text.Parsing.Parser (Parser, runParser) import Text.Parsing.Parser.String (string) -import Text.Parsing.Parser.Token (digit) +import Text.Parsing.Parser.String.Basic (digit) import Text.Parsing.StringParser as StringParser import Text.Parsing.StringParser.CodePoints as StringParser.CodePoints import Text.Parsing.StringParser.CodeUnits as StringParser.CodeUnits diff --git a/spago.dhall b/spago.dhall index 7958795..c1ee314 100644 --- a/spago.dhall +++ b/spago.dhall @@ -12,6 +12,7 @@ , "math" , "maybe" , "newtype" + , "numbers" , "prelude" , "strings" , "tailrec" diff --git a/src/Text/Parsing/Parser/Expr.purs b/src/Text/Parsing/Parser/Expr.purs index 22e886f..60f383a 100644 --- a/src/Text/Parsing/Parser/Expr.purs +++ b/src/Text/Parsing/Parser/Expr.purs @@ -1,3 +1,7 @@ +-- | This module is a port of the Haskell +-- | [__Text.Parsec.Expr__](https://hackage.haskell.org/package/docs/Text-Parsec-Expr.html) +-- | module. + module Text.Parsing.Parser.Expr ( Assoc(..) , Operator(..) diff --git a/src/Text/Parsing/Parser/Indent.purs b/src/Text/Parsing/Parser/Indent.purs index d14742c..fc54c03 100644 --- a/src/Text/Parsing/Parser/Indent.purs +++ b/src/Text/Parsing/Parser/Indent.purs @@ -1,5 +1,7 @@ --- | This is purescript-port of Text.Parsing.Indent --- | , 05.07.2016. +-- | This module is a port of the Haskell +-- | [__Text.Parsec.Indent__](https://hackage.haskell.org/package/indents-0.3.3/docs/Text-Parsec-Indent.html) +-- | module from 2016-05-07. +-- | -- | A module to construct indentation aware parsers. Many programming -- | language have indentation based syntax rules e.g. python and Haskell. -- | This module exports combinators to create such parsers. diff --git a/src/Text/Parsing/Parser/Language.purs b/src/Text/Parsing/Parser/Language.purs index 6f74011..151684a 100644 --- a/src/Text/Parsing/Parser/Language.purs +++ b/src/Text/Parsing/Parser/Language.purs @@ -1,3 +1,6 @@ +-- | This module is a port of the Haskell +-- | [__Text.Parsec.Language__](https://hackage.haskell.org/package/parsec/docs/Text-Parsec-Language.html) +-- | module. module Text.Parsing.Parser.Language ( haskellDef , haskell @@ -11,7 +14,8 @@ import Prelude import Control.Alt ((<|>)) import Text.Parsing.Parser (ParserT) import Text.Parsing.Parser.String (char, oneOf) -import Text.Parsing.Parser.Token (GenLanguageDef(..), LanguageDef, TokenParser, alphaNum, letter, makeTokenParser, unGenLanguageDef) +import Text.Parsing.Parser.String.Basic (alphaNum, letter) +import Text.Parsing.Parser.Token (GenLanguageDef(..), LanguageDef, TokenParser, makeTokenParser, unGenLanguageDef) ----------------------------------------------------------- -- Styles: haskellStyle, javaStyle diff --git a/src/Text/Parsing/Parser/String/Basic.purs b/src/Text/Parsing/Parser/String/Basic.purs new file mode 100644 index 0000000..1a8f2ac --- /dev/null +++ b/src/Text/Parsing/Parser/String/Basic.purs @@ -0,0 +1,119 @@ +-- | Basic `String` parsers derived from primitive `String` parsers. +-- | +-- | Note: In the future, the +-- | __noneOf__, __noneOfCodePoints__, __oneOf__, __oneOfCodePoints__, __skipSpaces__, __whiteSpace__ +-- | should be moved into this module and removed from the +-- | __Text.Parsing.Parser.String__ module, because they are not primitive parsers. +module Text.Parsing.Parser.String.Basic + ( digit + , hexDigit + , octDigit + , letter + , space + , upper + , alphaNum + , intDecimal + , number + , module Text.Parsing.Parser.String + ) where + +import Prelude + +import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper) +import Data.Int as Data.Int +import Data.Maybe (Maybe(..)) +import Data.Number (infinity, nan) +import Data.Number as Data.Number +import Data.String (CodePoint) +import Data.String.CodePoints (codePointFromChar) +import Data.Tuple (Tuple(..)) +import Text.Parsing.Parser (ParserT, fail) +import Text.Parsing.Parser.Combinators (choice, skipMany, ()) +import Text.Parsing.Parser.String (noneOf, noneOfCodePoints, oneOf, oneOfCodePoints, skipSpaces, whiteSpace) +import Text.Parsing.Parser.String as Parser.String + +-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`. +digit :: forall m. Monad m => ParserT String m Char +digit = satisfyCP isDecDigit "digit" + +-- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`. +hexDigit :: forall m. Monad m => ParserT String m Char +hexDigit = satisfyCP isHexDigit "hex digit" + +-- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`. +octDigit :: forall m. Monad m => ParserT String m Char +octDigit = satisfyCP isOctDigit "oct digit" + +-- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`. +upper :: forall m. Monad m => ParserT String m Char +upper = satisfyCP isUpper "uppercase letter" + +-- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`. +space :: forall m. Monad m => ParserT String m Char +space = satisfyCP isSpace "space" + +-- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`. +letter :: forall m. Monad m => ParserT String m Char +letter = satisfyCP isAlpha "letter" + +-- | Parse an alphabetical or numerical character. +-- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`. +alphaNum :: forall m. Monad m => ParserT String m Char +alphaNum = satisfyCP isAlphaNum "letter or digit" + +-- | Parser based on the __Data.Number.fromString__ function. +-- | +-- | This should be the inverse of `show :: String -> Number`. +-- | +-- | Examples of strings which can be parsed by this parser: +-- | * `"3"` +-- | * `"3.0"` +-- | * `"0.3"` +-- | * `"-0.3"` +-- | * `"+0.3"` +-- | * `"-3e-1"` +-- | * `"-3.0E-1.0"` +-- | * `"NaN"` +-- | * `"-Infinity"` +number :: forall m. Monad m => ParserT String m Number +-- TODO because the JavaScript parseFloat function will successfully parse +-- a Number up until it doesn't understand something and then return +-- the partially parsed Number, this parser will sometimes consume more +-- String that it actually parses. Example "1..3" will parse as 1.0. +-- So this needs improvement. +number = + choice + [ Parser.String.string "Infinity" *> pure infinity + , Parser.String.string "+Infinity" *> pure infinity + , Parser.String.string "-Infinity" *> pure (negate infinity) + , Parser.String.string "NaN" *> pure nan + , do + Tuple section _ <- Parser.String.match do + _ <- Parser.String.oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] + skipMany $ Parser.String.oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] + -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat + case Data.Number.fromString section of + Nothing -> fail $ "Could not parse Number " <> section + Just x -> pure x + ] + +-- | Parser based on the __Data.Int.fromString__ function. +-- | +-- | This should be the inverse of `show :: String -> Int`. +-- | +-- | Examples of strings which can be parsed by this parser: +-- | * `"3"` +-- | * `"-3"` +-- | * `"+300"` +intDecimal :: forall m. Monad m => ParserT String m Int +intDecimal = do + Tuple section _ <- Parser.String.match do + _ <- Parser.String.oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] + skipMany $ Parser.String.oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] + case Data.Int.fromString section of + Nothing -> fail $ "Could not parse Int " <> section + Just x -> pure x + +-- | Helper function +satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char +satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar) diff --git a/src/Text/Parsing/Parser/Token.purs b/src/Text/Parsing/Parser/Token.purs index 3241817..b1a1320 100644 --- a/src/Text/Parsing/Parser/Token.purs +++ b/src/Text/Parsing/Parser/Token.purs @@ -1,4 +1,8 @@ -- | Functions for working with streams of tokens. +-- | +-- | This module is a port of the Haskell +-- | [__Text.Parsec.Token__](https://hackage.haskell.org/package/docs/Text-Parsec-Token.html) +-- | module. module Text.Parsing.Parser.Token ( token @@ -11,14 +15,7 @@ module Text.Parsing.Parser.Token , TokenParser , GenTokenParser , makeTokenParser - -- should these be exported? Maybe they should go in a different module? - , digit - , hexDigit - , octDigit - , upper - , space - , letter - , alphaNum + , module Text.Parsing.Parser.String.Basic ) where import Prelude hiding (between, when) @@ -28,7 +25,7 @@ import Control.Monad.State (get, gets, modify_) import Control.MonadPlus (guard, (<|>)) import Data.Array as Array import Data.Char (fromCharCode, toCharCode) -import Data.CodePoint.Unicode (hexDigitToInt, isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper) +import Data.CodePoint.Unicode (hexDigitToInt, isAlpha, isSpace) import Data.Either (Either(..)) import Data.Foldable (foldl, foldr) import Data.Identity (Identity) @@ -37,7 +34,7 @@ import Data.List (List(..)) import Data.List as List import Data.List.NonEmpty (NonEmptyList) import Data.Maybe (Maybe(..), maybe) -import Data.String (CodePoint, null, toLower) +import Data.String (null, toLower) import Data.String.CodePoints (codePointFromChar) import Data.String.CodeUnits (singleton, toChar) as CodeUnits import Data.String.CodeUnits as SCU @@ -48,6 +45,8 @@ import Text.Parsing.Parser (ParseState(..), ParserT, consume, fail) import Text.Parsing.Parser.Combinators (between, choice, notFollowedBy, option, sepBy, sepBy1, skipMany, skipMany1, try, tryRethrow, (), ()) import Text.Parsing.Parser.Pos (Position) import Text.Parsing.Parser.String (char, noneOf, oneOf, satisfy, satisfyCodePoint, string) +import Text.Parsing.Parser.String.Basic as Basic +import Text.Parsing.Parser.String.Basic (digit, hexDigit, octDigit, upper, space, letter, alphaNum) -- | A parser which returns the first token in the stream. token :: forall m a. Monad m => (a -> Position) -> ParserT (List a) m a @@ -475,7 +474,7 @@ makeTokenParser (LanguageDef languageDef) = escapeEmpty = char '&' escapeGap :: ParserT String m Char - escapeGap = Array.some space *> char '\\' "end of string gap" + escapeGap = Array.some Basic.space *> char '\\' "end of string gap" -- -- escape codes escapeCode :: ParserT String m Char @@ -485,7 +484,7 @@ makeTokenParser (LanguageDef languageDef) = charControl :: ParserT String m Char charControl = do _ <- char '^' - code <- upper + code <- Basic.upper case fromCharCode (toCharCode code - toCharCode 'A' + 1) of Just c -> pure c Nothing -> fail "invalid character code (should not happen)" @@ -493,8 +492,8 @@ makeTokenParser (LanguageDef languageDef) = charNum :: ParserT String m Char charNum = do code <- decimal - <|> (char 'o' *> number 8 octDigit) - <|> (char 'x' *> number 16 hexDigit) + <|> (char 'o' *> number 8 Basic.octDigit) + <|> (char 'x' *> number 16 Basic.hexDigit) if code > 0x10FFFF then fail "invalid escape sequence" else case fromCharCode code of Just c -> pure c @@ -646,7 +645,7 @@ makeTokenParser (LanguageDef languageDef) = fraction :: ParserT String m Number fraction = "fraction" do _ <- char '.' - digits <- Array.some digit "fraction" + digits <- Array.some Basic.digit "fraction" maybe (fail "not digit") pure $ foldr op (Just 0.0) digits where op :: Char -> Maybe Number -> Maybe Number @@ -688,13 +687,13 @@ makeTokenParser (LanguageDef languageDef) = (hexadecimal <|> octal <|> decimal <|> pure 0) "" decimal :: ParserT String m Int - decimal = number 10 digit + decimal = number 10 Basic.digit hexadecimal :: ParserT String m Int - hexadecimal = oneOf [ 'x', 'X' ] *> number 16 hexDigit + hexadecimal = oneOf [ 'x', 'X' ] *> number 16 Basic.hexDigit octal :: ParserT String m Int - octal = oneOf [ 'o', 'O' ] *> number 8 octDigit + octal = oneOf [ 'o', 'O' ] *> number 8 Basic.octDigit number :: Int -> ParserT String m Char -> ParserT String m Int number base baseDigit = do @@ -878,38 +877,3 @@ inCommentSingle (LanguageDef languageDef) = startEnd :: Array Char startEnd = SCU.toCharArray languageDef.commentEnd <> SCU.toCharArray languageDef.commentStart -------------------------------------------------------------------------- --- Helper functions that should maybe go in Text.Parsing.Parser.String -- -------------------------------------------------------------------------- - -satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char -satisfyCP p = satisfy (p <<< codePointFromChar) - --- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`. -digit :: forall m. Monad m => ParserT String m Char -digit = satisfyCP isDecDigit "digit" - --- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`. -hexDigit :: forall m. Monad m => ParserT String m Char -hexDigit = satisfyCP isHexDigit "hex digit" - --- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`. -octDigit :: forall m. Monad m => ParserT String m Char -octDigit = satisfyCP isOctDigit "oct digit" - --- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`. -upper :: forall m. Monad m => ParserT String m Char -upper = satisfyCP isUpper "uppercase letter" - --- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`. -space :: forall m. Monad m => ParserT String m Char -space = satisfyCP isSpace "space" - --- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`. -letter :: forall m. Monad m => ParserT String m Char -letter = satisfyCP isAlpha "letter" - --- | Parse an alphabetical or numerical character. --- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`. -alphaNum :: forall m. Monad m => ParserT String m Char -alphaNum = satisfyCP isAlphaNum "letter or digit" diff --git a/test/Main.purs b/test/Main.purs index 5aa2cbe..db30c75 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -11,6 +11,7 @@ import Data.List (List(..), fromFoldable, many) import Data.List.NonEmpty (cons, cons') import Data.List.NonEmpty as NE import Data.Maybe (Maybe(..), fromJust) +import Data.Number (infinity, isNaN) import Data.String.CodePoints as SCP import Data.String.CodeUnits (fromCharArray, singleton) import Data.String.CodeUnits as SCU @@ -25,7 +26,8 @@ import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser) import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle) import Text.Parsing.Parser.Pos (Position(..), initialPos) import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, rest, satisfy, string, takeN, whiteSpace) -import Text.Parsing.Parser.Token (TokenParser, letter, makeTokenParser, match, token, when) +import Text.Parsing.Parser.String.Basic (intDecimal, number, letter) +import Text.Parsing.Parser.Token (TokenParser, makeTokenParser, match, token, when) import Text.Parsing.Parser.Token as Parser.Token parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a @@ -655,6 +657,32 @@ main = do parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 }) parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 }) + parseTest "Infinity" infinity number + parseTest "+Infinity" infinity number + parseTest "-Infinity" (negate infinity) number + parseErrorTestPosition number "+xxx" (mkPos 2) + + parseTest "-3.0E-1.0" (-0.3) number + + -- test from issue #73 + parseTest "0.7531531167929774" 0.7531531167929774 number + + -- test from issue #115 + parseTest "-6.0" (-6.0) number + parseTest "+6.0" (6.0) number + + -- we can't test "NaN" with `parseTest` because nan doesn't compare equal + case runParser "NaN" number of + Right actual -> do + assert' ("expected: NaN, actual: " <> show actual) (isNaN actual) + logShow actual + Left err -> assert' ("error: " <> show err) false + + -- TODO This shows the current limitations of the number parser. Ideally this parse should fail. + parseTest "1..3" 1.0 $ number <* eof + + parseTest "-300" (-300) intDecimal + stackSafeLoopsTest tokenParserIdentifierTest