diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index 56179bf..44d93fd 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -17,6 +17,9 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 + - name: Set Swift version + if: matrix.os == 'ubuntu-18.04' + run: echo "5.3-DEVELOPMENT-SNAPSHOT-2020-08-08-a" > .swift-version - name: Install Swift uses: YOCKOW/Action-setup-swift@master - name: Build diff --git a/.swift-version b/.swift-version index 73ce950..d346e2a 100644 --- a/.swift-version +++ b/.swift-version @@ -1 +1 @@ -5.2.4 +5.3 diff --git a/.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/1868159C-3A7D-4BC9-A194-7BBFC5CE3264.plist b/.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/1868159C-3A7D-4BC9-A194-7BBFC5CE3264.plist index d60639b..025da2e 100644 --- a/.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/1868159C-3A7D-4BC9-A194-7BBFC5CE3264.plist +++ b/.swiftpm/xcode/xcshareddata/xcbaselines/PerformanceTests.xcbaseline/1868159C-3A7D-4BC9-A194-7BBFC5CE3264.plist @@ -65,6 +65,13 @@ baselineIntegrationDisplayName 7 May 2020 at 21:25:02 + com.apple.dt.XCTMetric_CPU.instructions_retired + + baselineAverage + 3.9872e+06 + baselineIntegrationDisplayName + Local Baseline + testOneOrMore() diff --git a/Sources/Patterns/Atomic Patterns/Line.swift b/Sources/Patterns/Atomic Patterns/Line.swift index 02a4e8e..fd04c89 100644 --- a/Sources/Patterns/Atomic Patterns/Line.swift +++ b/Sources/Patterns/Atomic Patterns/Line.swift @@ -5,17 +5,46 @@ // Created by Kåre Morstøl on 25/05/2020. // +public protocol CharacterLike: Hashable { + var isNewline: Bool { get } +} + +extension Character: CharacterLike {} +extension String.UTF8View.Element: CharacterLike { + @inlinable + public var isNewline: Bool { + // “\n” (U+000A): LINE FEED (LF), U+000B: LINE TABULATION (VT), U+000C: FORM FEED (FF), “\r” (U+000D): CARRIAGE RETURN (CR) + self < 14 && self > 9 + } +} + +// U+0085: NEXT LINE (NEL), U+2028: LINE SEPARATOR, U+2029: PARAGRAPH SEPARATOR +@usableFromInline +let newlines = Set([0x000A as UInt16, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029].map { Unicode.Scalar($0)! }) + +extension String.UnicodeScalarView.Element: CharacterLike { + @inlinable + public var isNewline: Bool { + newlines.contains(self) + } +} + +extension String.UTF16View.Element: CharacterLike { + @inlinable + public var isNewline: Bool { + Unicode.Scalar(self).map(newlines.contains(_:)) ?? false + } +} + /// Matches one line, not including newline characters. -public struct Line: Pattern { +public struct Line: Pattern + where Input.Element: CharacterLike, Input.Index == String.Index { public init() {} public var description: String { "Line()" } - public static let start = Start() - public static let end = End() - @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { try (Start() • Skip() • End()).createInstructions(&instructions) } @@ -31,7 +60,7 @@ public struct Line: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) { + public func createInstructions(_ instructions: inout ContiguousArray>) { instructions.append(.checkIndex(self.parse(_:at:))) } } @@ -48,8 +77,13 @@ public struct Line: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) { + public func createInstructions(_ instructions: inout ContiguousArray>) { instructions.append(.checkIndex(self.parse(_:at:))) } } } + +extension Line where Input == String { + public static let start = Start() + public static let end = End() +} diff --git a/Sources/Patterns/Atomic Patterns/Literal.swift b/Sources/Patterns/Atomic Patterns/Literal.swift index 0bcaff0..97f659b 100644 --- a/Sources/Patterns/Atomic Patterns/Literal.swift +++ b/Sources/Patterns/Atomic Patterns/Literal.swift @@ -10,34 +10,75 @@ import Foundation /// Matches a sequence of elements. /// /// If empty, it will always succeed without consuming any input. -public struct Literal: Pattern { +public struct Literal: Pattern where Input.Element: Hashable { public let elements: Input public var description: String { - #""\#(String(elements).replacingOccurrences(of: "\n", with: "\\n"))""# + #""\#(String(describing: elements).replacingOccurrences(of: "\n", with: "\\n"))""# + } + + @inlinable + public init(_ input: Input) { + elements = input } /// Matches `sequence`. @inlinable - public init(_ sequence: S) where S.Element == Pattern.Input.Element { - self.elements = Pattern.Input(sequence) + public init(_ sequence: S) where S.Element == Input.Element, Input == String { + self.elements = Input(sequence) + } + + @inlinable + public func createInstructions(_ instructions: inout ContiguousArray>) { + instructions.append(contentsOf: elements.map(Instruction.elementEquals)) } +} +extension Literal where Input == String { /// Matches this character. @inlinable public init(_ character: Character) { self.init(String(character)) } +} + +// MARK: Create from string literal. +extension Literal: ExpressibleByUnicodeScalarLiteral where Input: LosslessStringConvertible { @inlinable - public func createInstructions(_ instructions: inout Instructions) { - instructions.append(contentsOf: elements.map(Instruction.elementEquals)) + public init(unicodeScalarLiteral value: StaticString) { + elements = Input(String(describing: value))! } } -extension Literal: ExpressibleByStringLiteral { +extension Literal: ExpressibleByExtendedGraphemeClusterLiteral where Input: LosslessStringConvertible { + public typealias ExtendedGraphemeClusterLiteralType = StaticString +} + +extension Literal: ExpressibleByStringLiteral where Input: LosslessStringConvertible { @inlinable public init(stringLiteral value: StaticString) { - self.init(String(describing: value)) + elements = Input(String(describing: value))! + } +} + +extension String.UTF8View: LosslessStringConvertible { + @inlinable + public init?(_ description: String) { + self = description.utf8 + } +} + +extension String.UTF16View: LosslessStringConvertible { + @inlinable + public init?(_ description: String) { + self = description.utf16 + } +} + +extension String.UnicodeScalarView: LosslessStringConvertible { + @inlinable + public init?(_ description: String) { + self = description.unicodeScalars } } diff --git a/Sources/Patterns/Atomic Patterns/OneOf.swift b/Sources/Patterns/Atomic Patterns/OneOf.swift index 3bf7ff4..22b09e0 100644 --- a/Sources/Patterns/Atomic Patterns/OneOf.swift +++ b/Sources/Patterns/Atomic Patterns/OneOf.swift @@ -8,7 +8,7 @@ import Foundation /// Matches and consumes a single element. -public struct OneOf: Pattern, RegexConvertible { +public struct OneOf: Pattern /*, RegexConvertible*/ where Input.Element: Hashable & Comparable { @usableFromInline let group: Group public let description: String @@ -36,44 +36,36 @@ public struct OneOf: Pattern, RegexConvertible { self.init(description: description, regex: regex, group: Group(contains: contains)) } + /// Matches any element for which `contains` returns `true`. + /// - Parameters: + /// - description: A descriptive identifier for textual representation of the pattern. + /// - regex: An optional regex matching the same elements. + /// - contains: A closure returning true for any element that matches. + @inlinable + public init(description: String, regex: String? = nil, contains: @escaping (Input.Element) -> Bool) where Input == String { + self.init(description: description, regex: regex, group: Group(contains: contains)) + } + /// Matches any elements in `elements`. /// - Parameter elements: A sequence of elements to match. @inlinable - public init(_ elements: S) where S.Element == Input.Element { + public init(_ elements: Input) { group = Group(contentsOf: elements) - description = #"[\#(String(elements))]"# + description = "[\(String(describing: elements))]" _regex = "[\(NSRegularExpression.escapedPattern(for: elements.map(String.init(describing:)).joined()))]" } /// Matches any elements _not_ in `elements`. /// - Parameter elements: A sequence of elements _not_ to match. @inlinable - public init(not elements: S) where S.Element == Input.Element { + public init(not elements: Input) { group = Group(contentsOf: elements).inverted() - description = #"[^\#(String(elements))]"# + description = "[^\(String(describing: elements))]" _regex = "[^\(NSRegularExpression.escapedPattern(for: elements.map(String.init(describing:)).joined()))]" } - /// Matches any of the provided elements. - @inlinable - public init(_ oneofs: OneOfConvertible...) { - let closures = oneofs.map { $0.contains(_:) } - group = Group(contains: { char in closures.contains(where: { $0(char) }) }) - description = "[\(oneofs.map(String.init(describing:)).joined(separator: ","))]" - _regex = nil - } - - /// Matches anything that is _not_ among the provided elements. @inlinable - public init(not oneofs: OneOfConvertible...) { - let closures = oneofs.map { $0.contains(_:) } - group = Group(contains: { char in !closures.contains(where: { $0(char) }) }) - description = #"[^\#(oneofs.map(String.init(describing:)).joined(separator: ","))]"# - _regex = nil - } - - @inlinable - public func createInstructions(_ instructions: inout Instructions) { + public func createInstructions(_ instructions: inout ContiguousArray>) { instructions.append(.checkElement(group.contains)) } @@ -84,19 +76,37 @@ public struct OneOf: Pattern, RegexConvertible { // MARK: OneOfConvertible +// Allows for e.g. `OneOf("a" ..< "e", "g", uppercase)` and `OneOf(not: "a" ..< "e", "gåopr", uppercase)` + /// A type that `OneOf` can use. public protocol OneOfConvertible { + associatedtype Element: Hashable & Comparable + @inlinable + func contains(_: Element) -> Bool +} + +extension OneOf: OneOfConvertible { @inlinable - func contains(_: Pattern.Input.Element) -> Bool + public func contains(_ char: Input.Element) -> Bool { group.contains(char) } } extension Character: OneOfConvertible { @inlinable - public func contains(_ char: Pattern.Input.Element) -> Bool { char == self } + public func contains(_ char: Character) -> Bool { char == self } } +/* Should have been + extension Collection: OneOfConvertible where Element: Hashable { } + but "Extension of protocol 'Collection' cannot have an inheritance clause". + */ extension String: OneOfConvertible {} extension Substring: OneOfConvertible {} +extension String.UTF8View: OneOfConvertible {} +extension Substring.UTF8View: OneOfConvertible {} +extension String.UTF16View: OneOfConvertible {} +extension Substring.UTF16View: OneOfConvertible {} +extension String.UnicodeScalarView: OneOfConvertible {} +extension Substring.UnicodeScalarView: OneOfConvertible {} @inlinable public func ... (lhs: Character, rhs: Character) -> ClosedRange { @@ -104,7 +114,7 @@ public func ... (lhs: Character, rhs: Character) -> ClosedRange { return ClosedRange(uncheckedBounds: (lower: lhs, upper: rhs)) } -extension ClosedRange: OneOfConvertible where Bound == Character {} +extension ClosedRange: OneOfConvertible where Bound: Hashable {} @inlinable public func ..< (lhs: Character, rhs: Character) -> Range { @@ -112,46 +122,122 @@ public func ..< (lhs: Character, rhs: Character) -> Range { return Range(uncheckedBounds: (lower: lhs, upper: rhs)) } -extension Range: OneOfConvertible where Bound == Character {} +extension Range: OneOfConvertible where Bound: Hashable {} -extension OneOf: OneOfConvertible { +extension OneOf { + /* It will be a glorious day when all this can be replaced by two methods using variadic generics. */ + + @usableFromInline + internal init(closures: [(Input.Element) -> Bool], description: String, isNegated: Bool = false) { + group = Group(contains: isNegated + ? { element in !closures.contains(where: { $0(element) }) } + : { element in closures.contains(where: { $0(element) }) }) + self.description = description + _regex = nil + } + + /// Matches any of the provided elements. + @inlinable + public init(_ o1: O1) + where Input.Element == O1.Element { + let closures = [o1.contains(_:)] + self.init(closures: closures, description: "[\(o1)]") + } + + /// Matches any of the provided elements. + @inlinable + public init(_ o1: O1, _ o2: O2) + where Input.Element == O1.Element, O1.Element == O2.Element { + let closures = [o1.contains(_:), o2.contains(_:)] + self.init(closures: closures, description: "[\(o1), \(o2)]") + } + + /// Matches any of the provided elements. + @inlinable + public init(_ o1: O1, _ o2: O2, _ o3: O3) + where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element { + let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:)] + self.init(closures: closures, description: "[\(o1), \(o2), \(o3)]") + } + + /// Matches any of the provided elements. + @inlinable + public init + (_ o1: O1, _ o2: O2, _ o3: O3, _ o4: O4) + where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element, O3.Element == O4.Element { + let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:), o4.contains(_:)] + self.init(closures: closures, description: "[\(o1), \(o2), \(o3), \(o4)]") + } + + // Not + + /// Matches any _but_ the provided elements. + @inlinable + public init(not o1: O1) + where Input.Element == O1.Element { + let closures = [o1.contains(_:)] + self.init(closures: closures, description: "[^\(o1)]", isNegated: true) + } + + /// Matches any _but_ the provided elements. + @inlinable + public init(not o1: O1, _ o2: O2) + where Input.Element == O1.Element, O1.Element == O2.Element { + let closures = [o1.contains(_:), o2.contains(_:)] + self.init(closures: closures, description: "[^\(o1), \(o2)]", isNegated: true) + } + + /// Matches any _but_ the provided elements. @inlinable - public func contains(_ char: Pattern.Input.Element) -> Bool { group.contains(char) } + public init(not o1: O1, _ o2: O2, _ o3: O3) + where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element { + let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:)] + self.init(closures: closures, description: "[^\(o1), \(o2), \(o3)]", isNegated: true) + } + + /// Matches any of the provided elements. + @inlinable + public init + (not o1: O1, _ o2: O2, _ o3: O3, _ o4: O4) + where Input.Element == O1.Element, O1.Element == O2.Element, O2.Element == O3.Element, O3.Element == O4.Element { + let closures = [o1.contains(_:), o2.contains(_:), o3.contains(_:), o4.contains(_:)] + self.init(closures: closures, description: "[^\(o1), \(o2), \(o3), \(o4)]", isNegated: true) + } } // MARK: Join `&&OneOf • OneOf` into one. @inlinable -public func • (lhs: AndPattern, rhs: OneOf) -> OneOf { +public func • (lhs: AndPattern>, rhs: OneOf) -> OneOf { OneOf(description: "\(lhs) \(rhs)", group: lhs.wrapped.group.intersection(rhs.group)) } @inlinable -public func • (lhs: Concat>, rhs: OneOf) -> Concat { +public func • (lhs: Concat>>, rhs: OneOf) -> Concat> { lhs.first • (lhs.second • rhs) } // MARK: Join `!OneOf • Oneof` into one. @inlinable -public func • (lhs: NotPattern, rhs: OneOf) -> OneOf { +public func • (lhs: NotPattern>, rhs: OneOf) -> OneOf { OneOf(description: "\(lhs) \(rhs)", group: rhs.group.subtracting(lhs.wrapped.group)) } @inlinable -public func • (lhs: Concat>, rhs: OneOf) -> Concat { +public func • (lhs: Concat>>, rhs: OneOf) -> Concat> { lhs.first • (lhs.second • rhs) } // MARK: Join `OneOf / OneOf` into one. @inlinable -public func / (lhs: OneOf, rhs: OneOf) -> OneOf { +public func / (lhs: OneOf, rhs: OneOf) -> OneOf { OneOf(description: "\(lhs) / \(rhs)", group: lhs.group.union(rhs.group)) } @inlinable -public func / (lhs: OrPattern, rhs: OneOf) -> OrPattern { +public func / (lhs: OrPattern>, rhs: OneOf) -> OrPattern> { lhs.first / (lhs.second / rhs) } @@ -200,12 +286,12 @@ public let mathSymbol = OneOf(description: "mathSymbol", regex: #"\p{Sm}"#, public let currencySymbol = OneOf(description: "currencySymbol", regex: #"\p{Sc}"#, contains: { $0.isCurrencySymbol }) -extension OneOf { +extension OneOf where Input == String { /// Predefined OneOf patterns. - public static let patterns: [OneOf] = [ - alphanumeric, letter, lowercase, uppercase, punctuation, whitespace, newline, hexDigit, digit, - ascii, symbol, mathSymbol, currencySymbol, - ] + public static var patterns: [OneOf] { + [alphanumeric, letter, lowercase, uppercase, punctuation, whitespace, newline, hexDigit, digit, + ascii, symbol, mathSymbol, currencySymbol] + } /// All the predefined OneOf patterns that match `element`. public static func patterns(for element: Input.Element) -> [OneOf] { diff --git a/Sources/Patterns/Atomic Patterns/Word.swift b/Sources/Patterns/Atomic Patterns/Word.swift index 5dc8a2c..e49a107 100644 --- a/Sources/Patterns/Atomic Patterns/Word.swift +++ b/Sources/Patterns/Atomic Patterns/Word.swift @@ -5,12 +5,7 @@ // Created by Kåre Morstøl on 28/06/2019. // -public struct Word { - /// Detects boundaries between words. - /// - /// Uses rules from https://www.unicode.org/reports/tr29/#Word_Boundary_Rules . - public static let boundary = Boundary() - +public struct Word where Input.Element == Character { /// Detects boundaries between words. /// /// Uses rules from https://www.unicode.org/reports/tr29/#Word_Boundary_Rules . @@ -72,7 +67,7 @@ public struct Word { } @inlinable - public func createInstructions(_ instructions: inout Instructions) { + public func createInstructions(_ instructions: inout ContiguousArray>) { instructions.append(.checkIndex { (input, index) -> Bool in self.parse(input, at: index) != nil }) @@ -80,6 +75,13 @@ public struct Word { } } +extension Word where Input == String { + /// Detects boundaries between words. + /// + /// Uses rules from https://www.unicode.org/reports/tr29/#Word_Boundary_Rules . + public static let boundary = Boundary() +} + extension Group where Element == UInt32 { init(_ c: ClosedRange...) { // The collection is sorted. Exit as soon as possible. diff --git a/Sources/Patterns/Decoder.swift b/Sources/Patterns/Decoder.swift index f003de0..5f34ceb 100644 --- a/Sources/Patterns/Decoder.swift +++ b/Sources/Patterns/Decoder.swift @@ -75,12 +75,12 @@ extension Parser.Match where Input == String { @usableFromInline var codingPath: [CodingKey] @usableFromInline - let values: [Range] + let values: [Range] @usableFromInline let string: String @usableFromInline - init(codingPath: [CodingKey], values: [Range], string: String) { + init(codingPath: [CodingKey], values: [Range], string: String) { self.codingPath = codingPath self.values = values self.string = string diff --git a/Sources/Patterns/Grammar.swift b/Sources/Patterns/Grammar.swift index 67adac5..686dfeb 100644 --- a/Sources/Patterns/Grammar.swift +++ b/Sources/Patterns/Grammar.swift @@ -26,11 +26,11 @@ /// ``` /// will lead to infinite recursion. @dynamicMemberLookup -public class Grammar: Pattern { +public class Grammar: Pattern where Input.Element: Hashable { /// Calls another subpattern in a grammar. public struct CallPattern: Pattern { /// The grammar that contains the subpattern being called. - public let grammar: Grammar + public let grammar: Grammar /// The name of the subpattern being called. public let name: String public var description: String { "<\(name)>" } @@ -42,7 +42,7 @@ public class Grammar: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) { + public func createInstructions(_ instructions: inout ContiguousArray>) { instructions.append(.openCall(name: name)) } } @@ -50,7 +50,7 @@ public class Grammar: Pattern { public var description: String { "Grammar" } // TODO: /// All the subpatterns and their names. - public internal(set) var patterns: [(name: String, pattern: AnyPattern)] = [] + public internal(set) var patterns: [(name: String, pattern: AnyPattern)] = [] /// The main subpattern, which will be called when this Grammar is being used. public var firstPattern: String? { patterns.first?.name } @@ -59,7 +59,16 @@ public class Grammar: Pattern { public init() {} @inlinable - public convenience init(_ closure: (Grammar) -> Void) { + public init() where Input == String {} + + @inlinable + public convenience init(_ closure: (Grammar) -> Void) { + self.init() + closure(self) + } + + @inlinable + public convenience init(_ closure: (Grammar) -> Void) where Input == String { self.init() closure(self) } @@ -71,7 +80,7 @@ public class Grammar: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { // We begin with a call to the first subpattern, followed by a jump to the end. // This enables this grammar to be used inside other patterns (including other grammars). @@ -113,20 +122,20 @@ public class Grammar: Pattern { instructions[startIndex + 1] = .jump(offset: instructions.endIndex - startIndex - 1) } - public static func == (lhs: Grammar, rhs: Grammar) -> Bool { - lhs.patterns.elementsEqual(rhs.patterns, by: { $0 == $1 }) + public static func == (lhs: Grammar, rhs: Grammar) -> Bool { + lhs.patterns.elementsEqual(rhs.patterns, by: { $0.name == $1.name && $0.pattern == $1.pattern }) } } infix operator <-: AssignmentPrecedence /// Used by grammars to define subpatterns with `g.a <- ...`. -public func <- (call: Grammar.CallPattern, pattern: P) { +public func <- (call: Grammar.CallPattern, pattern: P) { call.grammar.patterns.append((call.name, AnyPattern(pattern))) } /// In case of `g.name <- Capture(...)`, names the nameless Capture "name". -public func <- (call: Grammar.CallPattern, capture: Capture

) { +public func <- (call: Grammar.CallPattern, capture: Capture

) { let newPattern = capture.name == nil ? Capture(name: call.name, capture.wrapped) : capture diff --git a/Sources/Patterns/Operations on Patterns/And.swift b/Sources/Patterns/Operations on Patterns/And.swift index 1addd79..b3a731a 100644 --- a/Sources/Patterns/Operations on Patterns/And.swift +++ b/Sources/Patterns/Operations on Patterns/And.swift @@ -7,6 +7,7 @@ /// A pattern which matches the `wrapped` pattern, without consuming any input. public struct AndPattern: Pattern { + public typealias Input = Wrapped.Input public let wrapped: Wrapped public var description: String { "&\(wrapped)" } @@ -16,9 +17,9 @@ public struct AndPattern: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { let wrappedInstructions = try wrapped.createInstructions() - if let indexMovedBy = wrappedInstructions.movesIndexBy { + if let indexMovedBy = wrappedInstructions.movesIndexBy() { instructions.append(contentsOf: wrappedInstructions) instructions.append(.moveIndex(offset: -indexMovedBy)) } else { diff --git a/Sources/Patterns/Operations on Patterns/AnyPattern.swift b/Sources/Patterns/Operations on Patterns/AnyPattern.swift index 9df8eb3..7fcabfe 100644 --- a/Sources/Patterns/Operations on Patterns/AnyPattern.swift +++ b/Sources/Patterns/Operations on Patterns/AnyPattern.swift @@ -7,12 +7,12 @@ /// A type erased wrapper around a pattern. /// Can be used to store patterns in arrays and non-generic variables. -public struct AnyPattern: Pattern { +public struct AnyPattern: Pattern where Input.Element: Hashable { @usableFromInline let _instructions: (inout Instructions) throws -> Void @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { try _instructions(&instructions) } @@ -22,7 +22,7 @@ public struct AnyPattern: Pattern { /// The wrapped pattern. If you know the exact type you can unwrap it again. public let wrapped: Any - public init(_ p: P) { + public init(_ p: P) where Input == P.Input { _instructions = p.createInstructions _description = { p.description } wrapped = p @@ -33,7 +33,7 @@ public struct AnyPattern: Pattern { self = p } - public init(_ p: Literal) { + public init(_ p: Literal) { _instructions = p.createInstructions _description = { p.description } wrapped = p @@ -44,11 +44,26 @@ public struct AnyPattern: Pattern { } } +extension AnyPattern: ExpressibleByUnicodeScalarLiteral where Input == String { + @inlinable + public init(unicodeScalarLiteral value: String) { + self.init(stringLiteral: String(describing: value)) + } +} + +extension AnyPattern: ExpressibleByExtendedGraphemeClusterLiteral where Input == String { + public typealias ExtendedGraphemeClusterLiteralType = String +} + +extension AnyPattern: ExpressibleByStringLiteral where Input == String { + public typealias StringLiteralType = String +} + /// Allows AnyPattern to be defined by a string with patterns in interpolations. /// /// `let p: AnyPattern = "hi\(whitespace)there"` /// is the same as `"hi" • whitespace • "there"`. -extension AnyPattern: ExpressibleByStringInterpolation { +extension AnyPattern: ExpressibleByStringInterpolation where Input == String { public struct StringInterpolation: StringInterpolationProtocol { @usableFromInline var pattern = AnyPattern("") @@ -64,7 +79,7 @@ extension AnyPattern: ExpressibleByStringInterpolation { } @inlinable - public mutating func appendInterpolation(_ newpattern: P) { + public mutating func appendInterpolation(_ newpattern: P) where P.Input == Input { pattern = AnyPattern(pattern • newpattern) } } diff --git a/Sources/Patterns/Operations on Patterns/Capture.swift b/Sources/Patterns/Operations on Patterns/Capture.swift index 99fe4a7..a3ff03f 100644 --- a/Sources/Patterns/Operations on Patterns/Capture.swift +++ b/Sources/Patterns/Operations on Patterns/Capture.swift @@ -9,12 +9,13 @@ /// /// It can be retrieved in `Parser.Match.captures` or used for decoding into Decodables. public struct Capture: Pattern { + public typealias Input = Wrapped.Input public var description: String { let result: String switch (name, wrapped) { - case (nil, is NoPattern): + case (nil, is NoPattern): result = "" - case let (name?, is NoPattern): + case let (name?, is NoPattern): result = "name: \(name)" case let (name?, wrapped): result = "name: \(name), \(wrapped)" @@ -37,40 +38,46 @@ public struct Capture: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { instructions.append(.captureStart(name: name)) try wrapped.createInstructions(&instructions) instructions.append(.captureEnd) } } -extension Capture where Wrapped == NoPattern { +extension Capture { /// Captures the current input position as an empty range. /// - Parameter name: optional name @inlinable - public init(name: String? = nil) { - self.wrapped = NoPattern() + public init(name: String? = nil) where Wrapped == NoPattern { + self.wrapped = NoPattern() + self.name = name + } + + /// Captures the current input position as an empty range. + /// - Parameter name: optional name + @inlinable + public init(name: String? = nil) where Wrapped == NoPattern { + self.wrapped = NoPattern() self.name = name } -} -extension Capture where Wrapped == Literal { /// Captures the position of `wrapped` as a range. /// - Parameter name: optional name @inlinable - public init(name: String? = nil, _ wrapped: Literal) { + public init(name: String? = nil, _ wrapped: Literal) where Wrapped == Literal { self.wrapped = wrapped self.name = name } } /// A pattern that does absolutely nothing. -public struct NoPattern: Pattern { +public struct NoPattern: Pattern where Input.Element: Hashable { public var description: String { "" } @inlinable public init() {} @inlinable - public func createInstructions(_ instructions: inout Instructions) throws {} + public func createInstructions(_ instructions: inout ContiguousArray>) throws {} } diff --git a/Sources/Patterns/Operations on Patterns/Choice.swift b/Sources/Patterns/Operations on Patterns/Choice.swift index 5792a33..b6b3feb 100644 --- a/Sources/Patterns/Operations on Patterns/Choice.swift +++ b/Sources/Patterns/Operations on Patterns/Choice.swift @@ -10,7 +10,8 @@ import Foundation /// A pattern which first tries the `first` pattern, /// if that fails it tries the `second` pattern from the same position. -public struct OrPattern: Pattern { +public struct OrPattern: Pattern where First.Input == Second.Input { + public typealias Input = First.Input public let first: First public let second: Second @@ -25,7 +26,7 @@ public struct OrPattern: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { let inst1 = try first.createInstructions() let inst2 = try second.createInstructions() instructions.append(.choice(offset: inst1.count + 3)) @@ -47,20 +48,27 @@ public func / (p1: First, p2: Second) -> OrPatt /// First tries the pattern to the left, /// if that fails it tries the pattern to the right from the same position. @inlinable -public func / (p1: Literal, p2: Second) -> OrPattern { +public func / (p1: Literal, p2: Second) -> OrPattern, Second> { OrPattern(p1, or: p2) } /// First tries the pattern to the left, /// if that fails it tries the pattern to the right from the same position. @inlinable -public func / (p1: First, p2: Literal) -> OrPattern { +public func / (p1: First, p2: Literal) -> OrPattern> { OrPattern(p1, or: p2) } /// First tries the pattern to the left, /// if that fails it tries the pattern to the right from the same position. @inlinable -public func / (p1: Literal, p2: Literal) -> OrPattern { +public func / (p1: Literal, p2: Literal) -> OrPattern, Literal> { + OrPattern(p1, or: p2) +} + +/// First tries the pattern to the left, +/// if that fails it tries the pattern to the right from the same position. +@inlinable +public func / (p1: Literal, p2: Literal) -> OrPattern, Literal> { OrPattern(p1, or: p2) } diff --git a/Sources/Patterns/Operations on Patterns/Concatenation.swift b/Sources/Patterns/Operations on Patterns/Concatenation.swift index 398ce90..b068082 100644 --- a/Sources/Patterns/Operations on Patterns/Concatenation.swift +++ b/Sources/Patterns/Operations on Patterns/Concatenation.swift @@ -14,7 +14,8 @@ infix operator •: PatternConcatenationPrecedence /// A pattern which first tries the `first` pattern, /// if that succeeds it continues with the `second` pattern. -public struct Concat: Pattern { +public struct Concat: Pattern where First.Input == Second.Input { + public typealias Input = First.Input public let first: First public let second: Second public var description: String { "\(first) \(second)" } @@ -26,7 +27,7 @@ public struct Concat: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { try first.createInstructions(&instructions) try second.createInstructions(&instructions) } @@ -34,24 +35,24 @@ public struct Concat: Pattern { /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. @inlinable -public func • (lhs: Left, rhs: Right) -> Concat { +public func • (lhs: Left, rhs: Right) -> Concat where Left.Input == Right.Input { Concat(lhs, rhs) } /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. @inlinable -public func • (lhs: Literal, rhs: Right) -> Concat { +public func • (lhs: Literal, rhs: Right) -> Concat, Right> { Concat(lhs, rhs) } /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. @inlinable -public func • (lhs: Left, rhs: Literal) -> Concat { +public func • (lhs: Left, rhs: Literal) -> Concat> { Concat(lhs, rhs) } /// First tries the pattern to the left, if that succeeds it tries the pattern to the right. @inlinable -public func • (lhs: Literal, rhs: Literal) -> Concat { +public func • (lhs: Literal, rhs: Literal) -> Concat, Literal> { Concat(lhs, rhs) } diff --git a/Sources/Patterns/Operations on Patterns/Not.swift b/Sources/Patterns/Operations on Patterns/Not.swift index 21093a4..f57be63 100644 --- a/Sources/Patterns/Operations on Patterns/Not.swift +++ b/Sources/Patterns/Operations on Patterns/Not.swift @@ -8,6 +8,7 @@ /// A pattern which only succeeds if the `wrapped` pattern fails. /// The next pattern will continue from where `wrapped` started. public struct NotPattern: Pattern { + public typealias Input = Wrapped.Input public let wrapped: Wrapped public var description: String { "!\(wrapped)" } @@ -17,7 +18,7 @@ public struct NotPattern: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { let wrappedInstructions = try wrapped.createInstructions() instructions.append(.choice(offset: wrappedInstructions.count + 3)) instructions.append(contentsOf: wrappedInstructions) @@ -34,6 +35,6 @@ public prefix func ! (pattern: P) -> NotPattern

{ /// Will only succeed if the following pattern fails. Does not consume any input. @inlinable -public prefix func ! (pattern: Literal) -> NotPattern { +public prefix func ! (pattern: Literal) -> NotPattern> { NotPattern(pattern) } diff --git a/Sources/Patterns/Operations on Patterns/Repetition.swift b/Sources/Patterns/Operations on Patterns/Repetition.swift index defed6e..2f2f47a 100644 --- a/Sources/Patterns/Operations on Patterns/Repetition.swift +++ b/Sources/Patterns/Operations on Patterns/Repetition.swift @@ -10,6 +10,7 @@ /// /// Used by operators `*+¿`. public struct RepeatPattern: Pattern { + public typealias Input = Wrapped.Input public let wrapped: Wrapped public let min: Int public let max: Int? @@ -27,7 +28,7 @@ public struct RepeatPattern: Pattern { } @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { let repeatedInstructions = try wrapped.createInstructions() for _ in 0 ..< min { instructions.append(contentsOf: repeatedInstructions) } if let max = max { @@ -72,7 +73,7 @@ public postfix func * (me: P) -> RepeatPattern

{ /// Repeats the preceding pattern 0 or more times. @inlinable -public postfix func * (me: Literal) -> RepeatPattern { +public postfix func * (me: Literal) -> RepeatPattern> { me.repeat(0...) } @@ -86,7 +87,7 @@ public postfix func + (me: P) -> RepeatPattern

{ /// Repeats the preceding pattern 1 or more times. @inlinable -public postfix func + (me: Literal) -> RepeatPattern { +public postfix func + (me: Literal) -> RepeatPattern> { me.repeat(1...) } @@ -100,6 +101,6 @@ public postfix func ¿ (me: P) -> RepeatPattern

{ /// Tries the preceding pattern, and continues even if it fails. @inlinable -public postfix func ¿ (me: Literal) -> RepeatPattern { +public postfix func ¿ (me: Literal) -> RepeatPattern> { me.repeat(0 ... 1) } diff --git a/Sources/Patterns/Operations on Patterns/Skip.swift b/Sources/Patterns/Operations on Patterns/Skip.swift index 6e278ae..8a30a83 100644 --- a/Sources/Patterns/Operations on Patterns/Skip.swift +++ b/Sources/Patterns/Operations on Patterns/Skip.swift @@ -8,13 +8,15 @@ /// Skips 0 or more elements until a match for the next patterns are found. /// /// If this is at the end of a pattern, it skips to the end of input. -public struct Skip: Pattern { +public struct Skip: Pattern where Input.Element: Hashable { public var description: String { "Skip()" } public init() {} + public init() where Input == String {} + @inlinable - public func createInstructions(_ instructions: inout Instructions) throws { + public func createInstructions(_ instructions: inout ContiguousArray>) throws { instructions.append(.skip) } } diff --git a/Sources/Patterns/Pattern And Instruction.swift b/Sources/Patterns/Pattern And Instruction.swift index b684c88..359ebfd 100644 --- a/Sources/Patterns/Pattern And Instruction.swift +++ b/Sources/Patterns/Pattern And Instruction.swift @@ -6,8 +6,8 @@ // /// Something that can create Instructions for the Parser. -public protocol Pattern: CustomStringConvertible, Equatable { - typealias Input = String +public protocol Pattern: CustomStringConvertible { + associatedtype Input: BidirectionalCollection where Input.Element: Hashable typealias ParsedRange = Range typealias Instructions = ContiguousArray> @@ -146,10 +146,10 @@ public enum Instruction where Input.Element: Has } } -extension Sequence where Element == Instruction { +extension Sequence { /// The offset by which these instructions will move the input index. @inlinable - var movesIndexBy: Int? { + func movesIndexBy

() -> Int? where Element == Instruction

{ lazy .map { $0.movesIndexBy }.reduceIfNoNils(into: 0) { result, offset in result += offset } } } diff --git a/Sources/Patterns/Regex.swift b/Sources/Patterns/Regex.swift index b32e6b4..996bfa8 100644 --- a/Sources/Patterns/Regex.swift +++ b/Sources/Patterns/Regex.swift @@ -13,10 +13,8 @@ public protocol RegexConvertible: Pattern { var regex: String { get } } -// For `OneOf` to be convertible the regex has to be provided manually when it is created. - -extension Literal: RegexConvertible { - public var regex: String { NSRegularExpression.escapedPattern(for: String(elements)) } +extension Literal: RegexConvertible where Input: StringProtocol { + public var regex: String { NSRegularExpression.escapedPattern(for: String(self.elements)) } } extension Line: RegexConvertible { @@ -63,3 +61,5 @@ extension Skip: RegexConvertible { extension NoPattern: RegexConvertible { public var regex: String { "" } } + +// For `OneOf` to be convertible the regex has to be provided manually when it is created. See OneOf.swift. diff --git a/Tests/LongTests/LongTests.swift b/Tests/LongTests/LongTests.swift index e285ad6..98c3033 100644 --- a/Tests/LongTests/LongTests.swift +++ b/Tests/LongTests/LongTests.swift @@ -4,20 +4,21 @@ import XCTest class LongTests: XCTestCase { func testOr() { - XCTAssert(type(of: "a" / letter / ascii / punctuation / "b") - == OrPattern, Literal>.self, + let char = letter / ascii / punctuation + XCTAssert(type(of: "a" / char / "b") + == OrPattern, OneOf>, Literal>.self, "'/' operator isn't optimizing OneOf's properly.") } func testNot() { XCTAssert( - type(of: "a" • !letter • ascii • "b") == Concat, Literal>.self, + type(of: "a" • !letter • ascii • "b") == Concat, OneOf>, Literal>.self, "'•' operator isn't optimizing OneOf's properly.") } func testAnd() throws { XCTAssert( - type(of: "a" • &&letter • ascii • "b") == Concat, Literal>.self, + type(of: "a" • &&letter • ascii • "b") == Concat, OneOf>, Literal>.self, "'•' operator isn't optimizing OneOf's properly.") } @@ -50,14 +51,15 @@ class LongTests: XCTestCase { } // from http://www.inf.puc-rio.br/~roberto/docs/peg.pdf, page 2 and 5 - static let pegGrammar = Grammar { g in + static let pegGrammar = Grammar { g in //g.all <- g.pattern • !any g.pattern <- g.grammar / g.simplepatt g.grammar <- (g.nonterminal • "<-" • g.sp • g.simplepatt)+ g.simplepatt <- g.alternative • ("/" • g.sp • g.alternative)* g.alternative <- (OneOf("!&")¿ • g.sp • g.suffix)+ g.suffix <- g.primary • (OneOf("*+?") • g.sp)* - g.primary <- "(" • g.sp • g.pattern • ")" • g.sp / "." • g.sp / g.literal / g.charclass / g.nonterminal • !"<-" + let primaryPart1 = "(" • g.sp • g.pattern • ")" • g.sp / "." • g.sp / g.literal + g.primary <- primaryPart1 / g.charclass / g.nonterminal • !"<-" g.literal <- "’" • (!"’" • any)* • "’" • g.sp g.charclass <- "[" • (!"]" • (any • "-" • any / any))* • "]" • g.sp g.nonterminal <- OneOf("a" ... "z", "A" ... "Z")+ • g.sp diff --git a/Tests/PatternsTests/ConcatenationTests.swift b/Tests/PatternsTests/ConcatenationTests.swift index 179aa89..7fb1dd7 100644 --- a/Tests/PatternsTests/ConcatenationTests.swift +++ b/Tests/PatternsTests/ConcatenationTests.swift @@ -46,6 +46,12 @@ class ConcatenationTests: XCTestCase { assertParseAll( "x" • Capture() • "a", input: "xaxa xa", result: "", count: 3) + assertParseAll( + Capture() • "a", + input: "xaa xa".utf8, result: "".utf8, count: 3) + assertParseAll( + "x" • Capture() • "a", + input: "xaxa xa".unicodeScalars, result: "".unicodeScalars, count: 3) let text = "This is a test text." assertParseAll( @@ -60,15 +66,20 @@ class ConcatenationTests: XCTestCase { assertParseAll( " " • Capture("te"), input: text, result: "te", count: 2) - } - func testRepeatOrThenEndOfLine() throws { - assertParseAll( - Capture((alphanumeric / OneOf(" "))+ • Line.end), - input: "FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S", - result: ["FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S"]) + XCTAssert(type(of: Capture()).Input == String.self) + XCTAssert(type(of: "q" • Capture()).Input == String.self) + XCTAssert(type(of: Literal("q".utf8) • Capture()).Input == String.UTF8View.self) } + /* + func testRepeatOrThenEndOfLine() throws { + assertParseAll( + Capture((alphanumeric / OneOf(" "))+ • Line.end), + input: "FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S", + result: ["FMA026712 TECNOAUTOMOTRIZ ATLACOMULCO S"]) + } + */ func testMatchFullRange() throws { let text = """ line 1 @@ -145,44 +156,44 @@ class ConcatenationTests: XCTestCase { 0005..0010 ; Common # Cc [32] .. 002F ; Common # Zs SPACE """ - - lazy var rangeAndProperty: Parser = { - let hexNumber = Capture(name: "codePoint", hexDigit+) - let hexRange = AnyPattern("\(hexNumber)..\(hexNumber)") / hexNumber - return try! Parser(search: AnyPattern("\n\(hexRange • Skip()); \(Capture(name: "property", Skip())) ")) - }() - - func testStringInterpolation() throws { - assertCaptures(rangeAndProperty, input: text, result: [["0005", "0010", "Common"], ["002F", "Common"]]) - } - - func testMatchDecoding() throws { - struct Property: Decodable, Equatable { - let codePoint: [Int] - let property: String - let notCaptured: String? - } - - let matches = Array(rangeAndProperty.matches(in: text)) - let property = try matches.first!.decode(Property.self, from: text) - XCTAssertEqual(property, Property(codePoint: [5, 10], property: "Common", notCaptured: nil)) - - XCTAssertThrowsError(try matches.last!.decode(Property.self, from: text)) - } - - func testParserDecoding() { - struct Property: Decodable, Equatable { - let codePoint: [String] - let property: String - } - - XCTAssertEqual(try rangeAndProperty.decode([Property].self, from: text), - [Property(codePoint: ["0005", "0010"], property: "Common"), - Property(codePoint: ["002F"], property: "Common")]) - XCTAssertEqual(try rangeAndProperty.decodeFirst(Property.self, from: text), - Property(codePoint: ["0005", "0010"], property: "Common")) - } - + /* + lazy var rangeAndProperty: Parser = { + let hexNumber = Capture(name: "codePoint", hexDigit+) + let hexRange = AnyPattern("\(hexNumber)..\(hexNumber)") / hexNumber + return try! Parser(search: AnyPattern("\n\(hexRange • Skip()); \(Capture(name: "property", Skip())) ")) + }() + + func testStringInterpolation() throws { + assertCaptures(rangeAndProperty, input: text, result: [["0005", "0010", "Common"], ["002F", "Common"]]) + } + + func testMatchDecoding() throws { + struct Property: Decodable, Equatable { + let codePoint: [Int] + let property: String + let notCaptured: String? + } + + let matches = Array(rangeAndProperty.matches(in: text)) + let property = try matches.first!.decode(Property.self, from: text) + XCTAssertEqual(property, Property(codePoint: [5, 10], property: "Common", notCaptured: nil)) + + XCTAssertThrowsError(try matches.last!.decode(Property.self, from: text)) + } + + func testParserDecoding() { + struct Property: Decodable, Equatable { + let codePoint: [String] + let property: String + } + + XCTAssertEqual(try rangeAndProperty.decode([Property].self, from: text), + [Property(codePoint: ["0005", "0010"], property: "Common"), + Property(codePoint: ["002F"], property: "Common")]) + XCTAssertEqual(try rangeAndProperty.decodeFirst(Property.self, from: text), + Property(codePoint: ["0005", "0010"], property: "Common")) + } + */ func testReadmeExample() throws { let text = "This is a point: (43,7), so is (0,5). But my final point is (3,-1)." @@ -203,11 +214,13 @@ class ConcatenationTests: XCTestCase { _ = (pointsAsSubstrings, points) } - func testOperatorPrecedence() throws { - let p1 = "a" • Skip() • letter • !alphanumeric • "b"+ - XCTAssert(type(of: p1.first.first.first.second) == Skip.self) - XCTAssert(type(of: "a" • "b" / "c" • "d") - == OrPattern, Concat>.self, - #"`/` should have lower precedence than `•`"#) - } + /* + func testOperatorPrecedence() throws { + let p1 = "a" • Skip() • letter • !alphanumeric • "b"+ + XCTAssert(type(of: p1.first.first.first.second) == Skip.self) + XCTAssert(type(of: "a" • "b" / "c" • "d") + == OrPattern, Concat>.self, + #"`/` should have lower precedence than `•`"#) + } + */ } diff --git a/Tests/PatternsTests/GrammarTests.swift b/Tests/PatternsTests/GrammarTests.swift index 68cdccb..196f91d 100644 --- a/Tests/PatternsTests/GrammarTests.swift +++ b/Tests/PatternsTests/GrammarTests.swift @@ -9,7 +9,7 @@ import XCTest class GrammarTests: XCTestCase { - let grammar1: Grammar = { + let grammar1: Grammar = { let g = Grammar() g.letter <- Capture(letter) g.space <- whitespace @@ -17,7 +17,7 @@ class GrammarTests: XCTestCase { }() func testNamesAnonymousCaptures() { - XCTAssertEqual((grammar1.patterns.first?.pattern.wrapped as? Capture)?.name, "letter") + XCTAssertEqual((grammar1.patterns.first?.pattern.wrapped as? Capture>)?.name, "letter") } func testSetsFirstPattern() { @@ -56,11 +56,11 @@ class GrammarTests: XCTestCase { } func testOptimisesTailCall() throws { - let g = Grammar { g in + let g = Grammar { g in g.a <- " " / Skip() • g.a } - func isCall(_ inst: Instruction) -> Bool { + func isCall(_ inst: Instruction) -> Bool { switch inst { case .call: return true diff --git a/Tests/PatternsTests/PatternTests.swift b/Tests/PatternsTests/PatternTests.swift index 12225c1..e6c08af 100644 --- a/Tests/PatternsTests/PatternTests.swift +++ b/Tests/PatternsTests/PatternTests.swift @@ -9,11 +9,22 @@ import Patterns import XCTest +let asciiDigit = OneOf(UInt8(ascii: "0") ... UInt8(ascii: "9")) +let asciiLowercase = OneOf(UInt8(ascii: "a") ... UInt8(ascii: "z")) +let asciiUppercase = OneOf(UInt8(ascii: "A") ... UInt8(ascii: "Z")) +let asciiLetter = OneOf(asciiLowercase, asciiUppercase) + class PatternTests: XCTestCase { func testLiteral() { - assertParseAll(Capture(Literal("a")), input: "abcd", result: "a", count: 1) - assertParseAll(Capture(Literal("b")), input: "abcdb", result: "b", count: 2) - assertParseAll(Capture(Literal("ab")), input: "abcaba", result: "ab", count: 2) + assertParseAll(Capture("a"), input: "abcd", result: "a", count: 1) + assertParseAll(Capture("b"), input: "abcdb", result: "b", count: 2) + assertParseAll(Capture("ab"), input: "abcaba", result: "ab", count: 2) + } + + func testLiteralUTF8() { + assertParseAll(Capture(Literal("a".utf8)), input: "abcd".utf8, result: "a".utf8, count: 1) + assertParseAll(Capture(Literal("b".utf8)), input: "abcdb".utf8, result: "b".utf8, count: 2) + assertParseAll(Capture(Literal("ab".utf8)), input: "abcaba".utf8, result: "ab".utf8, count: 2) } func testOneOf() { @@ -28,6 +39,31 @@ class PatternTests: XCTestCase { assertParseAll(Capture(lowercaseASCII), input: "aTæøåk☀️", result: ["a", "k"]) assertParseAll(digit, input: "ab12c3,d4", count: 4) + + assertParseAll(Capture(OneOf("a" ... "e")), + input: "abgkxeryza", result: ["a", "b", "e", "a"]) + assertParseAll(Capture(OneOf(not: "a" ..< "f")), + input: "abgkxeryza", result: ["g", "k", "x", "r", "y", "z"]) + } + + func testOneOfUTF8() { + let vowels = OneOf("aeiouAEIOU".utf8) + assertParseAll(Capture(vowels), input: "I am, you are".utf8, result: ["I", "a", "o", "u", "a", "e"].map { $0.utf8 }) + let notVowels = OneOf(not: "aeiouAEIOU".utf8) + assertParseAll(Capture(notVowels), input: "I am, you are".utf8, result: [" ", "m", ",", " ", "y", " ", "r"].map { $0.utf8 }) + + let lowercaseASCII = OneOf(description: "lowercaseASCII") { character in + (UInt8(ascii: "a") ... UInt8(ascii: "z")).contains(character) + } + assertParseAll(Capture(lowercaseASCII), input: "aTæøåk☀️".utf8, result: ["a", "k"].map { $0.utf8 }) + + assertParseAll(Capture(OneOf(UInt8(ascii: "a") ... UInt8(ascii: "e"))), + input: "abgkxeryza".utf8, result: ["a", "b", "e", "a"].map { $0.utf8 }) + assertParseAll(Capture(OneOf(not: UInt8(ascii: "a") ..< UInt8(ascii: "f"))), + input: "abgkxeryza".utf8, result: ["g", "k", "x", "r", "y", "z"].map { $0.utf8 }) + + // requires String.UTF8View to be ExpressibleByStringLiteral + // assertParseAll(OneOf(".,"), input: "., ,".utf8, result: [".", ",", ","].map{$0.utf8}) } func testOneOfsMultiple() { @@ -46,6 +82,10 @@ class PatternTests: XCTestCase { assertParseAll(letter • digit*, input: "123abc123d", count: 4) assertParseAll(Capture(digit¿ • letter), input: "123abc", result: ["3a", "b", "c"]) + + assertParseAll(asciiLetter • asciiDigit*, input: "123abc123d".utf8, count: 4) + assertParseAll(Capture(asciiDigit¿ • asciiLetter), + input: "123abc".utf8, result: ["3a", "b", "c"].map { $0.utf8 }) } func testRepeat() throws { @@ -66,10 +106,6 @@ class PatternTests: XCTestCase { assertParseAll(Capture("a"* • "b"), input: "b aabb ab", result: ["b", "aab", "b", "ab"]) assertParseAll(Capture("a"*), input: "b aabb ab", result: ["", "", "aa", "", "", "", "a", "", ""]) - // !a b == b - a - assertParseAll( - Capture((!newline • ascii)+), - input: "123\n4567\n89", result: ["123", "4567", "89"]) assertParseAll( Capture((!newline • ascii)+), input: "123\n4567\n89", result: ["123", "4567", "89"]) @@ -77,9 +113,16 @@ class PatternTests: XCTestCase { XCTAssertEqual(digit+.description, "digit{1...}") } + func testRepeatLiterals() throws { + assertParseAll(Capture("a"+), input: "a aa aa", result: ["a", "aa", "aa"]) + assertParseAll(Capture("a"+), input: "a aa aa".utf8, result: ["a", "aa", "aa"].map { $0.utf8 }) + assertParseAll(Capture("a" • "a"*), input: "a aaa aa".utf16, result: ["a", "aaa", "aa"].map { $0.utf16 }) + assertParseAll(Capture("a" • "a"¿), input: "a aa aa".unicodeScalars, result: ["a", "aa", "aa"].map { $0.unicodeScalars }) + } + func testOr() { + assertParseAll(Capture("a" / "b"), input: "bcbd".utf16, result: "b".utf16, count: 2) let pattern = Capture("a" / "b") - assertParseAll(pattern, input: "bcbd", result: "b", count: 2) assertParseAll(pattern, input: "acdaa", result: "a", count: 3) assertParseAll(pattern, input: "abcdb", count: 3) } @@ -95,9 +138,6 @@ class PatternTests: XCTestCase { """ - let hexDigit = OneOf(description: "hexDigit", contains: { - $0.unicodeScalars.first!.properties.isHexDigit - }) let hexNumber = Capture(hexDigit+) let hexRange = (hexNumber • ".." • hexNumber) / hexNumber let rangeAndProperty = Line.start • hexRange • Skip() • "; " • Capture(Skip()) • " " @@ -166,6 +206,32 @@ class PatternTests: XCTestCase { assertParseAll(Line.end, input: "\n", count: 2) } + func testLineEndUTF8_16_UnicodeScalars() throws { + let pattern = Line.End() + assertParseAll(pattern, input: "".utf16, result: "".utf16, count: 1) + assertParseAll(pattern, input: "\n".utf16, count: 2) + assertParseAll(pattern, input: "\n\n".utf16, count: 3) + + let text = """ + line 1 + line 2 + line 3 + line 4 + """.utf8 + assertParseAll(Line.End(), input: text, count: 4) + assertParseAll( + " " • Capture(Skip()) • Line.End(), + input: text, result: ["1", "2", "3", "4"].map { $0.utf8 }) + assertParseAll( + Capture(asciiDigit • Line.End()), + input: text, result: ["1", "2", "3", "4"].map { $0.utf8 }) + assertParseAll( + Capture(asciiDigit • Line.End() • Skip() • "l"), + input: text, result: ["1\nl", "2\nl", "3\nl"].map { $0.utf8 }) + + assertParseAll(Line.End(), input: "\n".unicodeScalars, count: 2) + } + func testLine() throws { let text = """ line 1 @@ -176,6 +242,7 @@ class PatternTests: XCTestCase { """ assertParseAll(Capture(Line()), input: text, result: ["line 1", "", "line 3", "line 4", ""]) + assertParseAll(Capture(Line()), input: text.utf8, result: ["line 1", "", "line 3", "line 4", ""].map { $0.utf8 }) } func testWordBoundary() throws { @@ -199,18 +266,29 @@ class PatternTests: XCTestCase { input: "ab abc abcd efg", result: ["ab", "bc", "bcd", "efg"]) + func any() -> OneOf { OneOf(description: "any", contains: { _ in true }) } + + assertParseAll( + Capture(!"abc" • !" " • any()), + input: "ab abc abcd ".utf8, + result: ["a", "b", "b", "c", "b", "c", "d"].map { $0.utf8 }) + assertParseAll( Capture(" " • (!OneOf(" ")).repeat(2) • "d"), // repeat a parser of length 0. input: " d cd", result: [" d"]) - assertParseMarkers(!any, input: " |") // EOF - assertParseMarkers(try Parser(!any), input: "|") + assertParseMarkers(!any(), input: " |") // EOF + assertParseMarkers(try Parser(!any()), input: "|") } func testAnd() throws { assertParseAll(Capture(&&letter • ascii), input: "1abøcæ", result: ["a", "b", "c"]) - // find last occurence of "xuxu", even if it overlaps with itself. - assertParseMarkers(try Parser(Grammar { g in g.last <- &&"xuxu" • any / any • g.last }+ • any.repeat(3)), - input: "xuxuxuxu|i") + assertParseAll(Capture(&&Line.Start() • "a"), input: "abø\ncæa\na".utf8, result: "a".utf8, count: 2) + + /* TODO: uncomment + // find last occurence of "xuxu", even if it overlaps with itself. + assertParseMarkers(try Parser(Grammar { g in g.last <- &&"xuxu" • any / any • g.last }+ • any.repeat(3)), + input: "xuxuxuxu|i") + */ } } diff --git a/Tests/PatternsTests/SkipTests.swift b/Tests/PatternsTests/SkipTests.swift index b5bac90..f534809 100644 --- a/Tests/PatternsTests/SkipTests.swift +++ b/Tests/PatternsTests/SkipTests.swift @@ -28,9 +28,13 @@ class SkipTests: XCTestCase { assertParseAll( " " • Capture(Skip() • letter+) • " ", input: text, result: ["is", "a", "test"]) + let p = " " • Capture(Skip()) • " " assertParseAll( - " " • Capture(Skip()) • " ", + p, input: text, result: ["is", "a", "test"]) + assertParseAll( + " " • Capture(Skip()) • " ", + input: text.utf8, result: ["is", "a", "test"].map { $0.utf8 }) let lines = """ 1 diff --git a/Tests/PatternsTests/TestHelpers.swift b/Tests/PatternsTests/TestHelpers.swift index 4080b14..3ee2bbe 100644 --- a/Tests/PatternsTests/TestHelpers.swift +++ b/Tests/PatternsTests/TestHelpers.swift @@ -25,15 +25,33 @@ extension Parser { } extension XCTestCase { - func assertParseAll(_ parser: Parser, input: String, result: [String], - file: StaticString = #file, line: UInt = #line) { - let parsed = parser.ranges(in: input).map { String(input[$0]) } - XCTAssertEqual(parsed, result, "\nThe differences are: \n" - + parsed.difference(from: result).sorted().joined(separator: "\n"), file: file, line: line) - } + func XCTAssertEqualElements + (_ seq1: @autoclosure () throws -> S1, + _ seq2: @autoclosure () throws -> S2, + _ message: @autoclosure () -> String = "", + file: StaticString = #file, line: UInt = #line) + where S1.Element: Sequence, S2.Element: Sequence, S1.Element.Element: Equatable, + S1.Element.Element == S2.Element.Element { + do { + let seq1 = try seq1() + let seq2 = try seq2() + if !seq1.elementsEqual(seq2, by: { $0.elementsEqual($1) }) { + XCTFail("'\(seq1)' and '\(seq2)' are not equal.") + } + } catch { + XCTFail(String(describing: error)) + } + } - func assertParseAll(_ pattern: P, input: String, result: [String], - file: StaticString = #file, line: UInt = #line) { + func assertParseAll + (_ parser: Parser, input: Input, result: [Input], file: StaticString = #file, line: UInt = #line) + where Input.Element: Hashable { + let parsed = parser.ranges(in: input).map { input[$0] } + XCTAssertEqualElements(parsed, result, file: file, line: line) + } + + func assertParseAll + (_ pattern: P, input: P.Input, result: [P.Input], file: StaticString = #file, line: UInt = #line) { do { let parser = try Parser(search: pattern) assertParseAll(parser, input: input, result: result, file: file, line: line) @@ -42,18 +60,19 @@ extension XCTestCase { } } - func assertParseAll(_ parser: Parser, input: String, result: String? = nil, count: Int, - file: StaticString = #file, line: UInt = #line) { - if let result = result { - assertParseAll(parser, input: input, result: Array(repeating: result, count: count), file: file, line: line) - return - } else { - let parsedCount = parser.matches(in: input).reduce(into: 0) { count, _ in count += 1 } - XCTAssertEqual(parsedCount, count, "Incorrect count.", file: file, line: line) + func assertParseAll + (_ parser: Parser, input: Input, result: Input? = nil, count: Int, file: StaticString = #file, line: UInt = #line) + where Input.Element: Hashable { + if let result = result { + assertParseAll(parser, input: input, result: Array(repeating: result, count: count), file: file, line: line) + return + } else { + let parsedCount = parser.matches(in: input).reduce(into: 0) { count, _ in count += 1 } + XCTAssertEqual(parsedCount, count, "Incorrect count.", file: file, line: line) + } } - } - func assertParseAll(_ pattern: P, input: String, result: String? = nil, count: Int, + func assertParseAll(_ pattern: P, input: P.Input, result: P.Input? = nil, count: Int, file: StaticString = #file, line: UInt = #line) { do { let parser = try Parser(search: pattern) @@ -75,7 +94,7 @@ extension XCTestCase { } func assertParseMarkers(_ pattern: P, input: String, - file: StaticString = #file, line: UInt = #line) { + file: StaticString = #file, line: UInt = #line) where P.Input == String { assertParseMarkers(try! Parser(search: pattern), input: input, file: file, line: line) } @@ -99,7 +118,7 @@ extension XCTestCase { } func assertCaptures(_ pattern: P, input: String, result: [[String]], - file: StaticString = #file, line: UInt = #line) { + file: StaticString = #file, line: UInt = #line) where P.Input == String { assertCaptures(try! Parser(search: pattern), input: input, result: result, file: file, line: line) } diff --git a/Tests/PerformanceTests/PerformanceTests.swift b/Tests/PerformanceTests/PerformanceTests.swift index f77c1ce..05172bc 100644 --- a/Tests/PerformanceTests/PerformanceTests.swift +++ b/Tests/PerformanceTests/PerformanceTests.swift @@ -13,7 +13,30 @@ import XCTest // It's just there to notify us when the number of hits changes. class PerformanceTests: XCTestCase { - func speedTest(_ pattern: Parser, testFile: String = "Long.txt", textFraction: Int = 1, hits: Int, file: StaticString = #file, line: UInt = #line) throws { + func speedTest(_ pattern: Parser, testFile: String = "Long.txt", textFraction: Int = 1, hits: Int, + file: StaticString = #filePath, line: UInt = #line) throws { + let fulltext = try String(contentsOf: getLocalURL(for: testFile)) + let text = String(fulltext.prefix(fulltext.count / textFraction)).utf8 + var result = 0 + let block = { + result = pattern.matches(in: text).reduce(into: 0) { c, _ in c += 1 } + } + #if DEBUG + block() + #else + if #available(OSX 10.15, *) { + let options = XCTMeasureOptions() + options.iterationCount = 10 + self.measure(metrics: [XCTCPUMetric(limitingToCurrentThread: true)], options: options, block: block) + } else { + self.measure(block) + } + #endif + XCTAssertEqual(result, hits, file: file, line: line) + } + + func speedTest(_ pattern: Parser, testFile: String = "Long.txt", textFraction: Int = 1, hits: Int, + file: StaticString = #filePath, line: UInt = #line) throws { let fulltext = try String(contentsOf: getLocalURL(for: testFile)) let text = String(fulltext.prefix(fulltext.count / textFraction)) var result = 0 @@ -50,34 +73,36 @@ class PerformanceTests: XCTestCase { } func testLine() throws { - let pattern = try Parser(search: Line.start • Capture(Skip()) • Line.end) + let pattern = try Parser(search: Line.Start() • Capture(Skip()) • Line.End()) try speedTest(pattern, textFraction: 2, hits: 7260) } func testNotNewLine() throws { - let pattern = try Parser(search: "," • Capture(Skip()) • Line.end) + let pattern = try Parser(search: "," • Capture(Skip()) • Line.End()) try speedTest(pattern, textFraction: 2, hits: 4933) } func testLiteralSearch() throws { - let pattern = try Parser(search: Literal("Prince")) + let pattern = try Parser(search: Literal("Prince")) try speedTest(pattern, textFraction: 1, hits: 2168) } func testGrammarLiteralSearch() throws { - let g = Grammar() - g.a <- Capture("Prince") / any • g.a + func any() -> OneOf { OneOf(description: "any", contains: { _ in true }) } + + let g = Grammar() + g.a <- Capture("Prince") / any() • g.a let pattern = try Parser(g) try speedTest(pattern, textFraction: 13, hits: 260) } func testNonExistentLiteralSearch() throws { - let pattern = try Parser(search: "\n" • Skip() • "DOESN'T EXIST") + let pattern = try Parser(search: "\n" • Skip() • "DOESN'T EXIST") try speedTest(pattern, textFraction: 1, hits: 0) } func testOptionalStringFollowedByNonOptionalString() throws { - let pattern = try Parser(search: Literal("\"")¿ • "I") + let pattern = try Parser(search: Literal("\"")¿ • "I") try speedTest(pattern, textFraction: 12, hits: 814) } @@ -88,7 +113,7 @@ class PerformanceTests: XCTestCase { func testSkipping1() throws { // [ word.boundary ] * " " * ":" * " " * " " * " " * "{" * Line.end - let pattern = try Parser(search: "." • Skip() • " " • Skip() • " ") + let pattern = try Parser(search: "." • Skip() • " " • Skip() • " ") try speedTest(pattern, textFraction: 2, hits: 13939) }