diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift index e8cd4ac54..ae66310af 100644 --- a/Sources/RegexBuilder/Anchor.swift +++ b/Sources/RegexBuilder/Anchor.swift @@ -12,6 +12,12 @@ @_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing +/// A regex component that matches a specific condition at a particular position +/// in an input string. +/// +/// You can use anchors to guarantee that a match only occurs at certain points +/// in an input string, such as at the beginning of the string or at the end of +/// a line. @available(SwiftStdlib 5.7, *) public struct Anchor { internal enum Kind { @@ -53,14 +59,24 @@ extension Anchor: RegexComponent { @available(SwiftStdlib 5.7, *) extension Anchor { + /// An anchor that matches at the start of the input string. + /// + /// This anchor is equivalent to `\A` in regex syntax. public static var startOfSubject: Anchor { Anchor(kind: .startOfSubject) } - + + /// An anchor that matches at the end of the input string or at the end of + /// the line immediately before the end of the string. + /// + /// This anchor is equivalent to `\Z` in regex syntax. public static var endOfSubjectBeforeNewline: Anchor { Anchor(kind: .endOfSubjectBeforeNewline) } - + + /// An anchor that matches at the end of the input string. + /// + /// This anchor is equivalent to `\z` in regex syntax. public static var endOfSubject: Anchor { Anchor(kind: .endOfSubject) } @@ -70,26 +86,53 @@ extension Anchor { // Anchor(kind: resetStartOfMatch) // } + /// An anchor that matches at the first position of a match in the input + /// string. public static var firstMatchingPositionInSubject: Anchor { Anchor(kind: .firstMatchingPositionInSubject) } + /// An anchor that matches at a grapheme cluster boundary. + /// + /// This anchor is equivalent to `\y` in regex syntax. 
public static var textSegmentBoundary: Anchor { Anchor(kind: .textSegmentBoundary) } + /// An anchor that matches at the start of a line, including the start of + /// the input string. + /// + /// This anchor is equivalent to `^` in regex syntax when the `m` option + /// has been enabled or `anchorsMatchLineEndings(true)` has been called. public static var startOfLine: Anchor { Anchor(kind: .startOfLine) } + /// An anchor that matches at the end of a line, including at the end of + /// the input string. + /// + /// This anchor is equivalent to `$` in regex syntax when the `m` option + /// has been enabled or `anchorsMatchLineEndings(true)` has been called. public static var endOfLine: Anchor { Anchor(kind: .endOfLine) } + /// An anchor that matches at a word boundary. + /// + /// Word boundaries are identified using the Unicode default word boundary + /// algorithm by default. To specify a different word boundary algorithm, + /// see the `RegexComponent.wordBoundaryKind(_:)` method. + /// + /// This anchor is equivalent to `\b` in regex syntax. public static var wordBoundary: Anchor { Anchor(kind: .wordBoundary) } + /// The inverse of this anchor, which matches at every position that this + /// anchor does not. + /// + /// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted + /// version corresponds to `\B` and `\Y`, respectively. public var inverted: Anchor { var result = self result.isInverted.toggle() @@ -97,6 +140,13 @@ extension Anchor { } } +/// A regex component that allows a match to continue only if its contents +/// match at the given location. +/// +/// A lookahead is a zero-length assertion that its included regex matches at +/// a particular position. Lookaheads do not advance the overall matching +/// position in the input string — once a lookahead succeeds, matching continues +/// in the regex from the same position. 
@available(SwiftStdlib 5.7, *) public struct Lookahead: _BuiltinRegexComponent { public var regex: Regex @@ -105,19 +155,48 @@ public struct Lookahead: _BuiltinRegexComponent { self.regex = regex } + /// Creates a lookahead from the given regex component. public init( - _ component: R, - negative: Bool = false + _ component: R ) where R.RegexOutput == Output { - self.init(node: .nonCapturingGroup( - negative ? .negativeLookahead : .lookahead, component.regex.root)) + self.init(node: .nonCapturingGroup(.lookahead, component.regex.root)) } + + /// Creates a lookahead from the regex generated by the given builder closure. + public init( + @RegexComponentBuilder _ component: () -> R + ) where R.RegexOutput == Output { + self.init(node: .nonCapturingGroup(.lookahead, component().regex.root)) + } +} +/// A regex component that allows a match to continue only if its contents +/// do not match at the given location. +/// +/// A negative lookahead is a zero-length assertion that its included regex +/// does not match at a particular position. Lookaheads do not advance the +/// overall matching position in the input string — once a lookahead succeeds, +/// matching continues in the regex from the same position. +@available(SwiftStdlib 5.7, *) +public struct NegativeLookahead: _BuiltinRegexComponent { + public var regex: Regex + + init(_ regex: Regex) { + self.regex = regex + } + + /// Creates a negative lookahead from the given regex component. + public init( + _ component: R + ) where R.RegexOutput == Output { + self.init(node: .nonCapturingGroup(.negativeLookahead, component.regex.root)) + } + + /// Creates a negative lookahead from the regex generated by the given builder + /// closure. public init( - negative: Bool = false, @RegexComponentBuilder _ component: () -> R ) where R.RegexOutput == Output { - self.init(node: .nonCapturingGroup( - negative ? 
.negativeLookahead : .lookahead, component().regex.root)) + self.init(node: .nonCapturingGroup(.negativeLookahead, component().regex.root)) } } diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 4e08ea103..be2b054a5 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -115,7 +115,7 @@ class RegexDSLTests: XCTestCase { { let disallowedChars = CharacterClass.hexDigit .symmetricDifference("a"..."z") - Lookahead(disallowedChars, negative: true) // No: 0-9 + g-z + NegativeLookahead(disallowedChars) // No: 0-9 + g-z OneOrMore(("b"..."g").union("d"..."n")) // b-n @@ -487,7 +487,7 @@ class RegexDSLTests: XCTestCase { { OneOrMore("a") Lookahead(CharacterClass.digit) - Lookahead("2", negative: true) + NegativeLookahead { "2" } CharacterClass.word } }