-
Notifications
You must be signed in to change notification settings - Fork 51
Allow setting any of the three quant behaviors #311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
9fa682b
a62476a
d706630
59e6124
c579f3e
a74779c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,6 +63,16 @@ extension MatchingOptions { | |
stack.last!.contains(.reluctantByDefault) | ||
} | ||
|
||
var defaultQuantificationKind: AST.Quantification.Kind { | ||
if stack.last!.contains(.possessiveByDefault) { | ||
natecook1000 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return .possessive | ||
} else if stack.last!.contains(.reluctantByDefault) { | ||
return .reluctant | ||
} else { | ||
return .eager | ||
} | ||
} | ||
|
||
var dotMatchesNewline: Bool { | ||
stack.last!.contains(.singleLine) | ||
} | ||
|
@@ -150,6 +160,9 @@ extension MatchingOptions { | |
case unicodeScalarSemantics | ||
case byteSemantics | ||
|
||
// Swift-only default possessive quantifier | ||
case possessiveByDefault | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be modeled in this way? It seems like options would just have a quantification kind in it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a bitset internally, so it's modeling the tripartite state with two Booleans and an invariant check. |
||
|
||
init?(_ astKind: AST.MatchingOption.Kind) { | ||
switch astKind { | ||
case .caseInsensitive: | ||
|
@@ -184,6 +197,8 @@ extension MatchingOptions { | |
self = .unicodeScalarSemantics | ||
case .byteSemantics: | ||
self = .byteSemantics | ||
case .possessiveByDefault: | ||
self = .possessiveByDefault | ||
|
||
// Whitespace options are only relevant during parsing, not compilation. | ||
case .extended, .extraExtended: | ||
|
@@ -219,6 +234,9 @@ extension MatchingOptions { | |
if Self.textSegmentOptions.contains(opt.representation) { | ||
remove(.textSegmentOptions) | ||
} | ||
if Self.quantificationBehaviors.contains(opt.representation) { | ||
remove(.quantificationBehaviors) | ||
} | ||
|
||
insert(opt.representation) | ||
} | ||
|
@@ -274,7 +292,15 @@ extension MatchingOptions.Representation { | |
static var semanticMatchingLevels: Self { | ||
[.graphemeClusterSemantics, .unicodeScalarSemantics, .byteSemantics] | ||
} | ||
|
||
|
||
// Quantification behavior options | ||
static var reluctantByDefault: Self { .init(.reluctantByDefault) } | ||
static var possessiveByDefault: Self { .init(.possessiveByDefault) } | ||
|
||
static var quantificationBehaviors: Self { | ||
[.reluctantByDefault, .possessiveByDefault] | ||
natecook1000 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/// The default set of options. | ||
static var `default`: Self { | ||
[.graphemeClusterSemantics, .textSegmentGraphemeMode] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,8 +58,8 @@ extension RegexComponent { | |
/// Returns a regular expression where the start and end of input | ||
/// anchors (`^` and `$`) also match against the start and end of a line. | ||
/// | ||
/// This method corresponds to applying the `m` option in a regular | ||
/// expression literal. For this behavior in the `RegexBuilder` syntax, see | ||
/// This method corresponds to applying the `m` option in regex syntax. For | ||
/// this behavior in the `RegexBuilder` syntax, see | ||
/// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``, | ||
/// and ``Anchor.endOfInput``. | ||
/// | ||
|
@@ -69,16 +69,22 @@ extension RegexComponent { | |
wrapInOption(.multiline, addingIf: matchLineEndings) | ||
} | ||
|
||
/// Returns a regular expression where quantifiers are reluctant by default | ||
/// instead of eager. | ||
/// Returns a regular expression where quantifiers use the specified behavior | ||
/// by default. | ||
/// | ||
/// This method corresponds to applying the `U` option in a regular | ||
/// expression literal. | ||
/// This setting does not affect calls to quantifier methods, such as | ||
/// `OneOrMore`, that include an explicit `behavior` parameter. | ||
/// | ||
/// - Parameter useReluctantQuantifiers: A Boolean value indicating whether | ||
/// quantifiers should be reluctant by default. | ||
public func reluctantQuantifiers(_ useReluctantQuantifiers: Bool = true) -> Regex<RegexOutput> { | ||
wrapInOption(.reluctantByDefault, addingIf: useReluctantQuantifiers) | ||
/// Passing `.eager` or `.reluctant` to this method corresponds to applying | ||
/// the `(?-U)` or `(?U)` option in regex syntax, respectively. | ||
/// | ||
/// - Parameter behavior: The default behavior to use for quantifiers. | ||
public func quantificationBehavior(_ behavior: RegexQuantificationBehavior) -> Regex<RegexOutput> { | ||
if behavior == .possessive { | ||
return wrapInOption(.possessiveByDefault, addingIf: true) | ||
} else { | ||
return wrapInOption(.reluctantByDefault, addingIf: behavior == .reluctant) | ||
} | ||
} | ||
|
||
/// Returns a regular expression that matches with the specified semantic | ||
|
@@ -183,6 +189,46 @@ public struct RegexWordBoundaryKind: Hashable { | |
} | ||
} | ||
|
||
/// Specifies how much to attempt to match when using a quantifier. | ||
@available(SwiftStdlib 5.7, *) | ||
public struct RegexQuantificationBehavior: Hashable { | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want an unspecified case or do we want to model as optional behaviors?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no unspecified case —
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But the choice of default is contextual. I think we'd want to resolve an unspecified quantification at compilation time by checking our options context. If someone had a context that explicitly overrode unspecified with
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The default isn't contextual, it's
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And how would they have a non-overridable |
||
internal enum Kind { | ||
case eager | ||
case reluctant | ||
case possessive | ||
} | ||
|
||
var kind: Kind | ||
|
||
@_spi(RegexBuilder) public var dslTreeKind: DSLTree._AST.QuantificationKind { | ||
switch kind { | ||
case .eager: return .eager | ||
case .reluctant: return .reluctant | ||
case .possessive: return .possessive | ||
} | ||
} | ||
} | ||
|
||
@available(SwiftStdlib 5.7, *) | ||
extension RegexQuantificationBehavior { | ||
/// Match as much of the input string as possible, backtracking when | ||
/// necessary. | ||
public static var eager: Self { | ||
.init(kind: .eager) | ||
} | ||
|
||
/// Match as little of the input string as possible, expanding the matched | ||
/// region as necessary to complete a match. | ||
public static var reluctant: Self { | ||
.init(kind: .reluctant) | ||
} | ||
|
||
/// Match as much of the input string as possible, performing no backtracking. | ||
public static var possessive: Self { | ||
.init(kind: .possessive) | ||
} | ||
} | ||
|
||
// MARK: - Helper method | ||
|
||
@available(SwiftStdlib 5.7, *) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In general, it's fine to model Swift extensions (even ones that we haven't assigned letters to) in the options here. But I'm curious whether, and why, it's necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's just more of our existing AST leakage. I think it's okay since we'll want it in the AST eventually, but we still do need to audit and remove AST values from non-AST types.