Skip to content

Commit 9cf3cfc

Browse files
authored
Merge pull request #393 from hamishknight/stricter-syntax
2 parents 489c63c + db58c1b commit 9cf3cfc

File tree

12 files changed

+145
-20
lines changed

12 files changed

+145
-20
lines changed

Sources/_RegexParser/Regex/AST/AST.swift

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ extension AST {
5252
/// Comments, non-semantic whitespace, etc
5353
case trivia(Trivia)
5454

55+
/// Interpolation `<{...}>`, currently reserved for future use.
56+
case interpolation(Interpolation)
57+
5558
case atom(Atom)
5659

5760
case customCharacterClass(CustomCharacterClass)
@@ -77,6 +80,7 @@ extension AST.Node {
7780
case let .quantification(v): return v
7881
case let .quote(v): return v
7982
case let .trivia(v): return v
83+
case let .interpolation(v): return v
8084
case let .atom(v): return v
8185
case let .customCharacterClass(v): return v
8286
case let .empty(v): return v
@@ -129,7 +133,7 @@ extension AST.Node {
129133
case .conditional, .customCharacterClass, .absentFunction:
130134
return true
131135
case .alternation, .concatenation, .quantification, .quote, .trivia,
132-
.empty:
136+
.empty, .interpolation:
133137
return false
134138
}
135139
}
@@ -193,6 +197,16 @@ extension AST {
193197
}
194198
}
195199

200+
public struct Interpolation: Hashable, _ASTNode {
201+
public let contents: String
202+
public let location: SourceLocation
203+
204+
public init(_ contents: String, _ location: SourceLocation) {
205+
self.contents = contents
206+
self.location = location
207+
}
208+
}
209+
196210
public struct Empty: Hashable, _ASTNode {
197211
public let location: SourceLocation
198212

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ extension AST.Node {
819819
case .alternation, .concatenation, .group,
820820
.conditional, .quantification, .quote,
821821
.trivia, .customCharacterClass, .empty,
822-
.absentFunction:
822+
.absentFunction, .interpolation:
823823
return nil
824824
}
825825
}

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ extension AST.Node {
103103
break
104104
}
105105

106-
case .quote, .trivia, .atom, .customCharacterClass, .empty:
106+
case .quote, .trivia, .atom, .customCharacterClass, .empty, .interpolation:
107107
break
108108
}
109109
}

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ enum ParseError: Error, Hashable {
4242
case expectedNonEmptyContents
4343
case expectedEscape
4444
case invalidEscape(Character)
45+
case confusableCharacter(Character)
4546

4647
case cannotReferToWholePattern
4748

@@ -128,6 +129,8 @@ extension ParseError: CustomStringConvertible {
128129
return "expected escape sequence"
129130
case .invalidEscape(let c):
130131
return "invalid escape sequence '\\\(c)'"
132+
case .confusableCharacter(let c):
133+
return "'\(c)' is confusable for a metacharacter; use '\\u{...}' instead"
131134
case .cannotReferToWholePattern:
132135
return "cannot refer to whole pattern here"
133136
case .quantifierRequiresOperand(let q):

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,26 @@ extension Source {
589589
return AST.Quote(str.value, str.location)
590590
}
591591

592+
/// Try to consume an interpolation sequence.
593+
///
594+
/// Interpolation -> '<{' String '}>'
595+
///
596+
mutating func lexInterpolation() throws -> AST.Interpolation? {
597+
let contents = try recordLoc { src -> String? in
598+
try src.tryEating { src in
599+
guard src.tryEat(sequence: "<{") else { return nil }
600+
_ = src.lexUntil { $0.isEmpty || $0.starts(with: "}>") }
601+
guard src.tryEat(sequence: "}>") else { return nil }
602+
603+
// Not currently supported. We error here instead of during Sema to
604+
// get a better error for something like `(<{)}>`.
605+
throw ParseError.unsupported("interpolation")
606+
}
607+
}
608+
guard let contents = contents else { return nil }
609+
return .init(contents.value, contents.location)
610+
}
611+
592612
/// Try to consume a comment
593613
///
594614
/// Comment -> '(?#' [^')']* ')'
@@ -1674,9 +1694,10 @@ extension Source {
16741694
break
16751695
}
16761696

1677-
// We only allow unknown escape sequences for non-letter ASCII, and
1678-
// non-ASCII whitespace.
1679-
guard (char.isASCII && !char.isLetter) ||
1697+
// We only allow unknown escape sequences for non-letter non-number ASCII,
1698+
// and non-ASCII whitespace.
1699+
// TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.
1700+
guard (char.isASCII && !char.isLetter && !char.isNumber) ||
16801701
(!char.isASCII && char.isWhitespace)
16811702
else {
16821703
throw ParseError.invalidEscape(char)
@@ -1981,10 +2002,21 @@ extension Source {
19812002

19822003
case "]":
19832004
assert(!customCC, "parser should have prevented this")
1984-
fallthrough
2005+
break
19852006

1986-
default: return .char(char)
2007+
default:
2008+
// Reject non-letter non-number non-`\r\n` ASCII characters that have
2009+
// multiple scalars. These may be confusable for metacharacters, e.g.
2010+
// `[\u{301}]` wouldn't be interpreted as a custom character class due
2011+
// to the combining accent (assuming it is literal, not `\u{...}`).
2012+
let scalars = char.unicodeScalars
2013+
if scalars.count > 1 && scalars.first!.isASCII && char != "\r\n" &&
2014+
!char.isLetter && !char.isNumber {
2015+
throw ParseError.confusableCharacter(char)
2016+
}
2017+
break
19872018
}
2019+
return .char(char)
19882020
}
19892021
guard let kind = kind else { return nil }
19902022
return AST.Atom(kind.value, kind.location)

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,13 @@ extension Parser {
222222
result.append(.quote(quote))
223223
continue
224224
}
225+
226+
// Interpolation -> `lexInterpolation`
227+
if let interpolation = try source.lexInterpolation() {
228+
result.append(.interpolation(interpolation))
229+
continue
230+
}
231+
225232
// Quantification -> QuantOperand Quantifier?
226233
if let operand = try parseQuantifierOperand() {
227234
if let (amt, kind, trivia) =

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,11 @@ extension RegexValidator {
395395
// These are Oniguruma specific.
396396
throw error(.unsupported("absent function"), at: a.location)
397397

398+
case .interpolation(let i):
399+
// This is currently rejected in the parser for better diagnostics, but
400+
// reject here too until we get runtime support.
401+
throw error(.unsupported("interpolation"), at: i.location)
402+
398403
case .quote, .trivia, .empty:
399404
break
400405
}

Sources/_RegexParser/Regex/Printing/DumpAST.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ extension AST.Trivia {
101101
}
102102
}
103103

104+
extension AST.Interpolation {
105+
public var _dumpBase: String { "interpolation <\(contents)>" }
106+
}
107+
104108
extension AST.Empty {
105109
public var _dumpBase: String { "" }
106110
}

Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ extension PrettyPrinter {
9797
case let .trivia(t):
9898
output(t._canonicalBase)
9999

100+
case let .interpolation(i):
101+
output(i._canonicalBase)
102+
100103
case let .atom(a):
101104
output(a._canonicalBase)
102105

@@ -178,6 +181,12 @@ extension AST.Quote {
178181
}
179182
}
180183

184+
extension AST.Interpolation {
185+
var _canonicalBase: String {
186+
"<{\(contents)}>"
187+
}
188+
}
189+
181190
extension AST.Group.Kind {
182191
var _canonicalBase: String {
183192
switch self {

Sources/_StringProcessing/Regex/ASTConversion.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ extension AST.Node {
137137
case let .trivia(v):
138138
return .trivia(v.contents)
139139

140+
case .interpolation:
141+
throw Unsupported("TODO: interpolation")
142+
140143
case let .atom(v):
141144
switch v.kind {
142145
case .scalarSequence(let seq):

0 commit comments

Comments
 (0)