Skip to content

Commit eff4cb1

Browse files
authored
Merge pull request #12305 from dotty-staging/fix-12150
Refactor handling of statement separators in Parser
2 parents 6c81a74 + 78d370a commit eff4cb1

24 files changed

+177
-155
lines changed

compiler/src/dotty/tools/dotc/parsing/Parsers.scala

+82-100
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ object Parsers {
167167
class Parser(source: SourceFile)(using Context) extends ParserCommon(source) {
168168

169169
val in: Scanner = new Scanner(source)
170+
// in.debugTokenStream = true // uncomment to see the token stream of the standard scanner, but not syntax highlighting
170171

171172
/** This is the general parse entry point.
172173
* Overridden by ScriptParser
@@ -233,15 +234,6 @@ object Parsers {
233234

234235
/* ------------- ERROR HANDLING ------------------------------------------- */
235236

236-
/** The offset of the last time when a statement on a new line was definitely
237-
* encountered in the current scope or an outer scope.
238-
*/
239-
private var lastStatOffset = -1
240-
241-
def setLastStatOffset(): Unit =
242-
if (mustStartStat && in.isAfterLineEnd)
243-
lastStatOffset = in.offset
244-
245237
/** Is offset1 less or equally indented than offset2?
246238
* This is the case if the characters between the preceding end-of-line and offset1
247239
* are a prefix of the characters between the preceding end-of-line and offset2.
@@ -262,6 +254,7 @@ object Parsers {
262254
|| skipStopTokens.contains(in.token) && (in.currentRegion eq lastRegion)
263255
while !atStop do
264256
in.nextToken()
257+
lastErrorOffset = in.offset
265258

266259
def warning(msg: Message, sourcePos: SourcePosition): Unit =
267260
report.warning(msg, sourcePos)
@@ -281,11 +274,9 @@ object Parsers {
281274
*/
282275
def syntaxErrorOrIncomplete(msg: Message, offset: Int = in.offset): Unit =
283276
if (in.token == EOF) incompleteInputError(msg)
284-
else {
277+
else
285278
syntaxError(msg, offset)
286279
skip()
287-
lastErrorOffset = in.offset
288-
}
289280

290281
/** Consume one token of the specified type, or
291282
* signal an error if it is not there.
@@ -320,22 +311,45 @@ object Parsers {
320311
def acceptStatSep(): Unit =
321312
if in.isNewLine then in.nextToken() else accept(SEMI)
322313

323-
def acceptStatSepUnlessAtEnd[T <: Tree](stats: ListBuffer[T], altEnd: Token = EOF): Unit =
324-
def skipEmptyStats(): Unit =
325-
while (in.token == SEMI || in.token == NEWLINE || in.token == NEWLINES) do in.nextToken()
326-
327-
in.observeOutdented()
328-
in.token match
329-
case SEMI | NEWLINE | NEWLINES =>
330-
skipEmptyStats()
314+
/** Parse statement separators and end markers. Ensure that there is at least
315+
* one statement separator unless the next token terminates a statement sequence.
316+
* @param stats the statements parsed so far
317+
* @param noPrevStat true if there was no immediately preceding statement parsed
318+
* @param what a string indicating what kind of statement is parsed
319+
* @param altEnd a token that is also considered as a terminator of the statement
320+
* sequence (the default `EOF` is already assumed to terminate a statement
321+
* sequence).
322+
* @return true if the statement sequence continues, false if it terminates.
323+
*/
324+
def statSepOrEnd[T <: Tree](stats: ListBuffer[T], noPrevStat: Boolean = false, what: String = "statement", altEnd: Token = EOF): Boolean =
325+
def recur(sepSeen: Boolean, endSeen: Boolean): Boolean =
326+
if isStatSep then
327+
in.nextToken()
328+
recur(true, endSeen)
329+
else if in.token == END then
330+
if endSeen then syntaxError("duplicate end marker")
331331
checkEndMarker(stats)
332-
skipEmptyStats()
333-
case `altEnd` =>
334-
case _ =>
335-
if !isStatSeqEnd then
336-
syntaxError(i"end of statement expected but ${showToken(in.token)} found")
332+
recur(sepSeen, true)
333+
else if isStatSeqEnd || in.token == altEnd then
334+
false
335+
else if sepSeen || endSeen then
336+
true
337+
else
338+
val found = in.token
339+
val statFollows = mustStartStatTokens.contains(found)
340+
syntaxError(
341+
if noPrevStat then IllegalStartOfStatement(what, isModifier, statFollows)
342+
else i"end of $what expected but ${showToken(found)} found")
343+
if mustStartStatTokens.contains(found) then
344+
false // it's a statement that might be legal in an outer context
345+
else
337346
in.nextToken() // needed to ensure progress; otherwise we might cycle forever
338-
accept(SEMI)
347+
skip()
348+
true
349+
350+
in.observeOutdented()
351+
recur(false, false)
352+
end statSepOrEnd
339353

340354
def rewriteNotice(version: String = "3.0", additionalOption: String = "") = {
341355
val optionStr = if (additionalOption.isEmpty) "" else " " ++ additionalOption
@@ -533,11 +547,8 @@ object Parsers {
533547
if (in.rewriteToIndent) bracesToIndented(body, rewriteWithColon)
534548
else inBraces(body)
535549

536-
def inDefScopeBraces[T](body: => T, rewriteWithColon: Boolean = false): T = {
537-
val saved = lastStatOffset
538-
try inBracesOrIndented(body, rewriteWithColon)
539-
finally lastStatOffset = saved
540-
}
550+
def inDefScopeBraces[T](body: => T, rewriteWithColon: Boolean = false): T =
551+
inBracesOrIndented(body, rewriteWithColon)
541552

542553
/** part { `separator` part }
543554
*/
@@ -1254,7 +1265,7 @@ object Parsers {
12541265
def possibleTemplateStart(isNew: Boolean = false): Unit =
12551266
in.observeColonEOL()
12561267
if in.token == COLONEOL then
1257-
if in.lookahead.isIdent(nme.end) then in.token = NEWLINE
1268+
if in.lookahead.token == END then in.token = NEWLINE
12581269
else
12591270
in.nextToken()
12601271
if in.token != INDENT && in.token != LBRACE then
@@ -1284,25 +1295,12 @@ object Parsers {
12841295
case _: (ForYield | ForDo) => in.token == FOR
12851296
case _ => false
12861297

1287-
if isIdent(nme.end) then
1288-
val start = in.offset
1289-
val isEndMarker =
1290-
val endLine = source.offsetToLine(start)
1291-
val lookahead = in.LookaheadScanner()
1292-
lookahead.nextToken()
1293-
source.offsetToLine(lookahead.offset) == endLine
1294-
&& endMarkerTokens.contains(in.token)
1295-
&& {
1296-
lookahead.nextToken()
1297-
lookahead.token == EOF
1298-
|| source.offsetToLine(lookahead.offset) > endLine
1299-
}
1300-
if isEndMarker then
1301-
in.nextToken()
1302-
if stats.isEmpty || !matches(stats.last) then
1303-
syntaxError("misaligned end marker", Span(start, in.lastCharOffset))
1304-
in.token = IDENTIFIER // Leaving it as the original token can confuse newline insertion
1305-
in.nextToken()
1298+
if in.token == END then
1299+
val start = in.skipToken()
1300+
if stats.isEmpty || !matches(stats.last) then
1301+
syntaxError("misaligned end marker", Span(start, in.lastCharOffset))
1302+
in.token = IDENTIFIER // Leaving it as the original token can confuse newline insertion
1303+
in.nextToken()
13061304
end checkEndMarker
13071305

13081306
/* ------------- TYPES ------------------------------------------------------ */
@@ -1538,10 +1536,7 @@ object Parsers {
15381536
else t
15391537

15401538
/** The block in a quote or splice */
1541-
def stagedBlock() =
1542-
val saved = lastStatOffset
1543-
try inBraces(block(simplify = true))
1544-
finally lastStatOffset = saved
1539+
def stagedBlock() = inBraces(block(simplify = true))
15451540

15461541
/** SimpleExpr ::= spliceId | ‘$’ ‘{’ Block ‘}’)
15471542
* SimpleType ::= spliceId | ‘$’ ‘{’ Block ‘}’)
@@ -3641,11 +3636,10 @@ object Parsers {
36413636
*/
36423637
def extMethods(numLeadParams: Int): List[DefDef] = checkNoEscapingPlaceholders {
36433638
val meths = new ListBuffer[DefDef]
3644-
val exitOnError = false
3645-
while !isStatSeqEnd && !exitOnError do
3646-
setLastStatOffset()
3639+
while
36473640
meths += extMethod(numLeadParams)
3648-
acceptStatSepUnlessAtEnd(meths)
3641+
statSepOrEnd(meths, what = "extension method")
3642+
do ()
36493643
if meths.isEmpty then syntaxError("`def` expected")
36503644
meths.toList
36513645
}
@@ -3781,8 +3775,8 @@ object Parsers {
37813775
*/
37823776
def topStatSeq(outermost: Boolean = false): List[Tree] = {
37833777
val stats = new ListBuffer[Tree]
3784-
while (!isStatSeqEnd) {
3785-
setLastStatOffset()
3778+
while
3779+
var empty = false
37863780
if (in.token == PACKAGE) {
37873781
val start = in.skipToken()
37883782
if (in.token == OBJECT) {
@@ -3799,13 +3793,10 @@ object Parsers {
37993793
stats += extension()
38003794
else if isDefIntro(modifierTokens) then
38013795
stats +++= defOrDcl(in.offset, defAnnotsMods(modifierTokens))
3802-
else if !isStatSep then
3803-
if (in.token == CASE)
3804-
syntaxErrorOrIncomplete(OnlyCaseClassOrCaseObjectAllowed())
3805-
else
3806-
syntaxErrorOrIncomplete(ExpectedToplevelDef())
3807-
acceptStatSepUnlessAtEnd(stats)
3808-
}
3796+
else
3797+
empty = true
3798+
statSepOrEnd(stats, empty, "toplevel definition")
3799+
do ()
38093800
stats.toList
38103801
}
38113802

@@ -3837,14 +3828,12 @@ object Parsers {
38373828
in.token = SELFARROW // suppresses INDENT insertion after `=>`
38383829
in.nextToken()
38393830
}
3840-
else {
3831+
else
38413832
stats += first
3842-
acceptStatSepUnlessAtEnd(stats)
3843-
}
3833+
statSepOrEnd(stats)
38443834
}
3845-
var exitOnError = false
3846-
while (!isStatSeqEnd && !exitOnError) {
3847-
setLastStatOffset()
3835+
while
3836+
var empty = false
38483837
if (in.token == IMPORT)
38493838
stats ++= importClause(IMPORT, mkImport())
38503839
else if (in.token == EXPORT)
@@ -3855,12 +3844,10 @@ object Parsers {
38553844
stats +++= defOrDcl(in.offset, defAnnotsMods(modifierTokens))
38563845
else if (isExprIntro)
38573846
stats += expr1()
3858-
else if (!isStatSep) {
3859-
exitOnError = mustStartStat
3860-
syntaxErrorOrIncomplete("illegal start of definition")
3861-
}
3862-
acceptStatSepUnlessAtEnd(stats)
3863-
}
3847+
else
3848+
empty = true
3849+
statSepOrEnd(stats, empty)
3850+
do ()
38643851
(self, if (stats.isEmpty) List(EmptyTree) else stats.toList)
38653852
}
38663853

@@ -3889,16 +3876,14 @@ object Parsers {
38893876
if problem.isEmpty then tree :: Nil
38903877
else { syntaxError(problem, tree.span); Nil }
38913878

3892-
while (!isStatSeqEnd) {
3893-
if (isDclIntro)
3879+
while
3880+
val dclFound = isDclIntro
3881+
if dclFound then
38943882
stats ++= checkLegal(defOrDcl(in.offset, Modifiers()))
3895-
else if (!isStatSep)
3896-
syntaxErrorOrIncomplete(
3897-
"illegal start of declaration" +
3898-
(if (inFunReturnType) " (possible cause: missing `=` in front of current method body)"
3899-
else ""))
3900-
acceptStatSepUnlessAtEnd(stats)
3901-
}
3883+
var what = "declaration"
3884+
if inFunReturnType then what += " (possible cause: missing `=` in front of current method body)"
3885+
statSepOrEnd(stats, !dclFound, what)
3886+
do ()
39023887
stats.toList
39033888
}
39043889

@@ -3922,9 +3907,8 @@ object Parsers {
39223907
*/
39233908
def blockStatSeq(): List[Tree] = checkNoEscapingPlaceholders {
39243909
val stats = new ListBuffer[Tree]
3925-
var exitOnError = false
3926-
while (!isStatSeqEnd && in.token != CASE && !exitOnError) {
3927-
setLastStatOffset()
3910+
while
3911+
var empty = false
39283912
if (in.token == IMPORT)
39293913
stats ++= importClause(IMPORT, mkImport())
39303914
else if (isExprIntro)
@@ -3935,12 +3919,10 @@ object Parsers {
39353919
stats += extension()
39363920
else if isDefIntro(localModifierTokens, excludedSoftModifiers = Set(nme.`opaque`)) then
39373921
stats +++= localDef(in.offset)
3938-
else if (!isStatSep && (in.token != CASE)) {
3939-
exitOnError = mustStartStat
3940-
syntaxErrorOrIncomplete(IllegalStartOfStatement(isModifier))
3941-
}
3942-
acceptStatSepUnlessAtEnd(stats, CASE)
3943-
}
3922+
else
3923+
empty = true
3924+
statSepOrEnd(stats, empty, altEnd = CASE)
3925+
do ()
39443926
stats.toList
39453927
}
39463928

@@ -3957,7 +3939,7 @@ object Parsers {
39573939
in.nextToken()
39583940
ts += objectDef(start, Modifiers(Package))
39593941
if (in.token != EOF) {
3960-
acceptStatSepUnlessAtEnd(ts)
3942+
statSepOrEnd(ts, what = "toplevel definition")
39613943
ts ++= topStatSeq()
39623944
}
39633945
}
@@ -3974,7 +3956,7 @@ object Parsers {
39743956
acceptStatSep()
39753957
ts += makePackaging(start, pkg, topstats())
39763958
if continue then
3977-
acceptStatSepUnlessAtEnd(ts)
3959+
statSepOrEnd(ts, what = "toplevel definition")
39783960
ts ++= topStatSeq()
39793961
}
39803962
else

compiler/src/dotty/tools/dotc/parsing/Scanners.scala

+24-5
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,16 @@ object Scanners {
135135
*/
136136
protected def putChar(c: Char): Unit = litBuf.append(c)
137137

138-
/** Clear buffer and set name and token */
139-
def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit = {
138+
/** Clear buffer and set name and token.
139+
* If `target` is different from `this`, don't treat identifiers as end tokens
140+
*/
141+
def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit =
140142
target.name = termName(litBuf.chars, 0, litBuf.length)
141143
litBuf.clear()
142144
target.token = idtoken
143-
if (idtoken == IDENTIFIER)
144-
target.token = toToken(target.name)
145-
}
145+
if idtoken == IDENTIFIER then
146+
val converted = toToken(target.name)
147+
if converted != END || (target eq this) then target.token = converted
146148

147149
/** The token for given `name`. Either IDENTIFIER or a keyword. */
148150
def toToken(name: SimpleName): Token
@@ -656,6 +658,8 @@ object Scanners {
656658
() /* skip the trailing comma */
657659
else
658660
reset()
661+
case END =>
662+
if !isEndMarker then token = IDENTIFIER
659663
case COLON =>
660664
if fewerBracesEnabled then observeColonEOL()
661665
case RBRACE | RPAREN | RBRACKET =>
@@ -666,6 +670,21 @@ object Scanners {
666670
}
667671
}
668672

673+
protected def isEndMarker: Boolean =
674+
if indentSyntax && isAfterLineEnd then
675+
val endLine = source.offsetToLine(offset)
676+
val lookahead = new LookaheadScanner():
677+
override def isEndMarker = false
678+
lookahead.nextToken()
679+
if endMarkerTokens.contains(lookahead.token)
680+
&& source.offsetToLine(lookahead.offset) == endLine
681+
then
682+
lookahead.nextToken()
683+
if lookahead.token == EOF
684+
|| source.offsetToLine(lookahead.offset) > endLine
685+
then return true
686+
false
687+
669688
/** Is there a blank line between the current token and the last one?
670689
* A blank line consists only of characters <= ' '.
671690
* @pre afterLineEnd().

0 commit comments

Comments
 (0)