Skip to content

Refactor handling of statement separators in Parser #12305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 82 additions & 100 deletions compiler/src/dotty/tools/dotc/parsing/Parsers.scala
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ object Parsers {
class Parser(source: SourceFile)(using Context) extends ParserCommon(source) {

val in: Scanner = new Scanner(source)
// in.debugTokenStream = true // uncomment to see the token stream of the standard scanner, but not syntax highlighting

/** This is the general parse entry point.
* Overridden by ScriptParser
Expand Down Expand Up @@ -233,15 +234,6 @@ object Parsers {

/* ------------- ERROR HANDLING ------------------------------------------- */

/** The offset of the last time when a statement on a new line was definitely
* encountered in the current scope or an outer scope.
*/
private var lastStatOffset = -1

def setLastStatOffset(): Unit =
if (mustStartStat && in.isAfterLineEnd)
lastStatOffset = in.offset

/** Is offset1 less or equally indented than offset2?
* This is the case if the characters between the preceding end-of-line and offset1
* are a prefix of the characters between the preceding end-of-line and offset2.
Expand All @@ -262,6 +254,7 @@ object Parsers {
|| skipStopTokens.contains(in.token) && (in.currentRegion eq lastRegion)
while !atStop do
in.nextToken()
lastErrorOffset = in.offset

def warning(msg: Message, sourcePos: SourcePosition): Unit =
report.warning(msg, sourcePos)
Expand All @@ -281,11 +274,9 @@ object Parsers {
*/
def syntaxErrorOrIncomplete(msg: Message, offset: Int = in.offset): Unit =
if (in.token == EOF) incompleteInputError(msg)
else {
else
syntaxError(msg, offset)
skip()
lastErrorOffset = in.offset
}

/** Consume one token of the specified type, or
* signal an error if it is not there.
Expand Down Expand Up @@ -320,22 +311,45 @@ object Parsers {
/** Consume one statement separator: when `in.isNewLine` holds the current
 *  token is skipped, otherwise a `SEMI` token is required via `accept`.
 */
def acceptStatSep(): Unit =
if in.isNewLine then in.nextToken() else accept(SEMI)

def acceptStatSepUnlessAtEnd[T <: Tree](stats: ListBuffer[T], altEnd: Token = EOF): Unit =
def skipEmptyStats(): Unit =
while (in.token == SEMI || in.token == NEWLINE || in.token == NEWLINES) do in.nextToken()

in.observeOutdented()
in.token match
case SEMI | NEWLINE | NEWLINES =>
skipEmptyStats()
/** Parse statement separators and end markers. Ensure that there is at least
* one statement separator unless the next token terminates a statement sequence.
* @param stats the statements parsed so far
* @param noPrevStat true if there was no immediately preceding statement parsed
* @param what a string indicating what kind of statement is parsed
* @param altEnd a token that is also considered as a terminator of the statement
* sequence (the default `EOF` is already assumed to terminate a statement
* sequence).
* @return true if the statement sequence continues, false if it terminates.
*/
def statSepOrEnd[T <: Tree](stats: ListBuffer[T], noPrevStat: Boolean = false, what: String = "statement", altEnd: Token = EOF): Boolean =
def recur(sepSeen: Boolean, endSeen: Boolean): Boolean =
if isStatSep then
in.nextToken()
recur(true, endSeen)
else if in.token == END then
if endSeen then syntaxError("duplicate end marker")
checkEndMarker(stats)
skipEmptyStats()
case `altEnd` =>
case _ =>
if !isStatSeqEnd then
syntaxError(i"end of statement expected but ${showToken(in.token)} found")
recur(sepSeen, true)
else if isStatSeqEnd || in.token == altEnd then
false
else if sepSeen || endSeen then
true
else
val found = in.token
val statFollows = mustStartStatTokens.contains(found)
syntaxError(
if noPrevStat then IllegalStartOfStatement(what, isModifier, statFollows)
else i"end of $what expected but ${showToken(found)} found")
if mustStartStatTokens.contains(found) then
false // it's a statement that might be legal in an outer context
else
in.nextToken() // needed to ensure progress; otherwise we might cycle forever
accept(SEMI)
skip()
true

in.observeOutdented()
recur(false, false)
end statSepOrEnd

def rewriteNotice(version: String = "3.0", additionalOption: String = "") = {
val optionStr = if (additionalOption.isEmpty) "" else " " ++ additionalOption
Expand Down Expand Up @@ -533,11 +547,8 @@ object Parsers {
if (in.rewriteToIndent) bracesToIndented(body, rewriteWithColon)
else inBraces(body)

def inDefScopeBraces[T](body: => T, rewriteWithColon: Boolean = false): T = {
val saved = lastStatOffset
try inBracesOrIndented(body, rewriteWithColon)
finally lastStatOffset = saved
}
def inDefScopeBraces[T](body: => T, rewriteWithColon: Boolean = false): T =
inBracesOrIndented(body, rewriteWithColon)

/** part { `separator` part }
*/
Expand Down Expand Up @@ -1254,7 +1265,7 @@ object Parsers {
def possibleTemplateStart(isNew: Boolean = false): Unit =
in.observeColonEOL()
if in.token == COLONEOL then
if in.lookahead.isIdent(nme.end) then in.token = NEWLINE
if in.lookahead.token == END then in.token = NEWLINE
else
in.nextToken()
if in.token != INDENT && in.token != LBRACE then
Expand Down Expand Up @@ -1284,25 +1295,12 @@ object Parsers {
case _: (ForYield | ForDo) => in.token == FOR
case _ => false

if isIdent(nme.end) then
val start = in.offset
val isEndMarker =
val endLine = source.offsetToLine(start)
val lookahead = in.LookaheadScanner()
lookahead.nextToken()
source.offsetToLine(lookahead.offset) == endLine
&& endMarkerTokens.contains(in.token)
&& {
lookahead.nextToken()
lookahead.token == EOF
|| source.offsetToLine(lookahead.offset) > endLine
}
if isEndMarker then
in.nextToken()
if stats.isEmpty || !matches(stats.last) then
syntaxError("misaligned end marker", Span(start, in.lastCharOffset))
in.token = IDENTIFIER // Leaving it as the original token can confuse newline insertion
in.nextToken()
if in.token == END then
val start = in.skipToken()
if stats.isEmpty || !matches(stats.last) then
syntaxError("misaligned end marker", Span(start, in.lastCharOffset))
in.token = IDENTIFIER // Leaving it as the original token can confuse newline insertion
in.nextToken()
end checkEndMarker

/* ------------- TYPES ------------------------------------------------------ */
Expand Down Expand Up @@ -1538,10 +1536,7 @@ object Parsers {
else t

/** The block in a quote or splice */
def stagedBlock() =
val saved = lastStatOffset
try inBraces(block(simplify = true))
finally lastStatOffset = saved
def stagedBlock() = inBraces(block(simplify = true))

/** SimpleExpr ::= spliceId | ‘$’ ‘{’ Block ‘}’
* SimpleType ::= spliceId | ‘$’ ‘{’ Block ‘}’
Expand Down Expand Up @@ -3640,11 +3635,10 @@ object Parsers {
*/
def extMethods(numLeadParams: Int): List[DefDef] = checkNoEscapingPlaceholders {
val meths = new ListBuffer[DefDef]
val exitOnError = false
while !isStatSeqEnd && !exitOnError do
setLastStatOffset()
while
meths += extMethod(numLeadParams)
acceptStatSepUnlessAtEnd(meths)
statSepOrEnd(meths, what = "extension method")
do ()
if meths.isEmpty then syntaxError("`def` expected")
meths.toList
}
Expand Down Expand Up @@ -3780,8 +3774,8 @@ object Parsers {
*/
def topStatSeq(outermost: Boolean = false): List[Tree] = {
val stats = new ListBuffer[Tree]
while (!isStatSeqEnd) {
setLastStatOffset()
while
var empty = false
if (in.token == PACKAGE) {
val start = in.skipToken()
if (in.token == OBJECT) {
Expand All @@ -3798,13 +3792,10 @@ object Parsers {
stats += extension()
else if isDefIntro(modifierTokens) then
stats +++= defOrDcl(in.offset, defAnnotsMods(modifierTokens))
else if !isStatSep then
if (in.token == CASE)
syntaxErrorOrIncomplete(OnlyCaseClassOrCaseObjectAllowed())
else
syntaxErrorOrIncomplete(ExpectedToplevelDef())
acceptStatSepUnlessAtEnd(stats)
}
else
empty = true
statSepOrEnd(stats, empty, "toplevel definition")
do ()
stats.toList
}

Expand Down Expand Up @@ -3836,14 +3827,12 @@ object Parsers {
in.token = SELFARROW // suppresses INDENT insertion after `=>`
in.nextToken()
}
else {
else
stats += first
acceptStatSepUnlessAtEnd(stats)
}
statSepOrEnd(stats)
}
var exitOnError = false
while (!isStatSeqEnd && !exitOnError) {
setLastStatOffset()
while
var empty = false
if (in.token == IMPORT)
stats ++= importClause(IMPORT, mkImport())
else if (in.token == EXPORT)
Expand All @@ -3854,12 +3843,10 @@ object Parsers {
stats +++= defOrDcl(in.offset, defAnnotsMods(modifierTokens))
else if (isExprIntro)
stats += expr1()
else if (!isStatSep) {
exitOnError = mustStartStat
syntaxErrorOrIncomplete("illegal start of definition")
}
acceptStatSepUnlessAtEnd(stats)
}
else
empty = true
statSepOrEnd(stats, empty)
do ()
(self, if (stats.isEmpty) List(EmptyTree) else stats.toList)
}

Expand Down Expand Up @@ -3888,16 +3875,14 @@ object Parsers {
if problem.isEmpty then tree :: Nil
else { syntaxError(problem, tree.span); Nil }

while (!isStatSeqEnd) {
if (isDclIntro)
while
val dclFound = isDclIntro
if dclFound then
stats ++= checkLegal(defOrDcl(in.offset, Modifiers()))
else if (!isStatSep)
syntaxErrorOrIncomplete(
"illegal start of declaration" +
(if (inFunReturnType) " (possible cause: missing `=` in front of current method body)"
else ""))
acceptStatSepUnlessAtEnd(stats)
}
var what = "declaration"
if inFunReturnType then what += " (possible cause: missing `=` in front of current method body)"
statSepOrEnd(stats, !dclFound, what)
do ()
stats.toList
}

Expand All @@ -3921,9 +3906,8 @@ object Parsers {
*/
def blockStatSeq(): List[Tree] = checkNoEscapingPlaceholders {
val stats = new ListBuffer[Tree]
var exitOnError = false
while (!isStatSeqEnd && in.token != CASE && !exitOnError) {
setLastStatOffset()
while
var empty = false
if (in.token == IMPORT)
stats ++= importClause(IMPORT, mkImport())
else if (isExprIntro)
Expand All @@ -3934,12 +3918,10 @@ object Parsers {
stats += extension()
else if isDefIntro(localModifierTokens, excludedSoftModifiers = Set(nme.`opaque`)) then
stats +++= localDef(in.offset)
else if (!isStatSep && (in.token != CASE)) {
exitOnError = mustStartStat
syntaxErrorOrIncomplete(IllegalStartOfStatement(isModifier))
}
acceptStatSepUnlessAtEnd(stats, CASE)
}
else
empty = true
statSepOrEnd(stats, empty, altEnd = CASE)
do ()
stats.toList
}

Expand All @@ -3956,7 +3938,7 @@ object Parsers {
in.nextToken()
ts += objectDef(start, Modifiers(Package))
if (in.token != EOF) {
acceptStatSepUnlessAtEnd(ts)
statSepOrEnd(ts, what = "toplevel definition")
ts ++= topStatSeq()
}
}
Expand All @@ -3973,7 +3955,7 @@ object Parsers {
acceptStatSep()
ts += makePackaging(start, pkg, topstats())
if continue then
acceptStatSepUnlessAtEnd(ts)
statSepOrEnd(ts, what = "toplevel definition")
ts ++= topStatSeq()
}
else
Expand Down
29 changes: 24 additions & 5 deletions compiler/src/dotty/tools/dotc/parsing/Scanners.scala
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,16 @@ object Scanners {
*/
protected def putChar(c: Char): Unit = litBuf.append(c)

/** Clear buffer and set name and token */
def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit = {
/** Clear buffer and set name and token
* If `target` is different from `this`, don't treat identifiers as end tokens
*/
def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit =
target.name = termName(litBuf.chars, 0, litBuf.length)
litBuf.clear()
target.token = idtoken
if (idtoken == IDENTIFIER)
target.token = toToken(target.name)
}
if idtoken == IDENTIFIER then
val converted = toToken(target.name)
if converted != END || (target eq this) then target.token = converted

/** The token for given `name`. Either IDENTIFIER or a keyword. */
def toToken(name: SimpleName): Token
Expand Down Expand Up @@ -656,6 +658,8 @@ object Scanners {
() /* skip the trailing comma */
else
reset()
case END =>
if !isEndMarker then token = IDENTIFIER
case COLON =>
if fewerBracesEnabled then observeColonEOL()
case RBRACE | RPAREN | RBRACKET =>
Expand All @@ -666,6 +670,21 @@ object Scanners {
}
}

/** Does the token at the current `offset` begin an end marker?
 *
 *  The answer is true only if all of the following hold:
 *   - `indentSyntax` is on and the current token comes after a line end,
 *   - the following token is one of `endMarkerTokens` and sits on the same line,
 *   - the token after that is `EOF` or starts on a later line.
 *  In every other case the result is false.
 */
protected def isEndMarker: Boolean =
if indentSyntax && isAfterLineEnd then
// Line of the current token; the marker and its target must share this line.
val endLine = source.offsetToLine(offset)
// The lookahead scanner answers false for `isEndMarker`
// (presumably to keep this check from re-entering itself while scanning ahead; TODO confirm).
val lookahead = new LookaheadScanner():
override def isEndMarker = false
lookahead.nextToken()
if endMarkerTokens.contains(lookahead.token)
&& source.offsetToLine(lookahead.offset) == endLine
then
// Step past the candidate marker target and check that nothing else follows on this line.
lookahead.nextToken()
if lookahead.token == EOF
|| source.offsetToLine(lookahead.offset) > endLine
then return true
false

/** Is there a blank line between the current token and the last one?
* A blank line consists only of characters <= ' '.
* @pre afterLineEnd().
Expand Down
Loading