scala · odersky · May 5, 2021 · May 1, 2021 · May 1, 2021 · May 1, 2021
diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala
@@ -167,6 +167,7 @@ object Parsers {
   class Parser(source: SourceFile)(using Context) extends ParserCommon(source) {
 
     val in: Scanner = new Scanner(source)
+    // in.debugTokenStream = true    // uncomment to see the token stream of the standard scanner, but not syntax highlighting
 
     /** This is the general parse entry point.
      *  Overridden by ScriptParser
@@ -233,15 +234,6 @@ object Parsers {
 
 /* ------------- ERROR HANDLING ------------------------------------------- */
 
-    /** The offset of the last time when a statement on a new line was definitely
-     *  encountered in the current scope or an outer scope.
-     */
-    private var lastStatOffset = -1
-
-    def setLastStatOffset(): Unit =
-      if (mustStartStat && in.isAfterLineEnd)
-        lastStatOffset = in.offset
-
     /** Is offset1 less or equally indented than offset2?
      *  This is the case if the characters between the preceding end-of-line and offset1
      *  are a prefix of the characters between the preceding end-of-line and offset2.
@@ -262,6 +254,7 @@ object Parsers {
         || skipStopTokens.contains(in.token) && (in.currentRegion eq lastRegion)
       while !atStop do
         in.nextToken()
+      lastErrorOffset = in.offset
 
     def warning(msg: Message, sourcePos: SourcePosition): Unit =
       report.warning(msg, sourcePos)
@@ -281,11 +274,9 @@ object Parsers {
      */
     def syntaxErrorOrIncomplete(msg: Message, offset: Int = in.offset): Unit =
       if (in.token == EOF) incompleteInputError(msg)
-      else {
+      else
         syntaxError(msg, offset)
         skip()
-        lastErrorOffset = in.offset
-      }
 
     /** Consume one token of the specified type, or
       * signal an error if it is not there.
@@ -320,22 +311,45 @@ object Parsers {
     def acceptStatSep(): Unit =
       if in.isNewLine then in.nextToken() else accept(SEMI)
 
-    def acceptStatSepUnlessAtEnd[T <: Tree](stats: ListBuffer[T], altEnd: Token = EOF): Unit =
-      def skipEmptyStats(): Unit =
-        while (in.token == SEMI || in.token == NEWLINE || in.token == NEWLINES) do in.nextToken()
-
-      in.observeOutdented()
-      in.token match
-        case SEMI | NEWLINE | NEWLINES =>
-          skipEmptyStats()
+    /** Parse statement separators and end markers. Ensure that there is at least
+     *  one statement separator unless the next token terminates a statement sequence.
+     *  @param   stats      the statements parsed so far
+     *  @param   noPrevStat true if there was no immediately preceding statement parsed
+     *  @param   what       a string indicating what kind of statement is parsed
+     *  @param   altEnd     a token that is also considered as a terminator of the statement
+     *                      sequence (the default `EOF` is already assumed to terminate a statement
+     *                      sequence).
+     *  @return  true if the statement sequence continues, false if it terminates.
+     */
+    def statSepOrEnd[T <: Tree](stats: ListBuffer[T], noPrevStat: Boolean = false, what: String = "statement", altEnd: Token = EOF): Boolean =
+      def recur(sepSeen: Boolean, endSeen: Boolean): Boolean =
+        if isStatSep then
+          in.nextToken()
+          recur(true, endSeen)
+        else if in.token == END then
+          if endSeen then syntaxError("duplicate end marker")
           checkEndMarker(stats)
-          skipEmptyStats()
-        case `altEnd` =>
-        case _ =>
-          if !isStatSeqEnd then
-            syntaxError(i"end of statement expected but ${showToken(in.token)} found")
+          recur(sepSeen, true)
+        else if isStatSeqEnd || in.token == altEnd then
+          false
+        else if sepSeen || endSeen then
+          true
+        else
+          val found = in.token
+          val statFollows = mustStartStatTokens.contains(found)
+          syntaxError(
+            if noPrevStat then IllegalStartOfStatement(what, isModifier, statFollows)
+            else i"end of $what expected but ${showToken(found)} found")
+          if mustStartStatTokens.contains(found) then
+            false // it's a statement that might be legal in an outer context
+          else
             in.nextToken() // needed to ensure progress; otherwise we might cycle forever
-            accept(SEMI)
+            skip()
+            true
+
+      in.observeOutdented()
+      recur(false, false)
+    end statSepOrEnd
 
     def rewriteNotice(version: String = "3.0", additionalOption: String = "") = {
       val optionStr = if (additionalOption.isEmpty) "" else " " ++ additionalOption
@@ -533,11 +547,8 @@ object Parsers {
         if (in.rewriteToIndent) bracesToIndented(body, rewriteWithColon)
         else inBraces(body)
 
-    def inDefScopeBraces[T](body: => T, rewriteWithColon: Boolean = false): T = {
-      val saved = lastStatOffset
-      try inBracesOrIndented(body, rewriteWithColon)
-      finally lastStatOffset = saved
-    }
+    def inDefScopeBraces[T](body: => T, rewriteWithColon: Boolean = false): T =
+      inBracesOrIndented(body, rewriteWithColon)
 
     /** part { `separator` part }
      */
@@ -1254,7 +1265,7 @@ object Parsers {
     def possibleTemplateStart(isNew: Boolean = false): Unit =
       in.observeColonEOL()
       if in.token == COLONEOL then
-        if in.lookahead.isIdent(nme.end) then in.token = NEWLINE
+        if in.lookahead.token == END then in.token = NEWLINE
         else
           in.nextToken()
           if in.token != INDENT && in.token != LBRACE then
@@ -1284,25 +1295,12 @@ object Parsers {
         case _: (ForYield | ForDo) => in.token == FOR
         case _ => false
 
-      if isIdent(nme.end) then
-        val start = in.offset
-        val isEndMarker =
-          val endLine = source.offsetToLine(start)
-          val lookahead = in.LookaheadScanner()
-          lookahead.nextToken()
-          source.offsetToLine(lookahead.offset) == endLine
-          && endMarkerTokens.contains(in.token)
-          && {
-            lookahead.nextToken()
-            lookahead.token == EOF
-            || source.offsetToLine(lookahead.offset) > endLine
-          }
-        if isEndMarker then
-          in.nextToken()
-          if stats.isEmpty || !matches(stats.last) then
-            syntaxError("misaligned end marker", Span(start, in.lastCharOffset))
-          in.token = IDENTIFIER // Leaving it as the original token can confuse newline insertion
-          in.nextToken()
+      if in.token == END then
+        val start = in.skipToken()
+        if stats.isEmpty || !matches(stats.last) then
+          syntaxError("misaligned end marker", Span(start, in.lastCharOffset))
+        in.token = IDENTIFIER // Leaving it as the original token can confuse newline insertion
+        in.nextToken()
     end checkEndMarker
 
 /* ------------- TYPES ------------------------------------------------------ */
@@ -1538,10 +1536,7 @@ object Parsers {
       else t
 
     /** The block in a quote or splice */
-    def stagedBlock() =
-      val saved = lastStatOffset
-      try inBraces(block(simplify = true))
-      finally lastStatOffset = saved
+    def stagedBlock() = inBraces(block(simplify = true))
 
     /** SimpleEpxr  ::=  spliceId | ‘$’ ‘{’ Block ‘}’)
      *  SimpleType  ::=  spliceId | ‘$’ ‘{’ Block ‘}’)
@@ -3640,11 +3635,10 @@ object Parsers {
      */
     def extMethods(numLeadParams: Int): List[DefDef] = checkNoEscapingPlaceholders {
       val meths = new ListBuffer[DefDef]
-      val exitOnError = false
-      while !isStatSeqEnd && !exitOnError do
-        setLastStatOffset()
+      while
         meths += extMethod(numLeadParams)
-        acceptStatSepUnlessAtEnd(meths)
+        statSepOrEnd(meths, what = "extension method")
+      do ()
       if meths.isEmpty then syntaxError("`def` expected")
       meths.toList
     }
@@ -3780,8 +3774,8 @@ object Parsers {
      */
     def topStatSeq(outermost: Boolean = false): List[Tree] = {
       val stats = new ListBuffer[Tree]
-      while (!isStatSeqEnd) {
-        setLastStatOffset()
+      while
+        var empty = false
         if (in.token == PACKAGE) {
           val start = in.skipToken()
           if (in.token == OBJECT) {
@@ -3798,13 +3792,10 @@ object Parsers {
           stats += extension()
         else if isDefIntro(modifierTokens) then
           stats +++= defOrDcl(in.offset, defAnnotsMods(modifierTokens))
-        else if !isStatSep then
-          if (in.token == CASE)
-            syntaxErrorOrIncomplete(OnlyCaseClassOrCaseObjectAllowed())
-          else
-            syntaxErrorOrIncomplete(ExpectedToplevelDef())
-        acceptStatSepUnlessAtEnd(stats)
-      }
+        else
+          empty = true
+        statSepOrEnd(stats, empty, "toplevel definition")
+      do ()
       stats.toList
     }
 
@@ -3836,14 +3827,12 @@ object Parsers {
           in.token = SELFARROW // suppresses INDENT insertion after `=>`
           in.nextToken()
         }
-        else {
+        else
           stats += first
-          acceptStatSepUnlessAtEnd(stats)
-        }
+          statSepOrEnd(stats)
       }
-      var exitOnError = false
-      while (!isStatSeqEnd && !exitOnError) {
-        setLastStatOffset()
+      while
+        var empty = false
         if (in.token == IMPORT)
           stats ++= importClause(IMPORT, mkImport())
         else if (in.token == EXPORT)
@@ -3854,12 +3843,10 @@ object Parsers {
           stats +++= defOrDcl(in.offset, defAnnotsMods(modifierTokens))
         else if (isExprIntro)
           stats += expr1()
-        else if (!isStatSep) {
-          exitOnError = mustStartStat
-          syntaxErrorOrIncomplete("illegal start of definition")
-        }
-        acceptStatSepUnlessAtEnd(stats)
-      }
+        else
+          empty = true
+        statSepOrEnd(stats, empty)
+      do ()
       (self, if (stats.isEmpty) List(EmptyTree) else stats.toList)
     }
 
@@ -3888,16 +3875,14 @@ object Parsers {
         if problem.isEmpty then tree :: Nil
         else { syntaxError(problem, tree.span); Nil }
 
-      while (!isStatSeqEnd) {
-        if (isDclIntro)
+      while
+        val dclFound = isDclIntro
+        if dclFound then
           stats ++= checkLegal(defOrDcl(in.offset, Modifiers()))
-        else if (!isStatSep)
-          syntaxErrorOrIncomplete(
-            "illegal start of declaration" +
-            (if (inFunReturnType) " (possible cause: missing `=` in front of current method body)"
-             else ""))
-        acceptStatSepUnlessAtEnd(stats)
-      }
+        var what = "declaration"
+        if inFunReturnType then what += " (possible cause: missing `=` in front of current method body)"
+        statSepOrEnd(stats, !dclFound, what)
+      do ()
       stats.toList
     }
 
@@ -3921,9 +3906,8 @@ object Parsers {
      */
     def blockStatSeq(): List[Tree] = checkNoEscapingPlaceholders {
       val stats = new ListBuffer[Tree]
-      var exitOnError = false
-      while (!isStatSeqEnd && in.token != CASE && !exitOnError) {
-        setLastStatOffset()
+      while
+        var empty = false
         if (in.token == IMPORT)
           stats ++= importClause(IMPORT, mkImport())
         else if (isExprIntro)
@@ -3934,12 +3918,10 @@ object Parsers {
           stats += extension()
         else if isDefIntro(localModifierTokens, excludedSoftModifiers = Set(nme.`opaque`)) then
           stats +++= localDef(in.offset)
-        else if (!isStatSep && (in.token != CASE)) {
-          exitOnError = mustStartStat
-          syntaxErrorOrIncomplete(IllegalStartOfStatement(isModifier))
-        }
-        acceptStatSepUnlessAtEnd(stats, CASE)
-      }
+        else
+          empty = true
+        statSepOrEnd(stats, empty, altEnd = CASE)
+      do ()
       stats.toList
     }
 
@@ -3956,7 +3938,7 @@ object Parsers {
             in.nextToken()
             ts += objectDef(start, Modifiers(Package))
             if (in.token != EOF) {
-              acceptStatSepUnlessAtEnd(ts)
+              statSepOrEnd(ts, what = "toplevel definition")
               ts ++= topStatSeq()
             }
           }
@@ -3973,7 +3955,7 @@ object Parsers {
               acceptStatSep()
               ts += makePackaging(start, pkg, topstats())
             if continue then
-              acceptStatSepUnlessAtEnd(ts)
+              statSepOrEnd(ts, what = "toplevel definition")
               ts ++= topStatSeq()
         }
         else

diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala
@@ -135,14 +135,16 @@ object Scanners {
       */
     protected def putChar(c: Char): Unit = litBuf.append(c)
 
-    /** Clear buffer and set name and token */
-    def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit = {
+    /** Clear buffer and set name and token
+     *  If `target` is different from `this`, don't treat identifiers as end tokens
+     */
+    def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit =
       target.name = termName(litBuf.chars, 0, litBuf.length)
       litBuf.clear()
       target.token = idtoken
-      if (idtoken == IDENTIFIER)
-        target.token = toToken(target.name)
-    }
+      if idtoken == IDENTIFIER then
+        val converted = toToken(target.name)
+        if converted != END || (target eq this) then target.token = converted
 
     /** The token for given `name`. Either IDENTIFIER or a keyword. */
     def toToken(name: SimpleName): Token
@@ -656,6 +658,8 @@ object Scanners {
                 () /* skip the trailing comma */
               else
                 reset()
+        case END =>
+          if !isEndMarker then token = IDENTIFIER
         case COLON =>
           if fewerBracesEnabled then observeColonEOL()
         case RBRACE | RPAREN | RBRACKET =>
@@ -666,6 +670,21 @@ object Scanners {
       }
     }
 
+    protected def isEndMarker: Boolean =
+      if indentSyntax && isAfterLineEnd then
+        val endLine = source.offsetToLine(offset)
+        val lookahead = new LookaheadScanner():
+          override def isEndMarker = false
+        lookahead.nextToken()
+        if endMarkerTokens.contains(lookahead.token)
+          && source.offsetToLine(lookahead.offset) == endLine
+        then
+          lookahead.nextToken()
+          if lookahead.token == EOF
+          || source.offsetToLine(lookahead.offset) > endLine
+          then return true
+      false
+
     /** Is there a blank line between the current token and the last one?
      *  A blank line consists only of characters <= ' '.
      *  @pre  afterLineEnd().