From 1e3a8d9750fd91d9e4bc664eeb320dd80058cd99 Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Thu, 22 Aug 2019 19:37:03 +0200 Subject: [PATCH] Support significant indentation --- .../src/dotty/tools/dotc/config/Config.scala | 3 + .../dotty/tools/dotc/config/Printers.scala | 1 + .../tools/dotc/config/ScalaSettings.scala | 2 + .../src/dotty/tools/dotc/core/StdNames.scala | 1 + .../tools/dotc/parsing/CharArrayReader.scala | 11 +- .../dotty/tools/dotc/parsing/Parsers.scala | 755 +++++++++++------- .../dotty/tools/dotc/parsing/Scanners.scala | 419 ++++++++-- .../src/dotty/tools/dotc/parsing/Tokens.scala | 17 +- .../dotc/parsing/xml/MarkupParsers.scala | 12 +- .../dotc/printing/SyntaxHighlighting.scala | 4 +- .../dotty/tools/dotc/rewrites/Rewrites.scala | 10 +- .../src/dotty/tools/dotc/util/Spans.scala | 11 + docs/docs/internals/syntax.md | 30 +- .../other-new-features/indentation.md | 188 +++++ docs/sidebar.yml | 2 + tests/neg/endmarkers.scala | 110 +++ tests/neg/i4373b.scala | 2 +- tests/pos/indent.scala | 107 +++ tests/pos/syntax-rewrite.scala | 40 + 19 files changed, 1345 insertions(+), 380 deletions(-) create mode 100644 docs/docs/reference/other-new-features/indentation.md create mode 100644 tests/neg/endmarkers.scala create mode 100644 tests/pos/indent.scala create mode 100644 tests/pos/syntax-rewrite.scala diff --git a/compiler/src/dotty/tools/dotc/config/Config.scala b/compiler/src/dotty/tools/dotc/config/Config.scala index 4366ac45170f..157ed40604cb 100644 --- a/compiler/src/dotty/tools/dotc/config/Config.scala +++ b/compiler/src/dotty/tools/dotc/config/Config.scala @@ -157,6 +157,9 @@ object Config { */ final val simplifyApplications = true + /** Always assume -indent */ + final val allowIndent = true + /** If set, prints a trace of all symbol completions */ final val showCompletions = false diff --git a/compiler/src/dotty/tools/dotc/config/Printers.scala b/compiler/src/dotty/tools/dotc/config/Printers.scala index 155f6984a7e9..6a0549501f3d 100644 --- a/compiler/src/dotty/tools/dotc/config/Printers.scala +++ b/compiler/src/dotty/tools/dotc/config/Printers.scala @@ -27,6 +27,7 @@ object Printers { val hk: Printer = noPrinter val implicits: Printer = noPrinter val implicitsDetailed: Printer = noPrinter + val lexical: Printer = noPrinter val inlining: Printer = noPrinter val interactiv: Printer = noPrinter val overload: Printer = noPrinter diff --git a/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala b/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala index 80a6c19b41ab..fc50873a4e68 100644 --- a/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala +++ b/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala @@ -50,6 +50,8 @@ class ScalaSettings extends Settings.SettingGroup { val newSyntax: Setting[Boolean] = BooleanSetting("-new-syntax", "Require `then` and `do` in control expressions") val oldSyntax: Setting[Boolean] = BooleanSetting("-old-syntax", "Require `(...)` around conditions") + val indent: Setting[Boolean] = BooleanSetting("-indent", "allow significant indentation") + val noindent: Setting[Boolean] = BooleanSetting("-noindent", "require classical {...} syntax, indentation is not significant") /** Decompiler settings */ val printTasty: Setting[Boolean] = BooleanSetting("-print-tasty", "Prints the raw tasty.") withAbbreviation "--print-tasty" diff --git a/compiler/src/dotty/tools/dotc/core/StdNames.scala b/compiler/src/dotty/tools/dotc/core/StdNames.scala index a25996e7d9aa..7fa657d754af 100644 --- a/compiler/src/dotty/tools/dotc/core/StdNames.scala +++ 
b/compiler/src/dotty/tools/dotc/core/StdNames.scala @@ -421,6 +421,7 @@ object StdNames { val elem: N = "elem" val elems: N = "elems" val emptyValDef: N = "emptyValDef" + val end: N = "end" val ensureAccessible : N = "ensureAccessible" val eq: N = "eq" val eqInstance: N = "eqInstance" diff --git a/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala b/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala index 8e4d524dda8b..585dc1d6620c 100644 --- a/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala +++ b/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala @@ -27,9 +27,6 @@ abstract class CharArrayReader { self => /** The start offset of the current line */ var lineStartOffset: Int = startFrom - /** The start offset of the line before the current one */ - var lastLineStartOffset: Int = startFrom - private[this] var lastUnicodeOffset = -1 /** Is last character a unicode escape \\uxxxx? */ @@ -112,12 +109,8 @@ abstract class CharArrayReader { self => } /** Handle line ends */ - private def potentialLineEnd(): Unit = { - if (ch == LF || ch == FF) { - lastLineStartOffset = lineStartOffset - lineStartOffset = charOffset - } - } + private def potentialLineEnd(): Unit = + if (ch == LF || ch == FF) lineStartOffset = charOffset def isAtEnd: Boolean = charOffset >= buf.length diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 81803ce86b9c..b1ab9cc624c8 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -24,7 +24,7 @@ import ScriptParsers._ import Decorators._ import scala.internal.Chars import scala.annotation.{tailrec, switch} -import rewrites.Rewrites.patch +import rewrites.Rewrites.{patch, overlapsPatch} object Parsers { @@ -152,12 +152,12 @@ object Parsers { def skipBracesHook(): Option[Tree] def skipBraces(): Unit = { - accept(LBRACE) + accept(if (in.token == INDENT) INDENT else LBRACE) var openBraces = 1 while (in.token != EOF && openBraces > 0) { skipBracesHook() getOrElse { - if (in.token == LBRACE) openBraces += 1 - else if (in.token == RBRACE) openBraces -= 1 + if (in.token == LBRACE || in.token == INDENT) openBraces += 1 + else if (in.token == RBRACE || in.token == OUTDENT) openBraces -= 1 in.nextToken() } } @@ -181,15 +181,15 @@ object Parsers { /* -------------- TOKEN CLASSES ------------------------------------------- */ - def isIdent: Boolean = in.token == IDENTIFIER || in.token == BACKQUOTED_IDENT - def isIdent(name: Name): Boolean = in.token == IDENTIFIER && in.name == name - def isSimpleLiteral: Boolean = simpleLiteralTokens contains in.token - def isLiteral: Boolean = literalTokens contains in.token - def isNumericLit: Boolean = numericLitTokens contains in.token - def isTemplateIntro: Boolean = templateIntroTokens contains in.token - def isDclIntro: Boolean = dclIntroTokens contains in.token - def isStatSeqEnd: Boolean = in.token == RBRACE || in.token == EOF - def mustStartStat: Boolean = mustStartStatTokens contains in.token + def isIdent = in.token == IDENTIFIER || in.token == BACKQUOTED_IDENT + def isIdent(name: Name) = in.token == IDENTIFIER && in.name == name + def isSimpleLiteral = simpleLiteralTokens contains in.token + def isLiteral = literalTokens contains in.token + def isNumericLit = numericLitTokens contains in.token + def isTemplateIntro = templateIntroTokens contains in.token + def isDclIntro = dclIntroTokens contains in.token + def isStatSeqEnd = in.isNestedEnd || in.token == EOF + def 
mustStartStat = mustStartStatTokens contains in.token /** Is current token a hard or soft modifier (in modifier position or not)? */ def isModifier: Boolean = modifierTokens.contains(in.token) || in.isSoftModifier @@ -233,7 +233,7 @@ object Parsers { private[this] var lastStatOffset = -1 def setLastStatOffset(): Unit = - if (mustStartStat && in.isAfterLineEnd()) + if (mustStartStat && in.isAfterLineEnd) lastStatOffset = in.offset /** Is offset1 less or equally indented than offset2? @@ -276,15 +276,21 @@ object Parsers { if (openParens.count(LBRACKET) > 0 && skippedParens.nonePositive) return skippedParens.change(LBRACKET, -1) + case OUTDENT => + if (openParens.count(INDENT) > 0 && skippedParens.count(INDENT) == 0) + return + skippedParens.change(INDENT, -1) case LBRACE => - skippedParens.change(LBRACE, + 1) + skippedParens.change(LBRACE, +1) case LPAREN => - skippedParens.change(LPAREN, + 1) + skippedParens.change(LPAREN, +1) case LBRACKET=> - skippedParens.change(LBRACKET, + 1) + skippedParens.change(LBRACKET, +1) + case INDENT => + skippedParens.change(INDENT, +1) case _ => if (mustStartStat && - in.isAfterLineEnd() && + in.isAfterLineEnd && isLeqIndented(in.offset, lastStatOffset max 0)) return } @@ -346,7 +352,7 @@ object Parsers { case NEWLINE | NEWLINES => in.nextToken() case SEMI => in.nextToken() case _ => - syntaxError("end of statement expected") + syntaxError(i"end of statement expected but $in found") in.nextToken() // needed to ensure progress; otherwise we might cycle forever accept(SEMI) } @@ -531,9 +537,21 @@ object Parsers { def inBraces[T](body: => T): T = enclosed(LBRACE, body) def inBrackets[T](body: => T): T = enclosed(LBRACKET, body) + def inBracesOrIndented[T](body: => T): T = + if (in.token == INDENT) { + val rewriteToBraces = + in.rewriteNoIndent && + !testChars(in.lastOffset - 3, " =>") // braces are always optional after `=>` so none should be inserted + if (rewriteToBraces) indentedToBraces(body) + else enclosed(INDENT, body) + } + else + if (in.rewriteToIndent) bracesToIndented(body) + else inBraces(body) + def inDefScopeBraces[T](body: => T): T = { val saved = lastStatOffset - try inBraces(body) + try inBracesOrIndented(body) finally lastStatOffset = saved } @@ -557,6 +575,11 @@ object Parsers { /* -------- REWRITES ----------------------------------------------------------- */ + /** The last offset where a colon at the end of line would be required if a subsequent { ... } + * block would be converted to an indentation region. + */ + var possibleColonOffset: Int = -1 + /** A list of pending patches, to be issued if we can rewrite all enclosing braces to * indentation regions. */ @@ -580,28 +603,146 @@ object Parsers { if (testChar(idx, c => c == ' ' || c == '\t' || c == Chars.CR)) skipBlanks(idx + step, step) else idx - def skipLineCommentsRightOf(idx: Int, column: Int): Int = { - val j = skipBlanks(idx) - if (testChar(j, '/') && testChar(j + 1, '/') && source.column(j) > column) - skipLineCommentsRightOf(source.nextLine(j), column) - else idx + /** Parse indentation region `body` and rewrite it to be in braces instead */ + def indentedToBraces[T](body: => T): T = { + val indentWidth = in.indent.enclosing.width + val followsColon = testChar(in.lastOffset - 1, ':') + val startOpening = + if (followsColon) + if (testChar(in.lastOffset - 2, ' ')) in.lastOffset - 2 + else in.lastOffset - 1 + else in.lastOffset + val endOpening = in.lastOffset + + val t = enclosed(INDENT, body) + + /** Is `expr` a tree that lacks a final `else`? 
Put such trees in `{...}` to make + * sure we don't accidentally merge them with a following `else`. + */ + def isPartialIf(expr: Tree): Boolean = expr match { + case If(_, _, EmptyTree) => true + case If(_, _, e) => isPartialIf(e) + case _ => false + } + + /** Is `expr` a (possibly curried) function that has a multi-statement block + * as body? Put such trees in `{...}` since we don't enclose statements following + * a `=>` in braces. + */ + def isBlockFunction[T](expr: T): Boolean = expr match { + case Function(_, body) => isBlockFunction(body) + case Block(stats, expr) => stats.nonEmpty || isBlockFunction(expr) + case _ => false + } + + /** Start of first line after in.lastOffset that does not have a comment + * at indent width greater than the indent width of the closing brace. + */ + def closingOffset(lineStart: Offset): Offset = { + if (lineStart >= in.lineOffset) in.lineOffset + else { + val candidate = source.nextLine(lineStart) + val commentStart = skipBlanks(lineStart) + if (testChar(commentStart, '/') && indentWidth < in.indentWidth(commentStart)) + closingOffset(source.nextLine(lineStart)) + else + lineStart + } + } + + val needsBraces = t match { + case Block(Nil, expr) => followsColon || isPartialIf(expr) || isBlockFunction(expr) + case _ => true + } + if (needsBraces) { + patch(source, Span(startOpening, endOpening), " {") + patch(source, Span(closingOffset(source.nextLine(in.lastOffset))), indentWidth.toPrefix ++ "}\n") + } + t } - /** The region to eliminate when replacing a closing `)` or `}` that starts - * a new line + /** The region to eliminate when replacing an opening `(` or `{` that ends a line. + * The `(` or `{` is at in.offset. + */ + def startingElimRegion(colonRequired: Boolean): (Offset, Offset) = { + val skipped = skipBlanks(in.offset + 1) + if (in.isAfterLineEnd) { + if (testChar(skipped, Chars.LF) && !colonRequired) + (in.lineOffset, skipped + 1) // skip the whole line + else + (in.offset, skipped) + } + else if (testChar(in.offset - 1, ' ')) (in.offset - 1, in.offset + 1) + else (in.offset, in.offset + 1) + } + + /** The region to eliminate when replacing a closing `)` or `}` that starts a new line + * The `)` or `}` precedes in.lastOffset. */ def closingElimRegion(): (Offset, Offset) = { val skipped = skipBlanks(in.lastOffset) - if (testChar(skipped, Chars.LF)) // if `}` is on a line by itself + if (testChar(skipped, Chars.LF)) // if `)` or `}` is on a line by itself (source.startOfLine(in.lastOffset), skipped + 1) // skip the whole line else // else - (in.lastOffset - 1, skipped) // move the following text up to where the `}` was + (in.lastOffset - 1, skipped) // move the following text up to where the `)` or `}` was + } + + /** Parse brace-enclosed `body` and rewrite it to be an indentation region instead, if possible. + * If possible means: + * 1. not inside (...), [...], case ... => + * 2. opening brace `{` is at end of line + * 3. closing brace `}` is at start of line + * 4. there is at least one token between the braces + * 5. the closing brace is also at the end of the line, or it is followed by one of + * `then`, `else`, `do`, `catch`, `finally`, `yield`, or `match`. + * 6. the opening brace does not follow a `=>`. The reason for this condition is that + * rewriting back to braces does not work after `=>` (since in most cases braces are omitted + * after a `=>` it would be annoying if braces were inserted). 
+ */ + def bracesToIndented[T](body: => T): T = { + val colonRequired = possibleColonOffset == in.lastOffset + val (startOpening, endOpening) = startingElimRegion(colonRequired) + val isOuterMost = in.sepRegions.isEmpty + val savedPending = pendingPatches + var canRewrite = + in.sepRegions.forall(token => token == RBRACE || token == OUTDENT) && // test (1) + !testChars(in.lastOffset - 3, " =>") // test(6) + val t = enclosed(LBRACE, { + canRewrite &= in.isAfterLineEnd // test (2) + val curOffset = in.offset + try body + finally { + canRewrite &= in.isAfterLineEnd && in.offset != curOffset // test (3)(4) + } + }) + canRewrite &= (in.isAfterLineEnd || statCtdTokens.contains(in.token)) // test (5) + if (canRewrite) { + val openingPatchStr = + if (!colonRequired) "" + else if (testChar(startOpening - 1, Chars.isOperatorPart(_))) " :" + else ":" + val (startClosing, endClosing) = closingElimRegion() + val applyPatch = () => { + patch(source, Span(startOpening, endOpening), openingPatchStr) + patch(source, Span(startClosing, endClosing), "") + } + pendingPatches = applyPatch :: pendingPatches + if (isOuterMost) { + pendingPatches.reverse.foreach(_()) + pendingPatches = Nil + } + } + else pendingPatches = savedPending // can't rewrite, cancel all nested patches. + t } /** Drop (...) or { ... }, replacing the closing element with `endStr` */ def dropParensOrBraces(start: Offset, endStr: String): Unit = { - patch(source, Span(start, start + 1), - if (testChar(start - 1, Chars.isIdentifierPart)) " " else "") + if (testChar(start + 1, Chars.isLineBreakChar)) + patch(source, Span(if (testChar(start - 1, ' ')) start - 1 else start, start + 1), "") + else + patch(source, Span(start, start + 1), + if (testChar(start - 1, Chars.isIdentifierPart)) " " else "") val closingStartsLine = testChar(skipBlanks(in.lastOffset - 2, -1), Chars.LF) val preFill = if (closingStartsLine || endStr.isEmpty) "" else " " val postFill = if (in.lastOffset == in.offset) " " else "" @@ -611,17 +752,30 @@ object Parsers { patch(source, Span(startClosing, endClosing), s"$preFill$endStr$postFill") } + /** If all other characters on the same line as `span` are blanks, widen to + * the whole line. + */ + def widenIfWholeLine(span: Span): Span = { + val start = skipBlanks(span.start - 1, -1) + val end = skipBlanks(span.end, 1) + if (testChar(start, Chars.LF) && testChar(end, Chars.LF)) Span(start, end) + else span + } + /** Drop current token, which is assumed to be `then` or `do`. */ def dropTerminator(): Unit = { var startOffset = in.offset var endOffset = in.lastCharOffset - if (in.isAfterLineEnd()) { - if (testChar(endOffset, ' ')) endOffset += 1 + if (in.isAfterLineEnd) { + if (testChar(endOffset, ' ')) + endOffset += 1 } else { - if (testChar(startOffset - 1, ' ')) startOffset -= 1 + if (testChar(startOffset - 1, ' ') && + !overlapsPatch(source, Span(startOffset - 1, endOffset))) + startOffset -= 1 } - patch(source, Span(startOffset, endOffset), "") + patch(source, widenIfWholeLine(Span(startOffset, endOffset)), "") } /** rewrite code with (...) 
around the source code of `t` */ @@ -644,13 +798,13 @@ object Parsers { val token = lookahead.token if (braces == 0) { if (token == query) return true - if (stopScanTokens.contains(token) || lookahead.token == RBRACE) return false + if (stopScanTokens.contains(token) || lookahead.isNestedEnd) return false } else if (token == EOF) return false - else if (lookahead.token == RBRACE) + else if (lookahead.isNestedEnd) braces -= 1 - if (lookahead.token == LBRACE) braces += 1 + if (lookahead.isNestedStart) braces += 1 lookahead.nextToken() } false @@ -723,6 +877,7 @@ object Parsers { val op = if (isType) typeIdent() else termIdent() val top1 = reduceStack(base, top, precedence(op.name), isLeftAssoc(op.name), op.name, isType) opStack = OpInfo(top1, op, in.offset) :: opStack + colonAtEOLOpt() newLineOptWhenFollowing(canStartOperand) if (maybePostfix && !canStartOperand(in.token)) { val topInfo = opStack.head @@ -1002,6 +1157,28 @@ object Parsers { if (in.token == NEWLINE && p(in.next.token)) newLineOpt() } + def colonAtEOLOpt(): Unit = { + possibleColonOffset = in.lastOffset + if (in.token == COLONEOL) in.nextToken() + } + + def possibleBracesStart(): Unit = { + colonAtEOLOpt() + newLineOptWhenFollowedBy(LBRACE) + } + + def indentRegion[T](tag: EndMarkerTag)(op: => T): T = { + val iw = in.indent.width + val t = op + in.consumeEndMarker(tag, iw) + t + } + + def indentRegion[T](pid: Tree)(op: => T): T = pid match { + case pid: RefTree => indentRegion(pid.name.toTermName)(op) + case _ => op + } + /* ------------- TYPES ------------------------------------------------------ */ /** Same as [[typ]], but if this results in a wildcard it emits a syntax error and * returns a tree for type `Any` instead. @@ -1145,8 +1322,9 @@ object Parsers { val refinedType: () => Tree = () => refinedTypeRest(withType()) def refinedTypeRest(t: Tree): Tree = { - newLineOptWhenFollowedBy(LBRACE) - if (in.token == LBRACE) refinedTypeRest(atSpan(startOffset(t)) { RefinedTypeTree(rejectWildcardType(t), refinement()) }) + possibleBracesStart() + if (in.isNestedStart) + refinedTypeRest(atSpan(startOffset(t)) { RefinedTypeTree(rejectWildcardType(t), refinement()) }) else t } @@ -1331,7 +1509,7 @@ object Parsers { /** Refinement ::= `{' RefineStatSeq `}' */ - def refinement(): List[Tree] = inBraces(refineStatSeq()) + def refinement(): List[Tree] = inBracesOrIndented(refineStatSeq()) /** TypeBounds ::= [`>:' Type] [`<:' Type] */ @@ -1365,9 +1543,16 @@ object Parsers { Nil } - def typedOpt(): Tree = + def typedOpt(): Tree = { + if (in.token == COLONEOL) in.token = COLON + // a hack to allow + // + // def f(): + // T + // if (in.token == COLON) { in.nextToken(); toplevelTyp() } else TypeTree().withSpan(Span(in.lastOffset)) + } def typeDependingOn(location: Location.Value): Tree = if (location == Location.InParens) typ() @@ -1399,7 +1584,11 @@ object Parsers { dropParensOrBraces(t.span.start, s"${tokenString(altToken)}") t } else { - val t = inSepRegion(LPAREN, RPAREN)(expr()) + val t = + if (in.isNestedStart) + try expr() finally newLinesOpt() + else + inSepRegion(LPAREN, RPAREN)(expr()) if (rewriteToOldSyntax(t.span.startPos)) revertToParens(t) accept(altToken) t @@ -1419,7 +1608,6 @@ object Parsers { * | [‘inline’] `if' Expr `then' Expr [[semi] else Expr] * | `while' `(' Expr `)' {nl} Expr * | `while' Expr `do' Expr - * | `do' Expr [semi] `while' Expr * | `try' Expr Catches [`finally' Expr] * | `try' Expr [`finally' Expr] * | `throw' Expr @@ -1482,13 +1670,15 @@ object Parsers { def expr1(location: Location.Value = 
Location.ElseWhere): Tree = in.token match { case IF => - ifExpr(in.offset, If) + indentRegion(IF) { ifExpr(in.offset, If) } case WHILE => - atSpan(in.skipToken()) { - val cond = condExpr(DO) - newLinesOpt() - val body = expr() - WhileDo(cond, body) + indentRegion(WHILE) { + atSpan(in.skipToken()) { + val cond = condExpr(DO) + newLinesOpt() + val body = expr() + WhileDo(cond, body) + } } case DO => in.errorOrMigrationWarning( @@ -1517,41 +1707,46 @@ object Parsers { WhileDo(Block(body, cond), Literal(Constant(()))) } case TRY => - val tryOffset = in.offset - atSpan(in.skipToken()) { - val body = expr() - val (handler, handlerStart) = - if (in.token == CATCH) { - val span = in.offset - in.nextToken() - (expr(), span) - } else (EmptyTree, -1) - - handler match { - case Block(Nil, EmptyTree) => - assert(handlerStart != -1) - syntaxError( - EmptyCatchBlock(body), - Span(handlerStart, endOffset(handler)) - ) - case _ => - } - - val finalizer = - if (in.token == FINALLY) { in.nextToken(); expr() } - else { - if (handler.isEmpty) warning( - EmptyCatchAndFinallyBlock(body), - source.atSpan(Span(tryOffset, endOffset(body))) - ) - EmptyTree + indentRegion(TRY) { + val tryOffset = in.offset + atSpan(in.skipToken()) { + val body = expr() + val (handler, handlerStart) = + if (in.token == CATCH) { + val span = in.offset + in.nextToken() + (expr(), span) + } else (EmptyTree, -1) + + handler match { + case Block(Nil, EmptyTree) => + assert(handlerStart != -1) + syntaxError( + EmptyCatchBlock(body), + Span(handlerStart, endOffset(handler)) + ) + case _ => } - ParsedTry(body, handler, finalizer) + + val finalizer = + if (in.token == FINALLY) { in.nextToken(); expr() } + else { + if (handler.isEmpty) warning( + EmptyCatchAndFinallyBlock(body), + source.atSpan(Span(tryOffset, endOffset(body))) + ) + EmptyTree + } + ParsedTry(body, handler, finalizer) + } } case THROW => atSpan(in.skipToken()) { Throw(expr()) } case RETURN => - atSpan(in.skipToken()) { Return(if (isExprIntro) expr() else EmptyTree, EmptyTree) } + atSpan(in.skipToken()) { + colonAtEOLOpt() + Return(if (isExprIntro) expr() else EmptyTree, EmptyTree) + } case FOR => forExpr() case LBRACKET => @@ -1644,8 +1839,10 @@ object Parsers { /** `match' { CaseClauses } */ def matchExpr(t: Tree, start: Offset, mkMatch: (Tree, List[CaseDef]) => Match) = - atSpan(start, in.skipToken()) { - inBraces(mkMatch(t, caseClauses(caseClause))) + indentRegion(MATCH) { + atSpan(start, in.skipToken()) { + inBracesOrIndented(mkMatch(t, caseClauses(caseClause))) + } } /** `match' { ImplicitCaseClauses } @@ -1676,7 +1873,7 @@ object Parsers { */ def matchType(t: Tree): MatchTypeTree = atSpan(t.span.start, accept(MATCH)) { - inBraces(MatchTypeTree(EmptyTree, t, caseClauses(typeCaseClause))) + inBracesOrIndented(MatchTypeTree(EmptyTree, t, caseClauses(typeCaseClause))) } /** FunParams ::= Bindings @@ -1790,7 +1987,7 @@ object Parsers { atSpan(start) { Ident(pname) } case LPAREN => atSpan(in.offset) { makeTupleOrParens(inParens(exprsInParensOpt())) } - case LBRACE => + case LBRACE | INDENT => canApply = false blockExpr() case QUOTE => @@ -1816,6 +2013,10 @@ object Parsers { "Scala 2 macros are not supported, see http://dotty.epfl.ch/docs/reference/dropped-features/macros.html", start) unimplementedExpr + case COLONEOL => + syntaxError("':' not allowed here") + in.nextToken() + simpleExpr() case _ => if (isLiteral) literal() else { @@ -1827,7 +2028,7 @@ object Parsers { } def simpleExprRest(t: Tree, canApply: Boolean = true): Tree = { - if (canApply) 
newLineOptWhenFollowedBy(LBRACE) + if (canApply) possibleBracesStart() in.token match { case DOT => in.nextToken() @@ -1835,7 +2036,7 @@ object Parsers { case LBRACKET => val tapp = atSpan(startOffset(t), in.offset) { TypeApply(t, typeArgs(namedOK = true, wildOK = false)) } simpleExprRest(tapp, canApply = true) - case LPAREN | LBRACE if canApply => + case LPAREN | LBRACE | INDENT if canApply => val app = atSpan(startOffset(t), in.offset) { Apply(t, argumentExprs()) } simpleExprRest(app, canApply = true) case USCORE => @@ -1847,30 +2048,31 @@ object Parsers { /** SimpleExpr ::= ‘new’ (ConstrApp {`with` ConstrApp} [TemplateBody] | TemplateBody) */ - def newExpr(): Tree = { - val start = in.skipToken() - def reposition(t: Tree) = t.withSpan(Span(start, in.lastOffset)) - newLineOptWhenFollowedBy(LBRACE) - val parents = - if (in.token == LBRACE) Nil - else constrApp() :: { - if (in.token == WITH) { - // Enable this for 3.1, when we drop `with` for inheritance: - // in.errorUnlessInScala2Mode( - // "anonymous class with multiple parents is no longer supported; use a named class instead") - in.nextToken() - tokenSeparated(WITH, constrApp) + def newExpr(): Tree = + indentRegion(NEW) { + val start = in.skipToken() + def reposition(t: Tree) = t.withSpan(Span(start, in.lastOffset)) + possibleBracesStart() + val parents = + if (in.isNestedStart) Nil + else constrApp() :: { + if (in.token == WITH) { + // Enable this for 3.1, when we drop `with` for inheritance: + // in.errorUnlessInScala2Mode( + // "anonymous class with multiple parents is no longer supported; use a named class instead") + in.nextToken() + tokenSeparated(WITH, constrApp) + } + else Nil } - else Nil + possibleBracesStart() + parents match { + case parent :: Nil if !in.isNestedStart => + reposition(if (parent.isType) ensureApplied(wrapNew(parent)) else parent) + case _ => + New(reposition(templateBodyOpt(emptyConstructor, parents, Nil))) } - newLineOptWhenFollowedBy(LBRACE) - parents match { - case parent :: Nil if in.token != LBRACE => - reposition(if (parent.isType) ensureApplied(wrapNew(parent)) else parent) - case _ => - New(reposition(templateBodyOpt(emptyConstructor, parents, Nil))) } - } /** ExprsInParens ::= ExprInParens {`,' ExprInParens} */ @@ -1887,19 +2089,18 @@ object Parsers { * | [nl] BlockExpr */ def argumentExprs(): List[Tree] = - if (in.token == LBRACE) blockExpr() :: Nil else parArgumentExprs() + if (in.isNestedStart) blockExpr() :: Nil else parArgumentExprs() val argumentExpr: () => Tree = () => exprInParens() match { case arg @ Assign(Ident(id), rhs) => cpy.NamedArg(arg)(id, rhs) case arg => arg } - /** ArgumentExprss ::= {ArgumentExprs} */ def argumentExprss(fn: Tree): Tree = { - newLineOptWhenFollowedBy(LBRACE) - if (in.token == LPAREN || in.token == LBRACE) argumentExprss(Apply(fn, argumentExprs())) + possibleBracesStart() + if (in.token == LPAREN || in.isNestedStart) argumentExprss(Apply(fn, argumentExprs())) else fn } @@ -2004,77 +2205,82 @@ object Parsers { * {nl} [`yield'] Expr * | `for' Enumerators (`do' Expr | `yield' Expr) */ - def forExpr(): Tree = atSpan(in.skipToken()) { - var wrappedEnums = true - val start = in.offset - val forEnd = in.lastOffset - val leading = in.token - val enums = - if (leading == LBRACE || leading == LPAREN && parensEncloseGenerators) { - in.nextToken() - openParens.change(leading, 1) - val res = - if (leading == LBRACE || in.token == CASE) - enumerators() - else { - val pats = patternsOpt() - val pat = - if (in.token == RPAREN || pats.length > 1) { - wrappedEnums = false - 
accept(RPAREN) - openParens.change(LPAREN, -1) - atSpan(start) { makeTupleOrParens(pats) } // note: alternatives `|' need to be weeded out by typer. + def forExpr(): Tree = indentRegion(FOR) { + atSpan(in.skipToken()) { + var wrappedEnums = true + val start = in.offset + val forEnd = in.lastOffset + val leading = in.token + val enums = + if (leading == LBRACE || leading == LPAREN && parensEncloseGenerators) { + in.nextToken() + openParens.change(leading, 1) + val res = + if (leading == LBRACE || in.token == CASE) + enumerators() + else { + val pats = patternsOpt() + val pat = + if (in.token == RPAREN || pats.length > 1) { + wrappedEnums = false + accept(RPAREN) + openParens.change(LPAREN, -1) + atSpan(start) { makeTupleOrParens(pats) } // note: alternatives `|' need to be weeded out by typer. + } + else pats.head + generatorRest(pat, casePat = false) :: enumeratorsRest() + } + if (wrappedEnums) { + val closingOnNewLine = in.isAfterLineEnd + accept(leading + 1) + openParens.change(leading, -1) + def hasMultiLineEnum = + res.exists { t => + val pos = t.sourcePos + pos.startLine < pos.endLine } - else pats.head - generatorRest(pat, casePat = false) :: enumeratorsRest() - } - if (wrappedEnums) { - val closingOnNewLine = in.isAfterLineEnd() - accept(leading + 1) - openParens.change(leading, -1) - def hasMultiLineEnum = - res.exists { t => - val pos = t.sourcePos - pos.startLine < pos.endLine + if (rewriteToNewSyntax(Span(start)) && (leading == LBRACE || !hasMultiLineEnum)) { + // Don't rewrite if that could change meaning of newlines + newLinesOpt() + dropParensOrBraces(start, if (in.token == YIELD || in.token == DO) "" else "do") } - if (rewriteToNewSyntax(Span(start)) && (leading == LBRACE || !hasMultiLineEnum)) { - // Don't rewrite if that could change meaning of newlines - newLinesOpt() - dropParensOrBraces(start, if (in.token == YIELD || in.token == DO) "" else "do") } + res } - res - } - else { - wrappedEnums = false - - /*if (in.token == INDENT) inBracesOrIndented(enumerators()) else*/ - val ts = inSepRegion(LBRACE, RBRACE)(enumerators()) - if (rewriteToOldSyntax(Span(start)) && ts.nonEmpty) { - if (ts.length > 1 && ts.head.sourcePos.startLine != ts.last.sourcePos.startLine) { - patch(source, Span(forEnd), " {") - patch(source, Span(in.offset), "} ") - } + else { + wrappedEnums = false + + if (in.token == INDENT) + inBracesOrIndented(enumerators()) else { - patch(source, ts.head.span.startPos, "(") - patch(source, ts.last.span.endPos, ")") + val ts = inSepRegion(LBRACE, RBRACE)(enumerators()) + if (rewriteToOldSyntax(Span(start)) && ts.nonEmpty) { + if (ts.head.sourcePos.startLine != ts.last.sourcePos.startLine) { + patch(source, Span(forEnd), " {") + patch(source, Span(in.offset), "} ") + } + else { + patch(source, ts.head.span.startPos, "(") + patch(source, ts.last.span.endPos, ")") + } + } + ts } } - ts + newLinesOpt() + if (in.token == YIELD) { + in.nextToken() + ForYield(enums, expr()) + } + else if (in.token == DO) { + if (rewriteToOldSyntax()) dropTerminator() + in.nextToken() + ForDo(enums, expr()) + } + else { + if (!wrappedEnums) syntaxErrorOrIncomplete(YieldOrDoExpectedInForComprehension()) + ForDo(enums, expr()) } - newLinesOpt() - if (in.token == YIELD) { - in.nextToken() - ForYield(enums, expr()) - } - else if (in.token == DO) { - if (rewriteToOldSyntax()) dropTerminator() - in.nextToken() - ForDo(enums, expr()) - } - else { - if (!wrappedEnums) syntaxErrorOrIncomplete(YieldOrDoExpectedInForComprehension()) - ForDo(enums, expr()) } } @@ -2781,14 +2987,17 @@ object Parsers { 
else emptyType val rhs = if (tpt.isEmpty || in.token == EQUALS) { - accept(EQUALS) - if (in.token == USCORE && !tpt.isEmpty && mods.is(Mutable) && - (lhs.toList forall (_.isInstanceOf[Ident]))) { - wildcardIdent() - } else { - expr() + indentRegion(first) { + accept(EQUALS) + if (in.token == USCORE && !tpt.isEmpty && mods.is(Mutable) && + (lhs.toList forall (_.isInstanceOf[Ident]))) { + wildcardIdent() + } else { + expr() + } } - } else EmptyTree + } + else EmptyTree lhs match { case (id @ Ident(name: TermName)) :: Nil if name != nme.WILDCARD => val vdef = ValDef(name, tpt, rhs) @@ -2838,6 +3047,7 @@ object Parsers { (Nil, Method) val mods1 = addFlag(mods, flags) val ident = termIdent() + val name = ident.name.asTermName val tparams = typeParamClauseOpt(ParamOwner.Def) val vparamss = paramClauses() match { case rparams :: rparamss if leadingParamss.nonEmpty && !isLeftAssoc(ident.name) => @@ -2855,8 +3065,10 @@ object Parsers { if (in.isScala2Mode) newLineOptWhenFollowedBy(LBRACE) val rhs = if (in.token == EQUALS) { - in.nextToken() - expr() + indentRegion(name) { + in.nextToken() + expr() + } } else if (!tpt.isEmpty) EmptyTree @@ -2871,7 +3083,7 @@ object Parsers { expr() } - val ddef = DefDef(ident.name.asTermName, tparams, vparamss, tpt, rhs) + val ddef = DefDef(name, tparams, vparamss, tpt, rhs) if (isBackquoted(ident)) ddef.pushAttachment(Backquoted, ()) finalizeDef(ddef, mods1, start) } @@ -2881,29 +3093,25 @@ object Parsers { * | `{' SelfInvocation {semi BlockStat} `}' */ def constrExpr(): Tree = - if (in.token == LBRACE) constrBlock() + if (in.isNestedStart) + atSpan(in.offset) { + inBracesOrIndented { + val stats = selfInvocation() :: ( + if (isStatSep) { in.nextToken(); blockStatSeq() } + else Nil) + Block(stats, Literal(Constant(()))) + } + } else Block(selfInvocation() :: Nil, Literal(Constant(()))) /** SelfInvocation ::= this ArgumentExprs {ArgumentExprs} */ def selfInvocation(): Tree = atSpan(accept(THIS)) { - newLineOptWhenFollowedBy(LBRACE) + possibleBracesStart() argumentExprss(Apply(Ident(nme.CONSTRUCTOR), argumentExprs())) } - /** ConstrBlock ::= `{' SelfInvocation {semi BlockStat} `}' - */ - def constrBlock(): Tree = - atSpan(in.skipToken()) { - val stats = selfInvocation() :: { - if (isStatSep) { in.nextToken(); blockStatSeq() } - else Nil - } - accept(RBRACE) - Block(stats, Literal(Constant(()))) - } - /** TypeDcl ::= id [TypeParamClause] TypeBounds [‘=’ Type] */ def typeDefOrDcl(start: Offset, mods: Modifiers): Tree = { @@ -2948,7 +3156,7 @@ object Parsers { makeTypeDef(rhs) } else makeTypeDef(bounds) - case SEMI | NEWLINE | NEWLINES | COMMA | RBRACE | EOF => + case SEMI | NEWLINE | NEWLINES | COMMA | RBRACE | OUTDENT | EOF => makeTypeDef(typeBounds()) case _ => syntaxErrorOrIncomplete(ExpectedTypeBoundOrEquals(in.token)) @@ -2990,11 +3198,12 @@ object Parsers { classDefRest(start, mods, ident().toTypeName) } - def classDefRest(start: Offset, mods: Modifiers, name: TypeName): TypeDef = { - val constr = classConstr(isCaseClass = mods.is(Case)) - val templ = templateOpt(constr) - finalizeDef(TypeDef(name, templ), mods, start) - } + def classDefRest(start: Offset, mods: Modifiers, name: TypeName): TypeDef = + indentRegion(name.toTermName) { + val constr = classConstr(isCaseClass = mods.is(Case)) + val templ = templateOpt(constr) + finalizeDef(TypeDef(name, templ), mods, start) + } /** ClassConstr ::= [ClsTypeParamClause] [ConstrMods] ClsParamClauses */ @@ -3013,22 +3222,23 @@ object Parsers { /** ObjectDef ::= id TemplateOpt */ def objectDef(start: Offset, mods: 
Modifiers): ModuleDef = atSpan(start, nameStart) { - objectDefRest(start, mods, ident()) - } - - def objectDefRest(start: Offset, mods: Modifiers, name: TermName): ModuleDef = { - val templ = templateOpt(emptyConstructor) - finalizeDef(ModuleDef(name, templ), mods, start) + val name = ident() + indentRegion(name) { + val templ = templateOpt(emptyConstructor) + finalizeDef(ModuleDef(name, templ), mods, start) + } } /** EnumDef ::= id ClassConstr InheritClauses EnumBody */ def enumDef(start: Offset, mods: Modifiers): TypeDef = atSpan(start, nameStart) { - val modName = ident() - val clsName = modName.toTypeName - val constr = classConstr() - val templ = template(constr, isEnum = true) - finalizeDef(TypeDef(clsName, templ), mods, start) + val modulName = ident() + indentRegion(modulName) { + val clsName = modulName.toTypeName + val constr = classConstr() + val templ = template(constr, isEnum = true) + finalizeDef(TypeDef(clsName, templ), mods, start) + } } /** EnumCase = `case' (id ClassConstr [`extends' ConstrApps] | ids) @@ -3077,42 +3287,44 @@ object Parsers { def instanceDef(newStyle: Boolean, start: Offset, mods: Modifiers, instanceMod: Mod) = atSpan(start, nameStart) { var mods1 = addMod(mods, instanceMod) val name = if (isIdent && !(newStyle && isIdent(nme.as))) ident() else EmptyTermName - val tparams = typeParamClauseOpt(ParamOwner.Def) - var leadingParamss = - if (in.token == LPAREN) - try paramClause(prefix = true) :: Nil - finally { - newLineOptWhenFollowedBy(LBRACE) - if (in.token != LBRACE) syntaxErrorOrIncomplete("`{' expected") + indentRegion(name) { + val tparams = typeParamClauseOpt(ParamOwner.Def) + var leadingParamss = + if (in.token == LPAREN) + try paramClause(prefix = true) :: Nil + finally { + possibleBracesStart() + if (!in.isNestedStart) syntaxErrorOrIncomplete("`{' expected") + } + else Nil + val parents = + if (!newStyle && in.token == FOR || isIdent(nme.as)) { // for the moment, accept both `given for` and `given as` + in.nextToken() + tokenSeparated(COMMA, constrApp) } - else Nil - val parents = - if (!newStyle && in.token == FOR || isIdent(nme.as)) { // for the moment, accept both `given for` and `given as` - in.nextToken() - tokenSeparated(COMMA, constrApp) - } - else Nil - val vparamss = paramClauses(ofInstance = true) - val instDef = - if (in.token == EQUALS && parents.length == 1 && parents.head.isType) { - in.nextToken() - mods1 |= Final - DefDef(name, tparams, vparamss, parents.head, expr()) - } - else { - newLineOptWhenFollowedBy(LBRACE) - val (tparams1, vparamss1) = - if (leadingParamss.nonEmpty) - (tparams, leadingParamss) - else - (tparams.map(tparam => tparam.withMods(tparam.mods | PrivateLocal)), - vparamss.map(_.map(vparam => - vparam.withMods(vparam.mods &~ Param | ParamAccessor | PrivateLocal)))) - val templ = templateBodyOpt(makeConstructor(tparams1, vparamss1), parents, Nil) - if (tparams.isEmpty && vparamss1.isEmpty || leadingParamss.nonEmpty) ModuleDef(name, templ) - else TypeDef(name.toTypeName, templ) - } - finalizeDef(instDef, mods1, start) + else Nil + val vparamss = paramClauses(ofInstance = true) + val instDef = + if (in.token == EQUALS && parents.length == 1 && parents.head.isType) { + in.nextToken() + mods1 |= Final + DefDef(name, tparams, vparamss, parents.head, expr()) + } + else { + possibleBracesStart() + val (tparams1, vparamss1) = + if (leadingParamss.nonEmpty) + (tparams, leadingParamss) + else + (tparams.map(tparam => tparam.withMods(tparam.mods | PrivateLocal)), + vparamss.map(_.map(vparam => + vparam.withMods(vparam.mods &~ 
Param | ParamAccessor | PrivateLocal)))) + val templ = templateBodyOpt(makeConstructor(tparams1, vparamss1), parents, Nil) + if (tparams.isEmpty && vparamss1.isEmpty || leadingParamss.nonEmpty) ModuleDef(name, templ) + else TypeDef(name.toTypeName, templ) + } + finalizeDef(instDef, mods1, start) + } } /* -------- TEMPLATES ------------------------------------------- */ @@ -3180,7 +3392,7 @@ object Parsers { val extended = if (in.token == EXTENDS) { in.nextToken() - if (in.token == LBRACE) { + if (in.token == LBRACE || in.token == COLONEOL) { in.errorOrMigrationWarning("`extends' must be followed by at least one parent") Nil } @@ -3200,7 +3412,7 @@ object Parsers { */ def template(constr: DefDef, isEnum: Boolean = false): Template = { val (parents, derived) = inheritClauses() - newLineOptWhenFollowedBy(LBRACE) + possibleBracesStart() if (isEnum) { val (self, stats) = withinEnum(templateBody()) Template(constr, parents, derived, self, stats) @@ -3211,8 +3423,8 @@ object Parsers { /** TemplateOpt = [Template] */ def templateOpt(constr: DefDef): Template = { - newLineOptWhenFollowedBy(LBRACE) - if (in.token == EXTENDS || isIdent(nme.derives) || in.token == LBRACE) + possibleBracesStart() + if (in.token == EXTENDS || isIdent(nme.derives) || in.isNestedStart) template(constr) else Template(constr, Nil, Nil, EmptyValDef, Nil) @@ -3222,7 +3434,7 @@ object Parsers { */ def templateBodyOpt(constr: DefDef, parents: List[Tree], derived: List[Tree]): Template = { val (self, stats) = - if (in.token == LBRACE) templateBody() else (EmptyValDef, Nil) + if (in.isNestedStart) templateBody() else (EmptyValDef, Nil) Template(constr, parents, derived, self, stats) } @@ -3247,9 +3459,11 @@ object Parsers { */ def packaging(start: Int): Tree = { val pkg = qualId() - newLineOptWhenFollowedBy(LBRACE) - val stats = inDefScopeBraces(topStatSeq()) - makePackaging(start, pkg, stats) + indentRegion(pkg) { + possibleBracesStart() + val stats = inDefScopeBraces(topStatSeq()) + makePackaging(start, pkg, stats) + } } /** TopStatSeq ::= TopStat {semi TopStat} @@ -3313,6 +3527,7 @@ object Parsers { if (name != nme.ERROR) self = makeSelfDef(name, tpt).withSpan(first.span) } + in.token = EMPTY // hack to suppress INDENT insertion after `=>` in.nextToken() } else { stats += first @@ -3450,17 +3665,19 @@ object Parsers { } } else { val pkg = qualId() - newLineOptWhenFollowedBy(LBRACE) - if (in.token == EOF) - ts += makePackaging(start, pkg, List()) - else if (in.token == LBRACE) { - ts += inDefScopeBraces(makePackaging(start, pkg, topStatSeq())) - acceptStatSepUnlessAtEnd() - ts ++= topStatSeq() - } - else { - acceptStatSep() - ts += makePackaging(start, pkg, topstats()) + indentRegion(pkg) { + possibleBracesStart() + if (in.token == EOF) + ts += makePackaging(start, pkg, List()) + else if (in.isNestedStart) { + ts += inDefScopeBraces(makePackaging(start, pkg, topStatSeq())) + acceptStatSepUnlessAtEnd() + ts ++= topStatSeq() + } + else { + acceptStatSep() + ts += makePackaging(start, pkg, topstats()) + } } } } diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index b5a8c7686597..0508c63fa409 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -9,11 +9,13 @@ import java.lang.Character.isDigit import scala.internal.Chars._ import util.NameTransformer.avoidIllegalChars import util.Spans.Span +import config.Config import Tokens._ import scala.annotation.{ switch, tailrec } import 
scala.collection.mutable import scala.collection.immutable.{SortedMap, BitSet} import rewrites.Rewrites.patch +import config.Printers.lexical object Scanners { @@ -36,6 +38,11 @@ object Scanners { /** the offset of the character following the token preceding this one */ var lastOffset: Offset = 0 + /** the offset of the newline immediately preceding the token, or -1 if + * token is not preceded by a newline. + */ + var lineOffset: Offset = -1 + /** the name of an identifier */ var name: SimpleName = null @@ -49,6 +56,7 @@ object Scanners { this.token = td.token this.offset = td.offset this.lastOffset = td.lastOffset + this.lineOffset = td.lineOffset this.name = td.name this.strVal = td.strVal this.base = td.base @@ -213,11 +221,10 @@ object Scanners { if (isNumberSeparator(litBuf.last)) errorButContinue("trailing separator is not allowed", offset + litBuf.length - 1) } - } class Scanner(source: SourceFile, override val startFrom: Offset = 0)(implicit ctx: Context) extends ScannerCommon(source)(ctx) { - val keepComments: Boolean = !ctx.settings.YdropComments.value + val keepComments = !ctx.settings.YdropComments.value /** A switch whether operators at the start of lines can be infix operators */ private var allowLeadingInfixOperators = true @@ -226,6 +233,14 @@ object Scanners { val oldSyntax = ctx.settings.oldSyntax.value val newSyntax = ctx.settings.newSyntax.value + val noindentSyntax = ctx.settings.noindent.value + val indentSyntax = Config.allowIndent || ctx.settings.indent.value || noindentSyntax && rewrite + val rewriteToIndent = ctx.settings.indent.value && rewrite + val rewriteNoIndent = noindentSyntax && rewrite + + if (rewrite && oldSyntax & noindentSyntax) + error("-rewrite cannot be used with both -old-syntax and -noindent; -noindent must come first") + /** All doc comments kept by their end position in a `Map` */ private[this] var docstringMap: SortedMap[Int, Comment] = SortedMap.empty @@ -290,7 +305,13 @@ object Scanners { * (the STRINGLIT appears twice in succession on the stack iff the * expression is a multiline string literal). */ - var sepRegions: List[Token] = List() + var sepRegions: List[Token] = Nil + + /** Indentation widths, innermost to outermost */ + var indent: IndentRegion = IndentRegion(IndentWidth.Zero, Set(), EMPTY, null) + + /** The end marker that was skipped last */ + val endMarkers = new mutable.ListBuffer[EndMarker] // Scala 2 compatibility @@ -377,112 +398,280 @@ object Scanners { // Read a token or copy it from `next` tokenData if (next.token == EMPTY) { lastOffset = lastCharOffset - if (inStringInterpolation) fetchStringPart() - else fetchToken() + if (inStringInterpolation) fetchStringPart() else fetchToken() if (token == ERROR) adjustSepRegions(STRINGLIT) } else { - this copyFrom next + this.copyFrom(next) next.token = EMPTY } - def insertNL(nl: Token): Unit = { - next.copyFrom(this) - // todo: make offset line-end of previous line? - offset = if (lineStartOffset <= offset) lineStartOffset else lastLineStartOffset - token = nl - } + if (isAfterLineEnd) handleNewLine(lastToken) + postProcessToken() + //printState() + } + protected def printState() = + print("[" + show + "]") - /** A leading symbolic or backquoted identifier is treated as an infix operator - * if it is followed by at least one ' ' and a token on the same line - * that can start an expression. 
- */ - def isLeadingInfixOperator = - allowLeadingInfixOperators && - (token == BACKQUOTED_IDENT || - token == IDENTIFIER && isOperatorPart(name(name.length - 1))) && - (ch == ' ') && { - val lookahead = lookaheadScanner - lookahead.allowLeadingInfixOperators = false - // force a NEWLINE a after current token if it is on its own line + /** Insert `token` at assumed `offset` in front of current one. */ + def insert(token: Token, offset: Int) = { + next.copyFrom(this) + this.offset = offset + this.token = token + } + + /** If this token and the next constitute an end marker, skip them and append a new EndMarker + * value at the end of the endMarkers queue. + */ + private def handleEndMarkers(width: IndentWidth): Unit = + if (next.token == IDENTIFIER && next.name == nme.end && width == indent.width) { + val lookahead = lookaheadScanner + lookahead.nextToken() // skip the `end` + + def handle(tag: EndMarkerTag) = { + val skipTo = lookahead.charOffset lookahead.nextToken() - canStartExpressionTokens.contains(lookahead.token) + if (lookahead.isAfterLineEnd || lookahead.token == EOF) { + lexical.println(i"produce end marker $tag $width") + endMarkers += EndMarker(tag, width, offset) + next.token = EMPTY + while (charOffset < skipTo) nextChar() + } } - /** Insert NEWLINE or NEWLINES if - * - we are after a newline - * - we are within a { ... } or on toplevel (wrt sepRegions) - * - the current token can start a statement and the one before can end it - * insert NEWLINES if we are past a blank line, NEWLINE otherwise - */ - if (isAfterLineEnd() && - (canEndStatTokens contains lastToken) && - (canStartStatTokens contains token) && - (sepRegions.isEmpty || sepRegions.head == RBRACE || - sepRegions.head == ARROW && token == CASE)) { - if (pastBlankLine()) - insertNL(NEWLINES) - else if (!isLeadingInfixOperator) - insertNL(NEWLINE) - else if (isScala2Mode || oldSyntax) + lookahead.token match { + case IDENTIFIER | BACKQUOTED_IDENT => handle(lookahead.name) + case IF | WHILE | FOR | MATCH | TRY | NEW => handle(lookahead.token) + case _ => + } + } + + /** Consume and cancel the head of the end markers queue if it has the given `tag` and width. + * Flag end markers with higher indent widths as errors. + */ + def consumeEndMarker(tag: EndMarkerTag, width: IndentWidth): Unit = { + lexical.println(i"consume end marker $tag $width") + if (endMarkers.nonEmpty) { + val em = endMarkers.head + if (width <= em.width) { + if (em.tag != tag || em.width != width) { + lexical.println(i"misaligned end marker ${em.tag}, ${em.width} at ${width}") + errorButContinue("misaligned end marker", em.offset) + } + endMarkers.trimStart(1) + } + } + } + + /** A leading symbolic or backquoted identifier is treated as an infix operator if + * - it does not follow a blank line, and + * - it is followed on the same line by at least one ' ' + * and a token that can start an expression. + * If a leading infix operator is found and -language:Scala2 or -old-syntax is set, + * emit a change warning. 
+ */ + def isLeadingInfixOperator() = ( + allowLeadingInfixOperators + && ( token == BACKQUOTED_IDENT + || token == IDENTIFIER && isOperatorPart(name(name.length - 1))) + && ch == ' ' + && !pastBlankLine + && { + val lookahead = lookaheadScanner + lookahead.allowLeadingInfixOperators = false + // force a NEWLINE a after current token if it is on its own line + lookahead.nextToken() + canStartExpressionTokens.contains(lookahead.token) + } + && { + if (isScala2Mode || oldSyntax && !rewrite) ctx.warning(em"""Line starts with an operator; |it is now treated as a continuation of the expression on the previous line, |not as a separate statement.""", source.atSpan(Span(offset))) + true } + ) - postProcessToken() - // print("[" + this +"]") + /** The indentation width of the given offset. + * It is assumed that only blank characters are between the start of the line and the offset. + */ + def indentWidth(offset: Offset): IndentWidth = { + import IndentWidth.{Run, Conc} + def recur(idx: Int, ch: Char, n: Int): IndentWidth = + if (idx < 0) Run(ch, n) + else { + val nextChar = buf(idx) + if (nextChar == ' ' || nextChar == '\t') + if (nextChar == ch) + recur(idx - 1, ch, n + 1) + else { + val prefix = recur(idx - 1, nextChar, 1) + if (n == 0) prefix else Conc(prefix, Run(ch, n)) + } + else Run(ch, n) + } + recur(offset - 1, ' ', 0) + } + + /** Handle newlines, possibly inserting an INDENT, OUTDENT, NEWLINE, or NEWLINES token + * in front of the current token. This depends on whether indentation is significant or not. + * + * Indentation is _significant_ if indentSyntax is set, and we are not inside a + * {...}, [...], (...), case ... => pair, nor in a if/while condition + * (i.e. sepRegions is empty). + * + * There are three rules: + * + * 1. Insert NEWLINE or NEWLINES if + * + * - the closest enclosing sepRegion is { ... } or for ... do/yield, + * or we are on the toplevel, i.e. sepRegions is empty, and + * - the previous token can end a statement, and + * - the current token can start a statement, and + * - the current token is not a leading infix operator, and + * - if indentation is significant then the current token starts at the current + * indentation width or to the right of it. + * + * The inserted token is NEWLINES if the current token is preceded by a + * whitespace line, or NEWLINE otherwise. + * + * 2. Insert INDENT if + * + * - indentation is significant, and + * - the last token can start an indentation region. + * - the indentation of the current token is strictly greater than the previous + * indentation width, or the two widths are the same and the current token is + * one of `:` or `match`. + * + * The following tokens can start an indentation region: + * + * : = => <- if then else while do try catch finally for yield match + * + * Inserting an INDENT starts a new indentation region with the indentation of the current + * token as indentation width. + * + * 3. Insert OUTDENT if + * + * - indentation is significant, and + * - the indentation of the current token is strictly less than the + * previous indentation width, + * - the current token is not a leading infix operator. + * + * Inserting an OUTDENT closes an indentation region. In this case, issue an error if + * the indentation of the current token does not match the indentation of some previous + * line in an enclosing indentation region. + * + * If a token is inserted and consumed, the original source token is still considered to + * start a new line, so the process that inserts an OUTDENT might repeat several times. 
+ * + * Indentation widths are strings consisting of spaces and tabs, ordered by the prefix relation. + * I.e. `a <= b` iff `b.startsWith(a)`. If indentation is significant it is considered an error + * if the current indentation width and the indentation of the current token are incomparable. + */ + def handleNewLine(lastToken: Token) = { + val indentIsSignificant = indentSyntax && sepRegions.isEmpty + val newlineIsSeparating = ( + sepRegions.isEmpty + || sepRegions.head == RBRACE + || sepRegions.head == ARROW && token == CASE + ) + val curWidth = indentWidth(offset) + val lastWidth = indent.width + if (newlineIsSeparating && + canEndStatTokens.contains(lastToken)&& + canStartStatTokens.contains(token) && + (!indentIsSignificant || lastWidth <= curWidth) && + !isLeadingInfixOperator()) + insert(if (pastBlankLine) NEWLINES else NEWLINE, lineOffset) + else if (indentIsSignificant) { + if (lastWidth < curWidth || + lastWidth == curWidth && (lastToken == MATCH || lastToken == CATCH) && token == CASE) { + if (canStartIndentTokens.contains(lastToken)) { + indent = IndentRegion(curWidth, Set(), lastToken, indent) + insert(INDENT, offset) + } + } + else if (curWidth < lastWidth || + curWidth == lastWidth && (indent.token == MATCH || indent.token == CATCH) && token != CASE) { + if (!isLeadingInfixOperator()) { + indent = indent.enclosing + insert(OUTDENT, offset) + handleEndMarkers(curWidth) + } + } + else if (lastWidth != curWidth) + errorButContinue( + i"""Incompatible combinations of tabs and spaces in indentation prefixes. + |Previous indent : $lastWidth + |Latest indent : $curWidth""") + } + if (indentIsSignificant && indent.width < curWidth && !indent.others.contains(curWidth)) { + if (token == OUTDENT) + errorButContinue( + i"""The start of this line does not match any of the previous indentation widths. + |Indentation width of current line : $curWidth + |This falls between previous widths: ${indent.width} and $lastWidth""") + else + indent = IndentRegion(indent.width, indent.others + curWidth, indent.token, indent.outer) + } } + /** - Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE, COLON + => COLONEOL + * - Insert missing OUTDENTs at EOF + */ def postProcessToken(): Unit = { - // Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE def lookahead() = { - prev copyFrom this + prev.copyFrom(this) + lastOffset = lastCharOffset fetchToken() } - def reset(nextLastOffset: Offset) = { - lastOffset = nextLastOffset - next copyFrom this - this copyFrom prev + def reset() = { + next.copyFrom(this) + this.copyFrom(prev) } def fuse(tok: Int) = { token = tok offset = prev.offset lastOffset = prev.lastOffset + lineOffset = prev.lineOffset } - if (token == CASE) { - val nextLastOffset = lastCharOffset - lookahead() - if (token == CLASS) fuse(CASECLASS) - else if (token == OBJECT) fuse(CASEOBJECT) - else reset(nextLastOffset) - } else if (token == SEMI) { - val nextLastOffset = lastCharOffset - lookahead() - if (token != ELSE) reset(nextLastOffset) - } else if (token == COMMA){ - val nextLastOffset = lastCharOffset - lookahead() - if (isAfterLineEnd() && (token == RPAREN || token == RBRACKET || token == RBRACE)) { - /* skip the trailing comma */ - } else if (token == EOF) { // e.g. 
when the REPL is parsing "val List(x, y, _*," - /* skip the trailing comma */ - } else reset(nextLastOffset) + token match { + case CASE => + lookahead() + if (token == CLASS) fuse(CASECLASS) + else if (token == OBJECT) fuse(CASEOBJECT) + else reset() + case SEMI => + lookahead() + if (token != ELSE) reset() + case COMMA => + lookahead() + if (isAfterLineEnd && (token == RPAREN || token == RBRACKET || token == RBRACE || token == OUTDENT)) { + /* skip the trailing comma */ + } else if (token == EOF) { // e.g. when the REPL is parsing "val List(x, y, _*," + /* skip the trailing comma */ + } else reset() + case COLON => + lookahead() + val atEOL = isAfterLineEnd + reset() + if (atEOL) token = COLONEOL + case EOF if !indent.isOutermost => + insert(OUTDENT, offset) + indent = indent.outer + case _ => } - } /** Is current token first one after a newline? */ - def isAfterLineEnd(): Boolean = - lastOffset < lineStartOffset && - (lineStartOffset <= offset || - lastOffset < lastLineStartOffset && lastLineStartOffset <= offset) + def isAfterLineEnd: Boolean = lineOffset >= 0 /** Is there a blank line between the current token and the last one? + * A blank line consists only of characters <= ' '. * @pre afterLineEnd(). */ - private def pastBlankLine(): Boolean = { + private def pastBlankLine: Boolean = { val end = offset def recur(idx: Offset, isBlank: Boolean): Boolean = idx < end && { @@ -497,6 +686,7 @@ object Scanners { */ protected final def fetchToken(): Unit = { offset = charOffset - 1 + lineOffset = if (lastOffset < lineStartOffset) lineStartOffset else -1 name = null (ch: @switch) match { case ' ' | '\t' | CR | LF | FF => @@ -734,7 +924,13 @@ object Scanners { // Lookahead --------------------------------------------------------------- /** A new Scanner that starts at the current token offset */ - def lookaheadScanner: Scanner = new Scanner(source, offset) + def lookaheadScanner: Scanner = new Scanner(source, offset) { + override val indentSyntax = false + override protected def printState() = { + print("la:") + super.printState() + } + } /** Is the token following the current one in `tokens`? */ def lookaheadIn(tokens: BitSet): Boolean = { @@ -845,6 +1041,9 @@ object Scanners { def isSoftModifierInParamModifierPosition: Boolean = isSoftModifier && !lookaheadIn(BitSet(COLON)) + def isNestedStart = token == LBRACE || token == INDENT + def isNestedEnd = token == RBRACE || token == OUTDENT + // Literals ----------------------------------------------------------------- private def getStringLit() = { @@ -1124,8 +1323,8 @@ object Scanners { } /* Resume normal scanning after XML */ - def resume(lastToken: Token): Unit = { - token = lastToken + def resume(lastTokenData: TokenData): Unit = { + this.copyFrom(lastTokenData) if (next.token != EMPTY && !ctx.reporter.hasErrors) error("unexpected end of input: possible missing '}' in XML block") @@ -1137,6 +1336,74 @@ object Scanners { nextToken() } // end Scanner + /** A class describing an indentation region. 
+ * @param width The principal indendation width + * @param others Other indendation widths > width of lines in the same region + */ + class IndentRegion(val width: IndentWidth, val others: Set[IndentWidth], val token: Token, val outer: IndentRegion | Null) { + def enclosing: IndentRegion = outer.asInstanceOf[IndentRegion] + def isOutermost = outer == null + } + + enum IndentWidth { + case Run(ch: Char, n: Int) + case Conc(l: IndentWidth, r: Run) + + def <= (that: IndentWidth): Boolean = this match { + case Run(ch1, n1) => + that match { + case Run(ch2, n2) => n1 <= n2 && (ch1 == ch2 || n1 == 0) + case Conc(l, r) => this <= l + } + case Conc(l1, r1) => + that match { + case Conc(l2, r2) => l1 == l2 && r1 <= r2 + case _ => false + } + } + + def < (that: IndentWidth): Boolean = this <= that && !(that <= this) + + def toPrefix: String = this match { + case Run(ch, n) => ch.toString * n + case Conc(l, r) => l.toPrefix ++ r.toPrefix + } + + override def toString: String = { + def kind(ch: Char) = ch match { + case ' ' => "space" + case '\t' => "tab" + case _ => s"'$ch'-character" + } + this match { + case Run(ch, n) => s"$n ${kind(ch)}${if (n == 1) "" else "s"}" + case Conc(l, r) => s"$l, $r" + } + } + } + object IndentWidth { + private inline val MaxCached = 40 + private val spaces = Array.tabulate(MaxCached + 1)(new Run(' ', _)) + private val tabs = Array.tabulate(MaxCached + 1)(new Run('\t', _)) + + def Run(ch: Char, n: Int): Run = + if (n <= MaxCached && ch == ' ') spaces(n) + else if (n <= MaxCached && ch == '\t') tabs(n) + else new Run(ch, n) + + val Zero = Run(' ', 0) + } + + /** What can be referred to in an end marker */ + type EndMarkerTag = TermName | Token + + /** A processed end marker + * @param tag The name or token referred to in the marker + * @param width The indentation width where the marker occurred + * @param offset The offset of the `end` + */ + case class EndMarker(tag: EndMarkerTag, width: IndentWidth, offset: Int) + // ------------- keyword configuration ----------------------------------- private val (lastKeywordStart, kwArray) = buildKeywordArray(keywords) diff --git a/compiler/src/dotty/tools/dotc/parsing/Tokens.scala b/compiler/src/dotty/tools/dotc/parsing/Tokens.scala index 4f458cf9d58c..96acb0557791 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Tokens.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Tokens.scala @@ -125,9 +125,11 @@ abstract class TokensCommon { final val RBRACKET = 93; enter(RBRACKET, "']'") final val LBRACE = 94; enter(LBRACE, "'{'") final val RBRACE = 95; enter(RBRACE, "'}'") + final val INDENT = 96; enter(INDENT, "indent") + final val OUTDENT = 97; enter(OUTDENT, "unindent") final val firstParen = LPAREN - final val lastParen = RBRACE + final val lastParen = OUTDENT def buildKeywordArray(keywords: TokenSet): (Int, Array[Int]) = { def start(tok: Token) = tokenString(tok).toTermName.asSimpleName.start @@ -186,6 +188,7 @@ object Tokens extends TokensCommon { /** special symbols */ final val NEWLINE = 78; enter(NEWLINE, "end of statement", "new line") final val NEWLINES = 79; enter(NEWLINES, "end of statement", "new lines") + final val COLONEOL = 88; enter(COLONEOL, ":", ": at eol") /** special keywords */ final val USCORE = 73; enter(USCORE, "_") @@ -200,7 +203,7 @@ object Tokens extends TokensCommon { final val QUOTE = 86; enter(QUOTE, "'") /** XML mode */ - final val XMLSTART = 96; enter(XMLSTART, "$XMLSTART$<") // TODO: deprecate + final val XMLSTART = 98; enter(XMLSTART, "$XMLSTART$<") // TODO: deprecate final val alphaKeywords: 
TokenSet = tokenRange(IF, MACRO) final val symbolicKeywords: TokenSet = tokenRange(USCORE, TLARROW) @@ -216,7 +219,7 @@ object Tokens extends TokensCommon { USCORE, NULL, THIS, SUPER, TRUE, FALSE, RETURN, QUOTEID, XMLSTART) final val canStartExpressionTokens: TokenSet = atomicExprTokens | BitSet( - LBRACE, LPAREN, QUOTE, IF, DO, WHILE, FOR, NEW, TRY, THROW, IMPLIED, GIVEN) + LBRACE, LPAREN, INDENT, QUOTE, IF, DO, WHILE, FOR, NEW, TRY, THROW, IMPLIED, GIVEN) final val canStartTypeTokens: TokenSet = literalTokens | identifierTokens | BitSet( THIS, SUPER, USCORE, LPAREN, AT) @@ -249,7 +252,7 @@ object Tokens extends TokensCommon { AT, CASE) final val canEndStatTokens: TokenSet = atomicExprTokens | BitSet( - TYPE, RPAREN, RBRACE, RBRACKET) + TYPE, RPAREN, RBRACE, RBRACKET, OUTDENT) /** Tokens that stop a lookahead scan search for a `<-`, `then`, or `do`. * Used for disambiguating between old and new syntax. @@ -259,6 +262,12 @@ object Tokens extends TokensCommon { final val numericLitTokens: TokenSet = BitSet(INTLIT, LONGLIT, FLOATLIT, DOUBLELIT) + final val statCtdTokens: BitSet = BitSet(THEN, ELSE, DO, CATCH, FINALLY, YIELD, MATCH) + + final val canStartIndentTokens: BitSet = + statCtdTokens | BitSet(COLONEOL, EQUALS, ARROW, LARROW, WHILE, TRY, FOR) + // `if` is excluded because it often comes after `else` which makes for awkward indentation rules + final val scala3keywords = BitSet(ENUM, ERASED, GIVEN, IMPLIED) final val softModifierNames = Set(nme.inline, nme.opaque) diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala index 9279d0b72416..35d77e2c149a 100644 --- a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala @@ -318,6 +318,9 @@ object MarkupParsers { /** Some try/catch/finally logic used by xLiteral and xLiteralPattern. 
*/ @forceInline private def xLiteralCommon(f: () => Tree, ifTruncated: String => Unit): Tree = { + assert(parser.in.token == Tokens.XMLSTART) + val saved = parser.in.newTokenData + saved.copyFrom(parser.in) var output: Tree = null.asInstanceOf[Tree] try output = f() catch { @@ -328,7 +331,7 @@ object MarkupParsers { case _: ArrayIndexOutOfBoundsException => parser.syntaxError("missing end tag in XML literal for <%s>" format debugLastElem, debugLastPos) } - finally parser.in resume Tokens.XMLSTART + finally parser.in.resume(saved) if (output == null) parser.errorTermTree @@ -396,7 +399,12 @@ object MarkupParsers { def escapeToScala[A](op: => A, kind: String): A = { xEmbeddedBlock = false val res = saving[List[Int], A](parser.in.sepRegions, parser.in.sepRegions = _) { - parser.in resume LBRACE + val lbrace = parser.in.newTokenData + lbrace.token = LBRACE + lbrace.offset = parser.in.charOffset - 1 + lbrace.lastOffset = parser.in.lastOffset + lbrace.lineOffset = parser.in.lineOffset + parser.in.resume(lbrace) op } if (parser.in.token != RBRACE) diff --git a/compiler/src/dotty/tools/dotc/printing/SyntaxHighlighting.scala b/compiler/src/dotty/tools/dotc/printing/SyntaxHighlighting.scala index 96f091c392b3..ddad81ecd005 100644 --- a/compiler/src/dotty/tools/dotc/printing/SyntaxHighlighting.scala +++ b/compiler/src/dotty/tools/dotc/printing/SyntaxHighlighting.scala @@ -48,7 +48,9 @@ object SyntaxHighlighting { highlightRange(span.start, span.end, color) } - val scanner = new Scanner(source) + val scanner = new Scanner(source) { + override protected def printState() = () + } while (scanner.token != EOF) { val start = scanner.offset val token = scanner.token diff --git a/compiler/src/dotty/tools/dotc/rewrites/Rewrites.scala b/compiler/src/dotty/tools/dotc/rewrites/Rewrites.scala index d640544151ca..bb7e92890b7d 100644 --- a/compiler/src/dotty/tools/dotc/rewrites/Rewrites.scala +++ b/compiler/src/dotty/tools/dotc/rewrites/Rewrites.scala @@ -17,7 +17,7 @@ object Rewrites { } private class Patches(source: SourceFile) { - private val pbuf = new mutable.ListBuffer[Patch]() + private[Rewrites] val pbuf = new mutable.ListBuffer[Patch]() def addPatch(span: Span, replacement: String): Unit = pbuf += Patch(span, replacement) @@ -27,7 +27,7 @@ object Rewrites { val patches = pbuf.toList.sortBy(_.span.start) if (patches.nonEmpty) patches reduceLeft {(p1, p2) => - assert(p1.span.end <= p2.span.start, s"overlapping patches: $p1 and $p2") + assert(p1.span.end <= p2.span.start, s"overlapping patches in $source: $p1 and $p2") p2 } val ds = new Array[Char](cs.length + delta) @@ -74,6 +74,12 @@ object Rewrites { def patch(span: Span, replacement: String)(implicit ctx: Context): Unit = patch(ctx.compilationUnit.source, span, replacement) + /** Does `span` overlap with a patch region of `source`? */ + def overlapsPatch(source: SourceFile, span: Span) given (ctx: Context): Boolean = + ctx.settings.rewrite.value.exists(rewrites => + rewrites.patched.get(source).exists(patches => + patches.pbuf.exists(patch => patch.span.overlaps(span)))) + /** If -rewrite is set, apply all patches and overwrite patched source files. 
*/ def writeBack()(implicit ctx: Context): Unit = diff --git a/compiler/src/dotty/tools/dotc/util/Spans.scala b/compiler/src/dotty/tools/dotc/util/Spans.scala index c3b1e371e7a2..7b4ef3c2e200 100644 --- a/compiler/src/dotty/tools/dotc/util/Spans.scala +++ b/compiler/src/dotty/tools/dotc/util/Spans.scala @@ -77,6 +77,17 @@ object Spans { def contains(that: Span): Boolean = !that.exists || exists && (start <= that.start && end >= that.end) + /** Does the range of this span overlap with the range of that span at more than a single point? */ + def overlaps(that: Span): Boolean = { + def containsInner(span: Span, offset: Int) = span.start < offset && offset < span.end + exists && that.exists && ( + containsInner(this, that.start) + || containsInner(this, that.end) + || containsInner(that, this.start) + || containsInner(that, this.end) + ) + } + /** Is this span synthetic? */ def isSynthetic: Boolean = pointDelta == SyntheticPointDelta diff --git a/docs/docs/internals/syntax.md b/docs/docs/internals/syntax.md index bf0b5bf9892c..9530bbb5bfa7 100644 --- a/docs/docs/internals/syntax.md +++ b/docs/docs/internals/syntax.md @@ -84,6 +84,7 @@ comment ::= ‘/*’ “any sequence of characters; nested comments ar | ‘//’ “any sequence of characters up to end of line” nl ::= “new line character” +cnl ::= nl | "colon at eol" semi ::= ‘;’ | nl {nl} ``` @@ -151,8 +152,8 @@ FunArgTypes ::= InfixType | ‘(’ TypedFunParam {‘,’ TypedFunParam } ‘)’ TypedFunParam ::= id ‘:’ Type MatchType ::= InfixType `match` TypeCaseClauses -InfixType ::= RefinedType {id [nl] RefinedType} InfixOp(t1, op, t2) -RefinedType ::= WithType {[nl] Refinement} RefinedTypeTree(t, ds) +InfixType ::= RefinedType {id [cnl] RefinedType} InfixOp(t1, op, t2) +RefinedType ::= WithType {[cnl] Refinement} RefinedTypeTree(t, ds) WithType ::= AnnotType {‘with’ AnnotType} (deprecated) AnnotType ::= SimpleType {Annotation} Annotated(t, annot) SimpleType ::= SimpleType TypeArgs AppliedTypeTree(t, args) @@ -189,7 +190,7 @@ FunParams ::= Bindings | ‘_’ Expr1 ::= ‘if’ ‘(’ Expr ‘)’ {nl} Expr [[semi] ‘else’ Expr] If(Parens(cond), thenp, elsep?) - | ‘if’ Expr ‘then’ Expr [[semi] ‘else’ Expr] If(cond, thenp, elsep?) + | ‘if’ Expr ‘then’ Expr [[semi] ‘else’ Expr] If(cond, thenp, elsep?) | ‘while’ ‘(’ Expr ‘)’ {nl} Expr WhileDo(Parens(cond), body) | ‘while’ Expr ‘do’ Expr WhileDo(cond, body) | ‘try’ Expr Catches [‘finally’ Expr] Try(expr, catches, expr?) @@ -209,7 +210,7 @@ Ascription ::= ‘:’ InfixType Catches ::= ‘catch’ Expr PostfixExpr ::= InfixExpr [id] PostfixOp(expr, op) InfixExpr ::= PrefixExpr - | InfixExpr id [nl] InfixExpr InfixOp(expr, op, expr) + | InfixExpr id [cnl] InfixExpr InfixOp(expr, op, expr) | InfixExpr ‘given’ (InfixExpr | ParArgumentExprs) PrefixExpr ::= [‘-’ | ‘+’ | ‘~’ | ‘!’] SimpleExpr PrefixOp(expr, op) SimpleExpr ::= ‘new’ (ConstrApp [TemplateBody] | TemplateBody) New(constr | templ) @@ -234,7 +235,7 @@ ExprInParens ::= PostfixExpr ‘:’ Type ParArgumentExprs ::= ‘(’ ExprsInParens ‘)’ exprs | ‘(’ [ExprsInParens ‘,’] PostfixExpr ‘:’ ‘_’ ‘*’ ‘)’ exprs :+ Typed(expr, Ident(wildcardStar)) ArgumentExprs ::= ParArgumentExprs - | [nl] BlockExpr + | [cnl] BlockExpr BlockExpr ::= ‘{’ CaseClauses | Block ‘}’ Block ::= {BlockStat semi} [BlockResult] Block(stats, expr?) 
BlockStat ::= Import @@ -263,7 +264,7 @@ Pattern ::= Pattern1 { ‘|’ Pattern1 } Pattern1 ::= PatVar ‘:’ RefinedType Bind(name, Typed(Ident(wildcard), tpe)) | Pattern2 Pattern2 ::= [id ‘@’] InfixPattern Bind(name, pat) -InfixPattern ::= SimplePattern { id [nl] SimplePattern } InfixOp(pat, op, pat) +InfixPattern ::= SimplePattern { id [cnl] SimplePattern } InfixOp(pat, op, pat) SimplePattern ::= PatVar Ident(wildcard) | Literal Bind(name, Ident(wildcard)) | ‘(’ [Patterns] ‘)’ Parens(pats) Tuple(pats) @@ -282,7 +283,7 @@ ArgumentPatterns ::= ‘(’ [Patterns] ‘)’ ### Type and Value Parameters ```ebnf ClsTypeParamClause::= ‘[’ ClsTypeParam {‘,’ ClsTypeParam} ‘]’ -ClsTypeParam ::= {Annotation} [‘+’ | ‘-’] TypeDef(Modifiers, name, tparams, bounds) +ClsTypeParam ::= {Annotation} [‘+’ | ‘-’] TypeDef(Modifiers, name, tparams, bounds) id [HkTypeParamClause] TypeParamBounds Bound(below, above, context) DefTypeParamClause::= ‘[’ DefTypeParam {‘,’ DefTypeParam} ‘]’ @@ -370,9 +371,7 @@ PatDef ::= ids [‘:’ Type] ‘=’ Expr VarDef ::= PatDef | ids ‘:’ Type ‘=’ ‘_’ DefDef ::= DefSig [(‘:’ | ‘<:’) Type] ‘=’ Expr DefDef(_, name, tparams, vparamss, tpe, expr) - | DefSig [nl] ‘{’ Block ‘}’ DefDef(_, name, tparams, vparamss, tpe, Block) - | ‘this’ DefParamClause DefParamClauses DefDef(_, , Nil, vparamss, EmptyTree, expr | Block) - (‘=’ ConstrExpr | [nl] ConstrBlock) + | ‘this’ DefParamClause DefParamClauses ‘=’ ConstrExpr DefDef(_, , Nil, vparamss, EmptyTree, expr | Block) TmplDef ::= ([‘case’] ‘class’ | ‘trait’) ClassDef | [‘case’] ‘object’ ObjectDef @@ -396,11 +395,10 @@ ConstrApp ::= SimpleConstrApp | ‘(’ SimpleConstrApp {‘given’ (PrefixExpr | ParArgumentExprs)} ‘)’ SimpleConstrApp ::= AnnotType {ArgumentExprs} Apply(tp, args) ConstrExpr ::= SelfInvocation - | ConstrBlock + | ‘{’ SelfInvocation {semi BlockStat} ‘}’ SelfInvocation ::= ‘this’ ArgumentExprs {ArgumentExprs} -ConstrBlock ::= ‘{’ SelfInvocation {semi BlockStat} ‘}’ -TemplateBody ::= [nl] ‘{’ [SelfType] TemplateStat {semi TemplateStat} ‘}’ (self, stats) +TemplateBody ::= [cnl] ‘{’ [SelfType] TemplateStat {semi TemplateStat} ‘}’ (self, stats) TemplateStat ::= Import | Export | {Annotation [nl]} {Modifier} Def @@ -410,7 +408,7 @@ TemplateStat ::= Import SelfType ::= id [‘:’ InfixType] ‘=>’ ValDef(_, name, tpt, _) | ‘this’ ‘:’ InfixType ‘=>’ -EnumBody ::= [nl] ‘{’ [SelfType] EnumStat {semi EnumStat} ‘}’ +EnumBody ::= [cnl] ‘{’ [SelfType] EnumStat {semi EnumStat} ‘}’ EnumStat ::= TemplateStat | {Annotation [nl]} {Modifier} EnumCase EnumCase ::= ‘case’ (id ClassConstr [‘extends’ ConstrApps]] | ids) @@ -422,8 +420,8 @@ TopStat ::= Import | Packaging | PackageObject | -Packaging ::= ‘package’ QualId [nl] ‘{’ TopStatSeq ‘}’ Package(qid, stats) +Packaging ::= ‘package’ QualId [cnl] ‘{’ TopStatSeq ‘}’ Package(qid, stats) PackageObject ::= ‘package’ ‘object’ ObjectDef object with package in mods. -CompilationUnit ::= {‘package’ QualId semi} TopStatSeq Package(qid, stats) +CompilationUnit ::= {‘package’ QualId (semi | cnl)} TopStatSeq Package(qid, stats) ``` diff --git a/docs/docs/reference/other-new-features/indentation.md b/docs/docs/reference/other-new-features/indentation.md new file mode 100644 index 000000000000..24ea40dc8631 --- /dev/null +++ b/docs/docs/reference/other-new-features/indentation.md @@ -0,0 +1,188 @@ +--- +layout: doc-page +title: Significant Indentation +--- + +As an experimental feature, Scala 3 treats indentation as significant. 
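+
+For a first impression, here is a small before/after sketch (not one of the examples from this page; the method name `sum` is made up for illustration, and the indented version assumes the new `for ... do` control syntax). The two definitions are equivalent:
+
+```scala
+// conventional syntax: the method body is delimited by braces
+def sum(xs: List[Int]): Int = {
+  var acc = 0
+  for (x <- xs) acc += x
+  acc
+}
+
+// with significant indentation the braces can be dropped:
+// the `=` at the end of the line opens an indentation region
+def sum(xs: List[Int]): Int =
+  var acc = 0
+  for x <- xs do acc += x
+  acc
+```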
+
+Indentation is significant everywhere except inside regions delineated by braces `{...}`, brackets `[...]` or parentheses `(...)`, or within string or character literals.
+
+Where indentation is significant, the compiler will insert `<indent>` or `<outdent>`
+tokens at certain line breaks. Grammatically, pairs of `<indent>` and `<outdent>` tokens have the same effect as pairs of braces `{` and `}`.
+
+The algorithm makes use of a stack `IW` of previously encountered indentation widths. The stack initially holds a single element with a zero indentation width. The _current indentation width_ is the indentation width of the top of the stack.
+
+There are two rules:
+
+ 1. An `<indent>` is inserted at a line break, if
+
+    - the first token on the next line has an indentation width strictly greater
+      than the current indentation width, and
+    - the last token on the previous line can start an indentation region.
+
+    The following tokens can start an indentation region:
+    ```
+    : = => <- if then else while do try catch finally for yield match
+    ```
+
+    If an `<indent>` is inserted, the indentation width of the token on the next line
+    is pushed onto `IW`, which makes it the new current indentation width.
+
+ 2. An `<outdent>` is inserted at a line break, if
+
+    - the first token on the next line has an indentation width strictly less
+      than the current indentation width, and
+    - the first token on the next line is not a
+      [leading infix operator](../changed-features/operators.html).
+
+    If an `<outdent>` is inserted, the top element is popped from `IW`.
+    If the indentation width of the token on the next line is still less than the new current indentation width, step (2) repeats. Therefore, several `<outdent>` tokens
+    may be inserted in a row.
+
+It is an error if the indentation width of the token following an `<outdent>` does not
+match the indentation of some previous line in the enclosing indentation region. For instance, the following would be rejected.
+```scala
+if x < 0 then
+  -x
+  else // error: `else` does not align correctly
+    x
+```
+
+Indentation prefixes can consist of spaces and tabs. Indentation widths are the indentation prefixes themselves, ordered by the string prefix relation. So, for instance "2 tabs, followed by 4 spaces" is strictly less than "2 tabs, followed by 5 spaces", but "2 tabs, followed by 4 spaces" is incomparable to "6 tabs" or to "4 spaces, followed by 2 tabs". It is an error if the indentation width of some line is incomparable with the indentation width of the region that's current at that point. To avoid such errors, it is a good idea not to mix spaces and tabs in the same source file.
+
+### Indentation Marker `:`
+
+A colon `:` at the end of a line is one of the possible tokens that opens an indentation region. Examples:
+
+```scala
+  times(10):
+    println("ah")
+    println("ha")
+```
+or
+```scala
+  xs.map:
+    x =>
+      val y = x - 1
+      y * y
+```
+Colons at the end of lines are their own token, distinct from normal `:`.
+The Scala grammar is changed so that colons at end of lines are accepted at all points
+where an opening brace is legal, except if the previous token can already start an
+indentation region. Special provisions are taken so that method result types can still use a colon on
+the end of a line, followed by the actual type on the next.
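+
+For instance, under these provisions a result type may still be written on the line following the colon (a minimal sketch; the method name `f` is made up):
+
+```scala
+def f(x: Int):
+    Int =      // result type on its own line, still parsed as a type ascription
+  x * 2
+```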
+
+### Special Treatment of Case Clauses
+
+The indentation rules for `match` expressions and `catch` clauses are refined as follows:
+
+ - An indentation region is also opened after a `match` or `catch` if the following `case`
+   appears at the indentation width that's current for the `match` itself.
+ - In that case, the indentation region closes at the first token at that
+   same indentation width that is not a `case`, or at any token with a smaller
+   indentation width, whichever comes first.
+
+These rules make it possible to write `match` expressions where the cases are not indented themselves, as in the example below:
+```scala
+x match
+case 1 => print("I")
+case 2 => print("II")
+case 3 => print("III")
+case 4 => print("IV")
+case 5 => print("V")
+
+println(".")
+```
+
+### The End Marker
+
+Indentation-based syntax has many advantages over other conventions. But one possible problem is that it makes it hard to discern when a large indentation region ends, since there is no specific token that delineates the end. Braces are not much better since a brace by itself also contains no information about what region is closed.
+
+To solve this problem, Scala 3 offers an optional `end` marker. Example:
+```scala
+def largeMethod(...) =
+  ...
+  if ... then ...
+  else
+    ... // a large block
+  end if
+  ... // more code
+end largeMethod
+```
+An `end` marker consists of the identifier `end`, which follows an `<outdent>` token, and is in turn followed on the same line by exactly one other token, which is either an identifier or one of the reserved words
+```scala
+  if while for match try new
+```
+If `end` is followed by a reserved word, the compiler checks that the marker closes an indentation region belonging to a construct that starts with the reserved word. If it is followed by an identifier _id_, the compiler checks that the marker closes an indentation region containing the right-hand side of a `val`, `var`, or `def`, or
+the body of a class, trait, object, enum, given instance, or package clause that defines _id_.
+
+`end` itself is a soft keyword. It is only treated as an `end` marker if it
+occurs at the start of a line and is followed by an identifier or one of the reserved words above.
+
+It is recommended that `end` markers are used for code where the extent of an indentation region is not immediately apparent "at a glance". Typically this is the case if an indentation region spans 20 lines or more.
+
+### Example
+
+Here is a (somewhat meta-circular) example of code using indentation. It provides a concrete representation of indentation widths as defined above, together with efficient operations for constructing and comparing indentation widths.
+
+```scala
+enum IndentWidth:
+
+  /** A run of `n` characters `ch` */
+  case Run(ch: Char, n: Int)
+
+  /** `l` followed by `r` */
+  case Conc(l: IndentWidth, r: Run)
+
+  def <= (that: IndentWidth): Boolean =
+    this match
+      case Run(ch1, n1) =>
+        that match
+          case Run(ch2, n2) => n1 <= n2 && (ch1 == ch2 || n1 == 0)
+          case Conc(l, r) => this <= l
+      case Conc(l1, r1) =>
+        that match
+          case Conc(l2, r2) => l1 == l2 && r1 <= r2
+          case _ => false
+
+  def < (that: IndentWidth): Boolean = this <= that && !(that <= this)
+
+  override def toString: String =
+    this match
+      case Run(ch, n) =>
+        val kind = ch match
+          case ' ' => "space"
+          case '\t' => "tab"
+          case _ => s"'$ch'-character"
+        val suffix = if n == 1 then "" else "s"
+        s"$n $kind$suffix"
+      case Conc(l, r) =>
+        s"$l, $r"
+
+object IndentWidth:
+  private inline val MaxCached = 40
+
+  private val spaces = IArray.tabulate(MaxCached + 1):
+    new Run(' ', _)
+  private val tabs = IArray.tabulate(MaxCached + 1):
+    new Run('\t', _)
+
+  def Run(ch: Char, n: Int): Run =
+    if n <= MaxCached && ch == ' ' then
+      spaces(n)
+    else if n <= MaxCached && ch == '\t' then
+      tabs(n)
+    else
+      new Run(ch, n)
+
+  val Zero = Run(' ', 0)
+end IndentWidth
+```
+
+### Rewrites
+
+The Dotty compiler can rewrite source code to indented code and back.
+When invoked with options `-rewrite -indent` it will rewrite braces to
+indented regions where possible. When invoked with options `-rewrite -noindent` it will rewrite in the reverse direction, inserting braces for indentation regions.
+The `-indent` option only works on [new-style syntax](./control-syntax.html). So to go from old-style syntax to new-style indented code one has to invoke the compiler twice, first with options `-rewrite -new-syntax`, then again with options
+`-rewrite -indent`. To go in the opposite direction, from indented code to old-style syntax, it's `-rewrite -noindent`, followed by `-rewrite -old-syntax`.
diff --git a/docs/sidebar.yml b/docs/sidebar.yml
index bc007227b073..62c1f3d1825c 100644
--- a/docs/sidebar.yml
+++ b/docs/sidebar.yml
@@ -103,6 +103,8 @@ sidebar:
     url: docs/reference/other-new-features/threadUnsafe-annotation.html
   - title: New Control Syntax
     url: docs/reference/other-new-features/control-syntax.html
+  - title: Significant Indentation
+    url: docs/reference/other-new-features/indentation.html
   - title: Other Changed Features
     subsection:
     - title: Structural Types
diff --git a/tests/neg/endmarkers.scala b/tests/neg/endmarkers.scala
new file mode 100644
index 000000000000..f371c05bb6cd
--- /dev/null
+++ b/tests/neg/endmarkers.scala
@@ -0,0 +1,110 @@
+object Test:
+
+  locally:
+    var x = 0
+    while x < 10 do x += 1
+    end while // error: end of statement expected but while found // error: not found: end
+    val f = 10 // error: ';' expected, but 'val' found
+    while
+      x += 1
+      x < 10
+    do ()
+      end while // error: misaligned end marker
+
+  def f(x: Int): Int =
+    val y =
+      if x > 0 then
+        println("hello")
+        22
+      else
+        println("world")
+        33
+      end f // error: misaligned end marker
+
+    val z = 22
+    x + y + z
+    end f // error: misaligned end marker
+
+  def g = "!"
+ + val xs = List(1, 2, 3) + + xs.map: + x => + val y = x * x + y * y + + xs.map: + x => + val y = x * x + y + y + + println(f(2) + g) + + (new Test2).foo + (new Test3).foo + + var x = 1 + while + x += 1 + val y = x + println(y) + x < 10 + do () + +class Test2: + self => + def foo = 1 + + object x: + new Test2: + override def foo = 2 + end new // error: end of statement expected but new found // error: not found: end + def bar = 2 // error: ';' expected, but unindent found + end Test2 // error: misaligned end marker +end Test2 + +class Test3: + self => + def foo = 1 + end Test3 // error: not found: end + +import collection.mutable.HashMap + +class Coder(words: List[String]): + + class Foo: + println() + end Foo // error: not found: end + + (2 -> "ABC", new ArrowAssoc('3') -> "DEF") + + private val mnemonics = Map( + '2' -> "ABC", '3' -> "DEF", '4' -> "GHI", '5' -> "JKL", + '6' -> "MNO", '7' -> "PQRS", '8' -> "TUV", '9' -> "WXYZ") + + ('1', "1") match + case (digit, str) => true + case _ => false + + ('1', "1") match + case (digit, str) => true + case _ => false + + try List(1, 2, 3) match + case x :: xs => println(x) + case Nil => println("Nil") + catch + case ex: java.io.IOException => println(ex) + case ex: Throwable => throw ex + end try + + /** Invert the mnemonics map to give a map from chars 'A' ... 'Z' to '2' ... '9' */ + private val charCode0: Map[Char, Char] = + mnemonics + .withFilter: + case (digit, str) => true + case _ => false + .flatMap: + case (digit, str) => str map (ltr => ltr -> digit) + end Coder // error: The start of this line does not match any of the previous indentation widths. \ No newline at end of file diff --git a/tests/neg/i4373b.scala b/tests/neg/i4373b.scala index 50c8ef0813e7..a3d8f3850eff 100644 --- a/tests/neg/i4373b.scala +++ b/tests/neg/i4373b.scala @@ -1,5 +1,5 @@ // ==> 05bef7805687ba94da37177f7568e3ba7da1f91c.scala <== class x0 { x1: // error - x0 | _ // error + x0 | _ // error \ No newline at end of file diff --git a/tests/pos/indent.scala b/tests/pos/indent.scala new file mode 100644 index 000000000000..0c3ceca52b5d --- /dev/null +++ b/tests/pos/indent.scala @@ -0,0 +1,107 @@ +object Test: + + locally: + var x = 0 + while x < 10 do x += 1 + val f = 10 + while + x += 1 + x < 10 + do () + + def f(x: Int): Int = + val y = + if x > 0 then + println("hello") + 22 + else + println("world") + 33 + val z = 22 + x + y + z + end f + + def g = "!" + + val xs = List(1, 2, 3) + + xs.map: + x => + val y = x * x + y * y + + xs.map: + x => + val y = x * x + y + y + + println(f(2) + g) + + (new Test2).foo + (new Test3).foo + + var x = 1 + while + x += 1 + val y = x + println(y) + x < 10 + do () + +class Test2: + self => + def foo = 1 + + val x = + new Test2: + override def foo = 2 + end new + end x +end Test2 + +class Test3: + self => + def foo = 1 + +import collection.mutable.HashMap + +class Coder(words: List[String]): + + class Foo: + println() + end Foo + + class Bar + + (2 -> "ABC", new ArrowAssoc('3') -> "DEF") + + private val mnemonics = Map( + '2' -> "ABC", '3' -> "DEF", '4' -> "GHI", '5' -> "JKL", + '6' -> "MNO", '7' -> "PQRS", '8' -> "TUV", '9' -> "WXYZ") + + ('1', "1") match + case (digit, str) => true + case _ => false + + ('1', "1") match + case (digit, str) => true + case _ => false + + try List(1, 2, 3) match + case x :: xs => println(x) + case Nil => println("Nil") + catch + case ex: java.io.IOException => println(ex) + case ex: Throwable => + throw ex + end try + + /** Invert the mnemonics map to give a map from chars 'A' ... 'Z' to '2' ... 
'9' */ + private val charCode0: Map[Char, Char] = + mnemonics + .withFilter: + case (digit, str) => true + case _ => false + .flatMap: + case (digit, str) => str map (ltr => ltr -> digit) +end Coder \ No newline at end of file diff --git a/tests/pos/syntax-rewrite.scala b/tests/pos/syntax-rewrite.scala new file mode 100644 index 000000000000..f94e2a8e99d5 --- /dev/null +++ b/tests/pos/syntax-rewrite.scala @@ -0,0 +1,40 @@ +// This test source should be invariant under the following 4 compilation steps with options +// -rewrite -new-syntax +// -rewrite -indent +// -rewrite -noindent +// -rewrite -old-syntax +object test { + + for { + x <- List(1, 2, 3) + } + println(x) + + for (x <- List(1, 2, 3)) yield x + + for { + x <- List(1, 2, 3) + if x == 0 + } + println(x) + + def foo = { + println("hi") + println("ho") + // this comment goes inside braces + } + // this comment follows the brace + // this comment as well + object o { + } + + def loop[T]()(x: T): T = x + + def g() = /*>*/ loop() /*<*/ { + println() + 1 + } + + def bar() = { /* */ + } +}