Skip to content

Commit c8ee991

Browse files
authored
Merge pull request scala#9805 from som-snytt/issue/12482
2 parents 273e549 + 180b7ed commit c8ee991

File tree

8 files changed

+165
-111
lines changed

8 files changed

+165
-111
lines changed

spec/01-lexical-syntax.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ interpolatedString ::= alphaid ‘"’ {[‘\’] interpolatedStringPart |
506506
interpolatedStringPart ::= printableChar \ (‘"’ | ‘$’ | ‘\’) | escape
507507
escape ::= ‘$$’
508508
| ‘$"’
509-
| ‘$’ id
509+
| ‘$’ alphaid
510510
| ‘$’ BlockExpr
511511
alphaid ::= upper idrest
512512
| varid
@@ -533,9 +533,9 @@ in an interpolated string. A single ‘$’-sign can still be obtained by doubli
533533
character: ‘$$’. A single ‘"’-sign can be obtained by the sequence ‘\$"’.
534534
535535
The simpler form consists of a ‘$’-sign followed by an identifier starting with
536-
a letter and followed only by letters, digits, and underscore characters,
537-
e.g `$id`. The simpler form is expanded by putting braces around the identifier,
538-
e.g `$id` is equivalent to `${id}`. In the following, unless we explicitly state otherwise,
536+
a letter and followed only by letters, digits, and underscore characters, e.g., `$id`.
537+
The simpler form is expanded by putting braces around the identifier,
538+
e.g., `$id` is equivalent to `${id}`. In the following, unless we explicitly state otherwise,
539539
we assume that this expansion has already been performed.
540540
541541
The expanded expression is type checked normally. Usually, `StringContext` will resolve to

spec/13-syntax-summary.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ The lexical syntax of Scala is given by the following grammar in EBNF form:
1515
```ebnf
1616
whiteSpace ::= ‘\u0020’ | ‘\u0009’ | ‘\u000D’ | ‘\u000A’
1717
upper ::= ‘A’ | … | ‘Z’ | ‘$’ and any character in Unicode categories Lu, Lt or Nl,
18-
and any character in Unicode categories Lo and Lm that don't have
18+
and any character in Unicode categories Lo and Lm that doesn't have
1919
contributory property Other_Lowercase
2020
lower ::= ‘a’ | … | ‘z’ | ‘_’ and any character in Unicode category Ll,
2121
and any character in Unicode categories Lo or Lm that has contributory
@@ -72,7 +72,7 @@ interpolatedStringPart
7272
::= printableChar \ (‘"’ | ‘$’ | ‘\’) | escape
7373
escape ::= ‘\$\$’
7474
| ‘\$"’
75-
| ‘\$’ id
75+
| ‘\$’ alphaid
7676
| ‘\$’ BlockExpr
7777
alphaid ::= upper idrest
7878
| varid

src/compiler/scala/tools/nsc/ast/parser/Scanners.scala

Lines changed: 51 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -182,22 +182,26 @@ trait Scanners extends ScannersCommon {
182182
private def isSupplementary(high: Char, test: Int => Boolean, strict: Boolean = true): Boolean =
183183
isHighSurrogate(high) && {
184184
var res = false
185-
nextChar()
186-
val low = ch
185+
val low = lookaheadReader.getc()
187186
if (isLowSurrogate(low)) {
188-
nextChar()
189-
val codepoint = toCodePoint(high, low)
190-
if (isValidCodePoint(codepoint) && test(codepoint)) {
191-
putChar(high)
192-
putChar(low)
193-
res = true
194-
} else
195-
syntaxError(f"illegal character '\\u$high%04x\\u$low%04x'")
196-
} else if (!strict) {
187+
val codePoint = toCodePoint(high, low)
188+
if (isValidCodePoint(codePoint)) {
189+
if (test(codePoint)) {
190+
putChar(high)
191+
putChar(low)
192+
nextChar()
193+
nextChar()
194+
res = true
195+
}
196+
}
197+
else syntaxError(f"illegal character '\\u$high%04x\\u$low%04x'")
198+
}
199+
else if (!strict) {
197200
putChar(high)
201+
nextChar()
198202
res = true
199-
} else
200-
syntaxError(f"illegal character '\\u$high%04x' missing low surrogate")
203+
}
204+
else syntaxError(f"illegal character '\\u$high%04x' missing low surrogate")
201205
res
202206
}
203207
private def atSupplementary(ch: Char, f: Int => Boolean): Boolean =
@@ -621,8 +625,7 @@ trait Scanners extends ScannersCommon {
621625
putChar(ch)
622626
nextChar()
623627
getIdentRest()
624-
if (ch == '"' && token == IDENTIFIER)
625-
token = INTERPOLATIONID
628+
if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID
626629
case '<' => // is XMLSTART?
627630
def fetchLT() = {
628631
val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
@@ -729,12 +732,31 @@ trait Scanners extends ScannersCommon {
729732
}
730733
syntaxError(msg)
731734
}
735+
/** If the current character is the closing quote, finish a character literal;
736+
 * otherwise run `op` to scan the rest of the name and take it as a (deprecated) symbol literal.
737+
*/
738+
def charLitOrSymbolAfter(op: () => Unit): Unit =
739+
if (ch == '\'') {
740+
nextChar()
741+
token = CHARLIT
742+
setStrVal()
743+
} else {
744+
op()
745+
token = SYMBOLLIT
746+
strVal = name.toString
747+
}
732748
def fetchSingleQuote() = {
733749
nextChar()
734-
if (isIdentifierStart(ch))
735-
charLitOr(() => getIdentRest())
736-
else if (isOperatorPart(ch) && (ch != '\\'))
737-
charLitOr(() => getOperatorRest())
750+
if (isIdentifierStart(ch)) {
751+
putChar(ch)
752+
nextChar()
753+
charLitOrSymbolAfter(() => getIdentRest())
754+
}
755+
else if (isOperatorPart(ch) && (ch != '\\')) {
756+
putChar(ch)
757+
nextChar()
758+
charLitOrSymbolAfter(() => getOperatorRest())
759+
}
738760
else if (!isAtEnd && (ch != SU && ch != CR && ch != LF)) {
739761
val isEmptyCharLit = (ch == '\'')
740762
getLitChar()
@@ -801,12 +823,16 @@ trait Scanners extends ScannersCommon {
801823
putChar(ch)
802824
nextChar()
803825
getIdentRest()
826+
if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID
804827
} else if (isSpecial(ch)) {
805828
putChar(ch)
806829
nextChar()
807830
getOperatorRest()
808831
} else if (isSupplementary(ch, isUnicodeIdentifierStart)) {
809832
getIdentRest()
833+
if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID
834+
} else if (isSupplementary(ch, isSpecial)) {
835+
getOperatorRest()
810836
} else {
811837
syntaxError(f"illegal character '\\u$ch%04x'")
812838
nextChar()
@@ -872,7 +898,8 @@ trait Scanners extends ScannersCommon {
872898
putChar(ch)
873899
nextChar()
874900
getIdentOrOperatorRest()
875-
case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
901+
case ' ' | LF | // optimize for common whitespace
902+
SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
876903
finishNamed()
877904
case _ =>
878905
if (isUnicodeIdentifierPart(ch)) {
@@ -888,6 +915,7 @@ trait Scanners extends ScannersCommon {
888915

889916
@tailrec
890917
private def getOperatorRest(): Unit = (ch: @switch) match {
918+
case ' ' | LF => finishNamed() // optimize
891919
case '~' | '!' | '@' | '#' | '%' |
892920
'^' | '*' | '+' | '-' | '<' |
893921
'>' | '?' | ':' | '=' | '&' |
@@ -899,24 +927,12 @@ trait Scanners extends ScannersCommon {
899927
else { putChar('/'); getOperatorRest() }
900928
case _ =>
901929
if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() }
930+
else if (isSupplementary(ch, isSpecial)) getOperatorRest()
902931
else finishNamed()
903932
}
904933

905-
private def getIdentOrOperatorRest(): Unit = {
906-
if (isIdentifierPart(ch))
907-
getIdentRest()
908-
else ch match {
909-
case '~' | '!' | '@' | '#' | '%' |
910-
'^' | '*' | '+' | '-' | '<' |
911-
'>' | '?' | ':' | '=' | '&' |
912-
'|' | '\\' | '/' =>
913-
getOperatorRest()
914-
case _ =>
915-
if (isSpecial(ch)) getOperatorRest()
916-
else finishNamed()
917-
}
918-
}
919-
934+
private def getIdentOrOperatorRest(): Unit =
935+
if (isIdentifierPart(ch) || isSupplementary(ch, isIdentifierPart)) getIdentRest() else getOperatorRest()
920936

921937
// Literals -----------------------------------------------------------------
922938

@@ -1040,10 +1056,6 @@ trait Scanners extends ScannersCommon {
10401056
getInterpolatedIdentRest()
10411057
} else if (atSupplementary(ch, isUnicodeIdentifierStart)) {
10421058
finishStringPart()
1043-
putChar(ch)
1044-
nextRawChar()
1045-
putChar(ch)
1046-
nextRawChar()
10471059
getInterpolatedIdentRest()
10481060
} else {
10491061
val expectations = "$$, $\", $identifier or ${expression}"
@@ -1372,23 +1384,6 @@ trait Scanners extends ScannersCommon {
13721384
if (detectedFloat) restOfNonIntegralNumber() else restOfNumber()
13731385
}
13741386

1375-
/** Parse character literal if current character is followed by \',
1376-
* or follow with given op and return a symbol literal token
1377-
*/
1378-
def charLitOr(op: () => Unit): Unit = {
1379-
putChar(ch)
1380-
nextChar()
1381-
if (ch == '\'') {
1382-
nextChar()
1383-
token = CHARLIT
1384-
setStrVal()
1385-
} else {
1386-
op()
1387-
token = SYMBOLLIT
1388-
strVal = name.toString
1389-
}
1390-
}
1391-
13921387
// Errors -----------------------------------------------------------------
13931388

13941389
/** generate an error at the given offset */

src/reflect/scala/reflect/internal/Chars.scala

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ package reflect
1515
package internal
1616

1717
import scala.annotation.switch
18-
import java.lang.{ Character => JCharacter }
1918

2019
/** Contains constants and classifier methods for characters */
2120
trait Chars {
21+
import Chars.CodePoint
2222
// Be very careful touching these.
2323
// Apparently trivial changes to the way you write these constants
2424
// will cause Scanners.scala to go from a nice efficient switch to
@@ -72,28 +72,46 @@ trait Chars {
7272
'0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
7373

7474
/** Can character start an alphanumeric Scala identifier? */
75-
def isIdentifierStart(c: Char): Boolean =
76-
(c == '_') || (c == '$') || Character.isUnicodeIdentifierStart(c)
75+
def isIdentifierStart(c: Char): Boolean = (c == '_') || (c == '$') || Character.isUnicodeIdentifierStart(c)
76+
def isIdentifierStart(c: CodePoint): Boolean = (c == '_') || (c == '$') || Character.isUnicodeIdentifierStart(c)
7777

7878
/** Can character form part of an alphanumeric Scala identifier? */
79-
def isIdentifierPart(c: Char) =
80-
(c == '$') || Character.isUnicodeIdentifierPart(c)
79+
def isIdentifierPart(c: Char) = (c == '$') || Character.isUnicodeIdentifierPart(c)
80+
81+
def isIdentifierPart(c: CodePoint) = (c == '$') || Character.isUnicodeIdentifierPart(c)
8182

8283
/** Is character a math or other symbol in Unicode? */
8384
def isSpecial(c: Char) = {
8485
val chtp = Character.getType(c)
8586
chtp == Character.MATH_SYMBOL.toInt || chtp == Character.OTHER_SYMBOL.toInt
8687
}
87-
88-
private final val otherLetters = Set[Char]('\u0024', '\u005F') // '$' and '_'
89-
private final val letterGroups = {
90-
import JCharacter._
91-
Set[Byte](LOWERCASE_LETTER, UPPERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, LETTER_NUMBER)
88+
def isSpecial(codePoint: CodePoint) = {
89+
val chtp = Character.getType(codePoint)
90+
chtp == Character.MATH_SYMBOL.toInt || chtp == Character.OTHER_SYMBOL.toInt
9291
}
93-
def isScalaLetter(ch: Char) = letterGroups(JCharacter.getType(ch).toByte) || otherLetters(ch)
92+
93+
// The isScalaLetter tests below are used by Precedence to classify the first character of an operator name.
94+
import Character.{LOWERCASE_LETTER, UPPERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, LETTER_NUMBER}
95+
def isScalaLetter(c: Char): Boolean =
96+
Character.getType(c) match {
97+
case LOWERCASE_LETTER | UPPERCASE_LETTER | OTHER_LETTER | TITLECASE_LETTER | LETTER_NUMBER => true
98+
case _ => c == '$' || c == '_'
99+
}
100+
def isScalaLetter(c: CodePoint): Boolean =
101+
Character.getType(c) match {
102+
case LOWERCASE_LETTER | UPPERCASE_LETTER | OTHER_LETTER | TITLECASE_LETTER | LETTER_NUMBER => true
103+
case _ => c == '$' || c == '_'
104+
}
94105

95106
/** Can character form part of a Scala operator name? */
96-
def isOperatorPart(c : Char) : Boolean = (c: @switch) match {
107+
def isOperatorPart(c: Char): Boolean = (c: @switch) match {
108+
case '~' | '!' | '@' | '#' | '%' |
109+
'^' | '*' | '+' | '-' | '<' |
110+
'>' | '?' | ':' | '=' | '&' |
111+
'|' | '/' | '\\' => true
112+
case c => isSpecial(c)
113+
}
114+
def isOperatorPart(c: CodePoint): Boolean = (c: @switch) match {
97115
case '~' | '!' | '@' | '#' | '%' |
98116
'^' | '*' | '+' | '-' | '<' |
99117
'>' | '?' | ':' | '=' | '&' |
@@ -102,4 +120,6 @@ trait Chars {
102120
}
103121
}
104122

105-
object Chars extends Chars { }
123+
object Chars extends Chars {
124+
type CodePoint = Int
125+
}

src/reflect/scala/reflect/internal/Precedence.scala

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,23 @@
1010
* additional information regarding copyright ownership.
1111
*/
1212

13-
package scala
14-
package reflect
15-
package internal
13+
package scala.reflect.internal
1614

1715
import scala.annotation.switch
18-
import Chars._
16+
import Chars.{CodePoint, isOperatorPart, isScalaLetter}
1917

2018
final class Precedence private (val level: Int) extends AnyVal with Ordered[Precedence] {
21-
def compare(that: Precedence): Int = level compare that.level
19+
def compare(that: Precedence): Int = level.compare(that.level)
2220
override def toString = s"Precedence($level)"
2321
}
2422

25-
2623
object Precedence extends (Int => Precedence) {
2724
private[this] val ErrorName = "<error>"
2825
private def isAssignmentOp(name: String) = name match {
2926
case "!=" | "<=" | ">=" | "" => false
30-
case _ => name.last == '=' && name.head != '=' && isOperatorPart(name.head)
27+
case _ => name.last == '=' && name.head != '=' && isOperatorPart(name.codePointAt(0))
3128
}
32-
private def firstChar(ch: Char): Precedence = apply((ch: @switch) match {
29+
private def firstChar(c: CodePoint): Precedence = apply((c: @switch) match {
3330
case '|' => 2
3431
case '^' => 3
3532
case '&' => 4
@@ -38,13 +35,13 @@ object Precedence extends (Int => Precedence) {
3835
case ':' => 7
3936
case '+' | '-' => 8
4037
case '*' | '/' | '%' => 9
41-
case _ => if (isScalaLetter(ch)) 1 else 10
38+
case _ => if (isScalaLetter(c)) 1 else 10
4239
})
4340

4441
def apply(level: Int): Precedence = new Precedence(level)
4542
def apply(name: String): Precedence = name match {
4643
case "" | ErrorName => this(-1)
4744
case _ if isAssignmentOp(name) => this(0)
48-
case _ => firstChar(name charAt 0)
45+
case _ => firstChar(name.codePointAt(0))
4946
}
5047
}

0 commit comments

Comments
 (0)