@@ -182,22 +182,26 @@ trait Scanners extends ScannersCommon {
182
182
private def isSupplementary (high : Char , test : Int => Boolean , strict : Boolean = true ): Boolean =
183
183
isHighSurrogate(high) && {
184
184
var res = false
185
- nextChar()
186
- val low = ch
185
+ val low = lookaheadReader.getc()
187
186
if (isLowSurrogate(low)) {
188
- nextChar()
189
- val codepoint = toCodePoint(high, low)
190
- if (isValidCodePoint(codepoint) && test(codepoint)) {
191
- putChar(high)
192
- putChar(low)
193
- res = true
194
- } else
195
- syntaxError(f " illegal character ' \\ u $high%04x \\ u $low%04x' " )
196
- } else if (! strict) {
187
+ val codePoint = toCodePoint(high, low)
188
+ if (isValidCodePoint(codePoint)) {
189
+ if (test(codePoint)) {
190
+ putChar(high)
191
+ putChar(low)
192
+ nextChar()
193
+ nextChar()
194
+ res = true
195
+ }
196
+ }
197
+ else syntaxError(f " illegal character ' \\ u $high%04x \\ u $low%04x' " )
198
+ }
199
+ else if (! strict) {
197
200
putChar(high)
201
+ nextChar()
198
202
res = true
199
- } else
200
- syntaxError(f " illegal character ' \\ u $high%04x' missing low surrogate " )
203
+ }
204
+ else syntaxError(f " illegal character ' \\ u $high%04x' missing low surrogate " )
201
205
res
202
206
}
203
207
private def atSupplementary (ch : Char , f : Int => Boolean ): Boolean =
@@ -621,8 +625,7 @@ trait Scanners extends ScannersCommon {
621
625
putChar(ch)
622
626
nextChar()
623
627
getIdentRest()
624
- if (ch == '"' && token == IDENTIFIER )
625
- token = INTERPOLATIONID
628
+ if (ch == '"' && token == IDENTIFIER ) token = INTERPOLATIONID
626
629
case '<' => // is XMLSTART?
627
630
def fetchLT () = {
628
631
val last = if (charOffset >= 2 ) buf(charOffset - 2 ) else ' '
@@ -729,12 +732,31 @@ trait Scanners extends ScannersCommon {
729
732
}
730
733
syntaxError(msg)
731
734
}
735
+ /** Either at closing quote of charlit
736
+ * or run the op and take it as a (deprecated) Symbol identifier.
737
+ */
738
+ def charLitOrSymbolAfter (op : () => Unit ): Unit =
739
+ if (ch == '\' ' ) {
740
+ nextChar()
741
+ token = CHARLIT
742
+ setStrVal()
743
+ } else {
744
+ op()
745
+ token = SYMBOLLIT
746
+ strVal = name.toString
747
+ }
732
748
def fetchSingleQuote () = {
733
749
nextChar()
734
- if (isIdentifierStart(ch))
735
- charLitOr(() => getIdentRest())
736
- else if (isOperatorPart(ch) && (ch != '\\ ' ))
737
- charLitOr(() => getOperatorRest())
750
+ if (isIdentifierStart(ch)) {
751
+ putChar(ch)
752
+ nextChar()
753
+ charLitOrSymbolAfter(() => getIdentRest())
754
+ }
755
+ else if (isOperatorPart(ch) && (ch != '\\ ' )) {
756
+ putChar(ch)
757
+ nextChar()
758
+ charLitOrSymbolAfter(() => getOperatorRest())
759
+ }
738
760
else if (! isAtEnd && (ch != SU && ch != CR && ch != LF )) {
739
761
val isEmptyCharLit = (ch == '\' ' )
740
762
getLitChar()
@@ -801,12 +823,16 @@ trait Scanners extends ScannersCommon {
801
823
putChar(ch)
802
824
nextChar()
803
825
getIdentRest()
826
+ if (ch == '"' && token == IDENTIFIER ) token = INTERPOLATIONID
804
827
} else if (isSpecial(ch)) {
805
828
putChar(ch)
806
829
nextChar()
807
830
getOperatorRest()
808
831
} else if (isSupplementary(ch, isUnicodeIdentifierStart)) {
809
832
getIdentRest()
833
+ if (ch == '"' && token == IDENTIFIER ) token = INTERPOLATIONID
834
+ } else if (isSupplementary(ch, isSpecial)) {
835
+ getOperatorRest()
810
836
} else {
811
837
syntaxError(f " illegal character ' \\ u $ch%04x' " )
812
838
nextChar()
@@ -872,7 +898,8 @@ trait Scanners extends ScannersCommon {
872
898
putChar(ch)
873
899
nextChar()
874
900
getIdentOrOperatorRest()
875
- case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
901
+ case ' ' | LF | // optimize for common whitespace
902
+ SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
876
903
finishNamed()
877
904
case _ =>
878
905
if (isUnicodeIdentifierPart(ch)) {
@@ -888,6 +915,7 @@ trait Scanners extends ScannersCommon {
888
915
889
916
@ tailrec
890
917
private def getOperatorRest (): Unit = (ch : @ switch) match {
918
+ case ' ' | LF => finishNamed() // optimize
891
919
case '~' | '!' | '@' | '#' | '%' |
892
920
'^' | '*' | '+' | '-' | '<' |
893
921
'>' | '?' | ':' | '=' | '&' |
@@ -899,24 +927,12 @@ trait Scanners extends ScannersCommon {
899
927
else { putChar('/' ); getOperatorRest() }
900
928
case _ =>
901
929
if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() }
930
+ else if (isSupplementary(ch, isSpecial)) getOperatorRest()
902
931
else finishNamed()
903
932
}
904
933
905
- private def getIdentOrOperatorRest (): Unit = {
906
- if (isIdentifierPart(ch))
907
- getIdentRest()
908
- else ch match {
909
- case '~' | '!' | '@' | '#' | '%' |
910
- '^' | '*' | '+' | '-' | '<' |
911
- '>' | '?' | ':' | '=' | '&' |
912
- '|' | '\\ ' | '/' =>
913
- getOperatorRest()
914
- case _ =>
915
- if (isSpecial(ch)) getOperatorRest()
916
- else finishNamed()
917
- }
918
- }
919
-
934
+ private def getIdentOrOperatorRest (): Unit =
935
+ if (isIdentifierPart(ch) || isSupplementary(ch, isIdentifierPart)) getIdentRest() else getOperatorRest()
920
936
921
937
// Literals -----------------------------------------------------------------
922
938
@@ -1040,10 +1056,6 @@ trait Scanners extends ScannersCommon {
1040
1056
getInterpolatedIdentRest()
1041
1057
} else if (atSupplementary(ch, isUnicodeIdentifierStart)) {
1042
1058
finishStringPart()
1043
- putChar(ch)
1044
- nextRawChar()
1045
- putChar(ch)
1046
- nextRawChar()
1047
1059
getInterpolatedIdentRest()
1048
1060
} else {
1049
1061
val expectations = " $$, $\" , $identifier or ${expression}"
@@ -1372,23 +1384,6 @@ trait Scanners extends ScannersCommon {
1372
1384
if (detectedFloat) restOfNonIntegralNumber() else restOfNumber()
1373
1385
}
1374
1386
1375
- /** Parse character literal if current character is followed by \',
1376
- * or follow with given op and return a symbol literal token
1377
- */
1378
- def charLitOr (op : () => Unit ): Unit = {
1379
- putChar(ch)
1380
- nextChar()
1381
- if (ch == '\' ' ) {
1382
- nextChar()
1383
- token = CHARLIT
1384
- setStrVal()
1385
- } else {
1386
- op()
1387
- token = SYMBOLLIT
1388
- strVal = name.toString
1389
- }
1390
- }
1391
-
1392
1387
// Errors -----------------------------------------------------------------
1393
1388
1394
1389
/** generate an error at the given offset */
0 commit comments