Skip to content

Commit eeda5fd

Browse files
authored
Fix #15867, #15868, #15869: Add missing byte chars notations, enforce limits in decimal notation in byte char & string (#15898)
1 parent ccccab9 commit eeda5fd

File tree

26 files changed

+694
-115
lines changed

26 files changed

+694
-115
lines changed

docs/release-notes/.FSharp.Compiler.Service/9.0.100.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* Nullness export - make sure option<> and other UseNullAsTrueValue types are properly annotated as nullable for C# and reflection consumers [PR #17528](https://github.com/dotnet/fsharp/pull/17528)
1212
* MethodAccessException on equality comparison of a type private to module. ([Issue #17541](https://github.com/dotnet/fsharp/issues/17541), [PR #17548](https://github.com/dotnet/fsharp/pull/17548))
1313
* Fixed checking failure when `global` namespace is involved with enabled GraphBasedChecking ([PR #17553](https://github.com/dotnet/fsharp/pull/17553))
14+
* Add missing byte char notations and enforce limits for decimal (trigraph) notation in byte char and byte string literals (Issues [#15867](https://github.com/dotnet/fsharp/issues/15867), [#15868](https://github.com/dotnet/fsharp/issues/15868), [#15869](https://github.com/dotnet/fsharp/issues/15869), [PR #15898](https://github.com/dotnet/fsharp/pull/15898))
1415

1516
### Added
1617

src/Compiler/FSComp.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,7 @@ lexfltSeparatorTokensOfPatternMatchMisaligned,"The '|' tokens separating rules o
10201020
# -----------------------------------------------------------------------------
10211021
lexCharNotAllowedInOperatorNames,"'%s' is not permitted as a character in operator names and is reserved for future use"
10221022
lexUnexpectedChar,"Unexpected character '%s'"
1023-
1140,lexByteArrayCannotEncode,"This byte array literal contains characters that do not encode as a single byte"
1023+
1140,lexByteArrayCannotEncode,"This byte array literal contains %d characters that do not encode as a single byte"
10241024
1141,lexIdentEndInMarkReserved,"Identifiers followed by '%s' are reserved for future use"
10251025
1142,lexOutsideEightBitSigned,"This number is outside the allowable range for 8-bit signed integers"
10261026
1143,lexOutsideEightBitSignedHex,"This number is outside the allowable range for hexadecimal 8-bit signed integers"
@@ -1037,7 +1037,8 @@ lexUnexpectedChar,"Unexpected character '%s'"
10371037
1154,lexOutsideDecimal,"This number is outside the allowable range for decimal literals"
10381038
1155,lexOutsideThirtyTwoBitFloat,"This number is outside the allowable range for 32-bit floats"
10391039
1156,lexInvalidNumericLiteral,"This is not a valid numeric literal. Valid numeric literals include 1, 0x1, 0o1, 0b1, 1l (int/int32), 1u (uint/uint32), 1L (int64), 1UL (uint64), 1s (int16), 1us (uint16), 1y (int8/sbyte), 1uy (uint8/byte), 1.0 (float/double), 1.0f (float32/single), 1.0m (decimal), 1I (bigint)."
1040-
1157,lexInvalidByteLiteral,"This is not a valid byte literal"
1040+
1157,lexInvalidAsciiByteLiteral,"This is not a valid byte character literal. The value must be less than or equal to '\127'B."
1041+
1157,lexInvalidTrigraphAsciiByteLiteral,"This is not a valid byte character literal. The value must be less than or equal to '\127'B.\nNote: In a future F# version this warning will be promoted to an error."
10411042
1158,lexInvalidCharLiteral,"This is not a valid character literal"
10421043
1159,lexThisUnicodeOnlyInStringLiterals,"This Unicode encoding is only valid in string literals"
10431044
1160,lexTokenReserved,"This token is reserved for future use"
@@ -1131,6 +1132,8 @@ lexIfOCaml,"IF-FSHARP/IF-CAML regions are no longer supported"
11311132
1249,lexUnmatchedRBracesInTripleQuote,"The interpolated string contains unmatched closing braces."
11321133
1250,lexTooManyPercentsInTripleQuote,"The interpolated triple quoted string literal does not start with enough '$' characters to allow this many consecutive '%%' characters."
11331134
1251,lexExtendedStringInterpolationNotSupported,"Extended string interpolation is not supported in this version of F#."
1135+
1252,lexInvalidCharLiteralInString,"'%s' is not a valid character literal.\nNote: Currently the value is wrapped around byte range to '%s'. In a future F# version this warning will be promoted to an error."
1136+
1253,lexByteArrayOutisdeAscii,"This byte array literal contains %d non-ASCII characters. All characters should be < 128y."
11341137
# reshapedmsbuild.fs
11351138
1300,toolLocationHelperUnsupportedFrameworkVersion,"The specified .NET Framework version '%s' is not supported. Please specify a value from the enumeration Microsoft.Build.Utilities.TargetDotNetFrameworkVersion."
11361139
# -----------------------------------------------------------------------------

src/Compiler/SyntaxTree/LexHelpers.fs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -222,16 +222,32 @@ let addUnicodeChar buf c = addIntChar buf (int c)
222222

223223
let addByteChar buf (c: char) = addIntChar buf (int32 c % 256)
224224

225+
type LargerThanOneByte = int
226+
type LargerThan127ButInsideByte = int
227+
225228
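/// Sanity check of a byte string buffer: counts the values whose high byte is non-zero (they do not fit in one byte) and, separately, the values whose high byte is zero but whose low byte is > 127 (outside ASCII).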
226-
let stringBufferIsBytes (buf: ByteBuffer) =
229+
let errorsInByteStringBuffer (buf: ByteBuffer) =
227230
let bytes = buf.AsMemory()
228-
let mutable ok = true
231+
assert (bytes.Length % 2 = 0)
232+
233+
// Possible enhancement: return the faulty values themselves instead of only counting them?
234+
// Problem with that: the source range of each value is not known here, so a faulty value cannot be mapped back to its range and notation
235+
236+
// values with high byte <> 0
237+
let mutable largerThanOneByteCount = 0
238+
// values with high byte = 0, but low byte > 127
239+
let mutable largerThan127ButSingleByteCount = 0
229240

230241
for i = 0 to bytes.Length / 2 - 1 do
231242
if bytes.Span[i * 2 + 1] <> 0uy then
232-
ok <- false
243+
largerThanOneByteCount <- largerThanOneByteCount + 1
244+
elif bytes.Span[i * 2] > 127uy then
245+
largerThan127ButSingleByteCount <- largerThan127ButSingleByteCount + 1
233246

234-
ok
247+
if largerThanOneByteCount + largerThan127ButSingleByteCount > 0 then
248+
Some(largerThanOneByteCount, largerThan127ButSingleByteCount)
249+
else
250+
None
235251

236252
let newline (lexbuf: LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine
237253

src/Compiler/SyntaxTree/LexHelpers.fsi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ val stringBufferAsString: ByteBuffer -> string
9797

9898
val stringBufferAsBytes: ByteBuffer -> byte[]
9999

100-
val stringBufferIsBytes: ByteBuffer -> bool
100+
type LargerThanOneByte = int
101+
type LargerThan127ButInsideByte = int
102+
val errorsInByteStringBuffer: ByteBuffer -> Option<LargerThanOneByte * LargerThan127ButInsideByte>
101103

102104
val newline: Lexing.LexBuffer<'a> -> unit
103105

src/Compiler/lex.fsl

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,14 @@ let startString args (lexbuf: UnicodeLexing.Lexbuf) =
137137
if kind.IsInterpolated then
138138
fail args lexbuf (FSComp.SR.lexByteStringMayNotBeInterpolated()) ()
139139
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
140-
elif Lexhelp.stringBufferIsBytes buf then
141-
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
142140
else
143-
fail args lexbuf (FSComp.SR.lexByteArrayCannotEncode()) ()
141+
match Lexhelp.errorsInByteStringBuffer buf with
142+
| Some (largerThanOneByte, largerThan127) ->
143+
if largerThanOneByte > 0 then
144+
fail args lexbuf (FSComp.SR.lexByteArrayCannotEncode(largerThanOneByte)) ()
145+
if largerThan127 > 0 then
146+
warning (Error(FSComp.SR.lexByteArrayOutisdeAscii(largerThan127), lexbuf.LexemeRange))
147+
| None -> ()
144148
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
145149
elif kind.IsInterpolated then
146150
let s = Lexhelp.stringBufferAsString buf
@@ -518,7 +522,7 @@ rule token (args: LexArgs) (skip: bool) = parse
518522
{ let s = lexeme lexbuf
519523
let x = int32 (if s.[1] = '\\' then escape s.[2] else s.[1])
520524
if x < 0 || x > 127 then
521-
fail args lexbuf (FSComp.SR.lexInvalidByteLiteral()) (UINT8(byte 0))
525+
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
522526
else
523527
UINT8 (byte(x)) }
524528

@@ -535,26 +539,50 @@ rule token (args: LexArgs) (skip: bool) = parse
535539
{ let s = lexeme lexbuf
536540
let x = int32 (trigraph s.[2] s.[3] s.[4])
537541
if x < 0 || x > 255 then
538-
fail args lexbuf (FSComp.SR.lexInvalidByteLiteral()) (UINT8(byte 0))
542+
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
543+
elif x > 127 then
544+
// TODO: Promote to Error:
545+
// * Adjust range check in `if` above to `x > 127`
546+
// * Remove this `elif` expression
547+
// * Remove `lexInvalidTrigraphAsciiByteLiteral` from `FSComp.txt`
548+
warning (Error(FSComp.SR.lexInvalidTrigraphAsciiByteLiteral(), lexbuf.LexemeRange))
549+
UINT8 (byte(x))
539550
else
540551
UINT8 (byte(x)) }
541552

553+
| '\'' unicodeGraphShort '\'' { CHAR (char (int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 1)))) }
554+
542555
| '\'' unicodeGraphShort '\'' 'B'
543556
{ let x = int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 2))
544557
if x < 0 || x > 127 then
545-
fail args lexbuf (FSComp.SR.lexInvalidByteLiteral()) (UINT8(byte 0))
558+
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
546559
else
547560
UINT8 (byte(x)) }
548561

549562
| '\'' hexGraphShort '\'' { CHAR (char (int32 (hexGraphShort (lexemeTrimBoth lexbuf 3 1)))) }
550563

551-
| '\'' unicodeGraphShort '\'' { CHAR (char (int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 1)))) }
564+
| '\'' hexGraphShort '\'' 'B'
565+
{ let x = int32 (hexGraphShort (lexemeTrimBoth lexbuf 3 2))
566+
if x < 0 || x > 127 then
567+
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
568+
else
569+
UINT8 (byte(x)) }
552570

553571
| '\'' unicodeGraphLong '\''
554572
{ match unicodeGraphLong (lexemeTrimBoth lexbuf 3 1) with
555573
| SingleChar(c) -> CHAR (char c)
556574
| _ -> fail args lexbuf (FSComp.SR.lexThisUnicodeOnlyInStringLiterals()) (CHAR (char 0)) }
557575

576+
| '\'' unicodeGraphLong '\'' 'B'
577+
{ match unicodeGraphLong (lexemeTrimBoth lexbuf 3 2) with
578+
| SingleChar(c) ->
579+
let x = int32 c
580+
if x < 0 || x > 127 then
581+
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
582+
else
583+
UINT8 (byte(x))
584+
| _ -> fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0)) }
585+
558586
| "(*IF-FSHARP"
559587
{ if lexbuf.SupportsFeature LanguageFeature.MLCompatRevisions then
560588
mlCompatWarning (FSComp.SR.lexIndentOffForML()) lexbuf.LexemeRange
@@ -1201,11 +1229,26 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
12011229
| trigraph
12021230
{ let (buf, _fin, m, kind, args) = sargs
12031231
let s = lexeme lexbuf
1204-
addByteChar buf (trigraph s.[1] s.[2] s.[3])
1205-
if not skip then
1206-
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
1232+
let result() =
1233+
if not skip then
1234+
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
1235+
else
1236+
singleQuoteString sargs skip lexbuf
1237+
let c = trigraph s.[1] s.[2] s.[3]
1238+
let x = int c
1239+
if x < 0 || x > 255 then
1240+
// TODO: Promote to Error:
1241+
// * remove `addByteChar ...`
1242+
// * remove `warning ...`
1243+
// * Adjust `lexInvalidCharLiteralInString` in `FSComp.txt`: remove `Note` (incl. 2nd placeholder)
1244+
// * uncomment `fail ...`
1245+
addByteChar buf c
1246+
warning (Error(FSComp.SR.lexInvalidCharLiteralInString (s[0..3], sprintf "\\%03i" (x % 256)), lexbuf.LexemeRange))
1247+
//fail args lexbuf (FSComp.SR.lexInvalidCharLiteralInString (s[0..3])) ()
1248+
result()
12071249
else
1208-
singleQuoteString sargs skip lexbuf }
1250+
addByteChar buf c
1251+
result() }
12091252

12101253
| hexGraphShort
12111254
{ let (buf, _fin, m, kind, args) = sargs
@@ -1233,7 +1276,8 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
12331276
singleQuoteString sargs skip lexbuf
12341277
match unicodeGraphLong hexChars with
12351278
| Invalid ->
1236-
fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral hexChars) (result())
1279+
fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral hexChars) ()
1280+
result()
12371281
| SingleChar(c) ->
12381282
addUnicodeChar buf (int c)
12391283
result()

src/Compiler/xlf/FSComp.txt.cs.xlf

Lines changed: 22 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)