Skip to content

Commit 2bded9d

Browse files
martisch and andybons
authored and committed
[release-branch.go1.10] strings: fix encoding of \u0080 in map
Fix encoding of PAD (U+0080), which has the same value as utf8.RuneSelf, being incorrectly encoded as \x80 in strings.Map due to using a <= instead of a < comparison operator to check for one-byte UTF-8 encodings.

Fixes #25479

Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Brad Fitzpatrick <[email protected]>
(cherry picked from commit 8c62fc0)
Reviewed-on: https://go-review.googlesource.com/114635
Run-TryBot: Andrew Bonventre <[email protected]>
1 parent 48ee689 commit 2bded9d

File tree

2 files changed

+25
-2
lines changed

2 files changed

+25
-2
lines changed

src/strings/strings.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -474,7 +474,7 @@ func Map(mapping func(rune) rune, s string) string {
474474
b = make([]byte, len(s)+utf8.UTFMax)
475475
nbytes = copy(b, s[:i])
476476
if r >= 0 {
477-
if r <= utf8.RuneSelf {
477+
if r < utf8.RuneSelf {
478478
b[nbytes] = byte(r)
479479
nbytes++
480480
} else {
@@ -504,7 +504,7 @@ func Map(mapping func(rune) rune, s string) string {
504504
r := mapping(c)
505505

506506
// common case
507-
if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
507+
if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
508508
b[nbytes] = byte(r)
509509
nbytes++
510510
continue

src/strings/strings_test.go

+23
Original file line number | Diff line number | Diff line change
@@ -528,6 +528,7 @@ var upperTests = []StringTest{
528528
{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
529529
{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
530530
{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
531+
{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
531532
}
532533

533534
var lowerTests = []StringTest{
@@ -538,6 +539,7 @@ var lowerTests = []StringTest{
538539
{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
539540
{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
540541
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
542+
{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
541543
}
542544

543545
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
@@ -650,6 +652,27 @@ func TestMap(t *testing.T) {
650652
if m != expect {
651653
t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
652654
}
655+
656+
// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
657+
encode := func(r rune) rune {
658+
switch r {
659+
case utf8.RuneSelf:
660+
return unicode.MaxRune
661+
case unicode.MaxRune:
662+
return utf8.RuneSelf
663+
}
664+
return r
665+
}
666+
s := string(utf8.RuneSelf) + string(utf8.MaxRune)
667+
r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
668+
m = Map(encode, s)
669+
if m != r {
670+
t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
671+
}
672+
m = Map(encode, r)
673+
if m != s {
674+
t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
675+
}
653676
}
654677

655678
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }

0 commit comments

Comments
 (0)