Skip to content

Commit f371b30

Browse files
committed
unicode/utf8: add AppendRune
AppendRune appends the UTF-8 encoding of a rune to a []byte. It is generally more user-friendly than EncodeRune. EncodeASCIIRune-4 2.35ns ± 2% EncodeJapaneseRune-4 4.60ns ± 2% AppendASCIIRune-4 0.30ns ± 3% AppendJapaneseRune-4 4.70ns ± 2% The ASCII case is written to be inlineable. Fixes #47609 Change-Id: If4f71eedffd2bd4ef0d7f960cb55b41c637eec54 Reviewed-on: https://go-review.googlesource.com/c/go/+/345571 Trust: Joe Tsai <[email protected]> Reviewed-by: Rob Pike <[email protected]> Run-TryBot: Rob Pike <[email protected]> TryBot-Result: Go Bot <[email protected]>
1 parent ef4cb2f commit f371b30

File tree

3 files changed

+52
-0
lines changed

3 files changed

+52
-0
lines changed

src/cmd/compile/internal/test/inl_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ func TestIntendedInlining(t *testing.T) {
122122
"FullRune",
123123
"FullRuneInString",
124124
"RuneLen",
125+
"AppendRune",
125126
"ValidRune",
126127
},
127128
"reflect": {

src/unicode/utf8/utf8.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,32 @@ func EncodeRune(p []byte, r rune) int {
369369
}
370370
}
371371

372+
// AppendRune appends the UTF-8 encoding of r to the end of p and
373+
// returns the extended buffer. If the rune is out of range,
374+
// it appends the encoding of RuneError.
375+
func AppendRune(p []byte, r rune) []byte {
376+
// This function is inlineable for fast handling of ASCII.
377+
if uint32(r) <= rune1Max {
378+
return append(p, byte(r))
379+
}
380+
return appendRuneNonASCII(p, r)
381+
}
382+
383+
func appendRuneNonASCII(p []byte, r rune) []byte {
384+
// Negative values are erroneous. Making it unsigned addresses the problem.
385+
switch i := uint32(r); {
386+
case i <= rune2Max:
387+
return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
388+
case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
389+
r = RuneError
390+
fallthrough
391+
case i <= rune3Max:
392+
return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
393+
default:
394+
return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
395+
}
396+
}
397+
372398
// RuneCount returns the number of runes in p. Erroneous and short
373399
// encodings are treated as single runes of width 1 byte.
374400
func RuneCount(p []byte) int {

src/unicode/utf8/utf8_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@ func TestEncodeRune(t *testing.T) {
127127
}
128128
}
129129

130+
func TestAppendRune(t *testing.T) {
131+
for _, m := range utf8map {
132+
if buf := AppendRune(nil, m.r); string(buf) != m.str {
133+
t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
134+
}
135+
if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
136+
t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
137+
}
138+
}
139+
}
140+
130141
func TestDecodeRune(t *testing.T) {
131142
for _, m := range utf8map {
132143
b := []byte(m.str)
@@ -583,6 +594,20 @@ func BenchmarkEncodeJapaneseRune(b *testing.B) {
583594
}
584595
}
585596

597+
func BenchmarkAppendASCIIRune(b *testing.B) {
598+
buf := make([]byte, UTFMax)
599+
for i := 0; i < b.N; i++ {
600+
AppendRune(buf[:0], 'a')
601+
}
602+
}
603+
604+
func BenchmarkAppendJapaneseRune(b *testing.B) {
605+
buf := make([]byte, UTFMax)
606+
for i := 0; i < b.N; i++ {
607+
AppendRune(buf[:0], '本')
608+
}
609+
}
610+
586611
func BenchmarkDecodeASCIIRune(b *testing.B) {
587612
a := []byte{'a'}
588613
for i := 0; i < b.N; i++ {

0 commit comments

Comments
 (0)