Skip to content

Commit b9a59d9

Browse files
committed
cmd/compile: optimize len([]rune(string))
Adds a new runtime function to count runes in a string.
Modifies the compiler to detect the pattern len([]rune(string))
and replaces it with the new rune counting runtime function.

RuneCount/lenruneslice/ASCII        27.8ns ± 2%  14.5ns ± 3%  -47.70%  (p=0.000 n=10+10)
RuneCount/lenruneslice/Japanese      126ns ± 2%    60ns ± 2%  -52.03%  (p=0.000 n=10+10)
RuneCount/lenruneslice/MixedLength   104ns ± 2%    50ns ± 1%  -51.71%  (p=0.000 n=10+9)

Fixes #24923

Change-Id: Ie9c7e7391a4e2cca675c5cdcc1e5ce7d523948b9
Reviewed-on: https://go-review.googlesource.com/108985
Run-TryBot: Martin Möhrmann <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Josh Bleecher Snyder <[email protected]>
1 parent a8a60ac commit b9a59d9

File tree

8 files changed

+265
-169
lines changed

8 files changed

+265
-169
lines changed

src/cmd/compile/internal/gc/builtin.go

Lines changed: 164 additions & 162 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/gc/builtin/runtime.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,12 @@ func slicebytetostringtmp([]byte) string
5555
func slicerunetostring(*[32]byte, []rune) string
5656
func stringtoslicebyte(*[32]byte, string) []byte
5757
func stringtoslicerune(*[32]rune, string) []rune
58-
func decoderune(string, int) (retv rune, retk int)
5958
func slicecopy(to any, fr any, wid uintptr) int
6059
func slicestringcopy(to any, fr any) int
6160

61+
func decoderune(string, int) (retv rune, retk int)
62+
func countrunes(string) int
63+
6264
// interface conversions
6365
func convI2I(typ *byte, elem any) (ret any)
6466

src/cmd/compile/internal/gc/order.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,14 @@ func (o *Order) expr(n, lhs *Node) *Node {
10981098
OSTRARRAYBYTE,
10991099
OSTRARRAYBYTETMP,
11001100
OSTRARRAYRUNE:
1101-
o.call(n)
1101+
1102+
if isRuneCount(n) {
1103+
// len([]rune(s)) is rewritten to runtime.countrunes(s) later.
1104+
n.Left.Left = o.expr(n.Left.Left, nil)
1105+
} else {
1106+
o.call(n)
1107+
}
1108+
11021109
if lhs == nil || lhs.Op != ONAME || instrumenting {
11031110
n = o.copyExpr(n, n.Type, false)
11041111
}

src/cmd/compile/internal/gc/walk.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,12 @@ opswitch:
538538
n.Left = walkexpr(n.Left, init)
539539

540540
case OLEN, OCAP:
541+
if isRuneCount(n) {
542+
// Replace len([]rune(string)) with runtime.countrunes(string).
543+
n = mkcall("countrunes", n.Type, init, conv(n.Left.Left, types.Types[TSTRING]))
544+
break
545+
}
546+
541547
n.Left = walkexpr(n.Left, init)
542548

543549
// replace len(*[10]int) with 10.
@@ -4085,3 +4091,9 @@ func canMergeLoads() bool {
40854091
}
40864092
return false
40874093
}
4094+
4095+
// isRuneCount reports whether n is of the form len([]rune(string)).
4096+
// These are optimized into a call to runtime.countrunes.
4097+
func isRuneCount(n *Node) bool {
4098+
return Debug['N'] == 0 && !instrumenting && n.Op == OLEN && n.Left.Op == OSTRARRAYRUNE
4099+
}

src/runtime/string_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strconv"
1010
"strings"
1111
"testing"
12+
"unicode/utf8"
1213
)
1314

1415
// Strings and slices that don't escape and fit into tmpBuf are stack allocated,
@@ -110,6 +111,43 @@ var stringdata = []struct{ name, data string }{
110111
{"MixedLength", "$Ѐࠀက퀀𐀀\U00040000\U0010FFFF"},
111112
}
112113

114+
var sinkInt int
115+
116+
func BenchmarkRuneCount(b *testing.B) {
117+
// Each sub-benchmark counts the runes in a string in a different way.
118+
b.Run("lenruneslice", func(b *testing.B) {
119+
for _, sd := range stringdata {
120+
b.Run(sd.name, func(b *testing.B) {
121+
for i := 0; i < b.N; i++ {
122+
sinkInt += len([]rune(sd.data))
123+
}
124+
})
125+
}
126+
})
127+
b.Run("rangeloop", func(b *testing.B) {
128+
for _, sd := range stringdata {
129+
b.Run(sd.name, func(b *testing.B) {
130+
for i := 0; i < b.N; i++ {
131+
n := 0
132+
for range sd.data {
133+
n++
134+
}
135+
sinkInt += n
136+
}
137+
})
138+
}
139+
})
140+
b.Run("utf8.RuneCountInString", func(b *testing.B) {
141+
for _, sd := range stringdata {
142+
b.Run(sd.name, func(b *testing.B) {
143+
for i := 0; i < b.N; i++ {
144+
sinkInt += utf8.RuneCountInString(sd.data)
145+
}
146+
})
147+
}
148+
})
149+
}
150+
113151
func BenchmarkRuneIterate(b *testing.B) {
114152
b.Run("range", func(b *testing.B) {
115153
for _, sd := range stringdata {

src/runtime/utf8.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ const (
3939
hicb = 0xBF // 1011 1111
4040
)
4141

42+
// countrunes returns the number of runes in s.
43+
func countrunes(s string) int {
44+
n := 0
45+
for range s {
46+
n++
47+
}
48+
return n
49+
}
50+
4251
// decoderune returns the non-ASCII rune at the start of
4352
// s[k:] and the index after the rune in s.
4453
//

src/unicode/utf8/utf8_test.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -212,14 +212,25 @@ func TestSequencing(t *testing.T) {
212212
}
213213
}
214214

215-
// Check that a range loop and a []int conversion visit the same runes.
215+
func runtimeRuneCount(s string) int {
216+
return len([]rune(s)) // Replaced by gc with call to runtime.countrunes(s).
217+
}
218+
219+
// Check that a range loop, len([]rune(string)) optimization and
220+
// []rune conversions visit the same runes.
216221
// Not really a test of this package, but the assumption is used here and
217-
// it's good to verify
218-
func TestIntConversion(t *testing.T) {
222+
// it's good to verify.
223+
func TestRuntimeConversion(t *testing.T) {
219224
for _, ts := range testStrings {
225+
count := RuneCountInString(ts)
226+
if n := runtimeRuneCount(ts); n != count {
227+
t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, n, count)
228+
break
229+
}
230+
220231
runes := []rune(ts)
221-
if RuneCountInString(ts) != len(runes) {
222-
t.Errorf("%q: expected %d runes; got %d", ts, len(runes), RuneCountInString(ts))
232+
if n := len(runes); n != count {
233+
t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, n, count)
223234
break
224235
}
225236
i := 0

test/codegen/strings.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// asmcheck
2+
3+
// Copyright 2018 The Go Authors. All rights reserved.
4+
// Use of this source code is governed by a BSD-style
5+
// license that can be found in the LICENSE file.
6+
7+
package codegen
8+
9+
// This file contains code generation tests related to the handling of
10+
// string types.
11+
12+
func CountRunes(s string) int { // Issue #24923
13+
// amd64:`.*countrunes`
14+
return len([]rune(s))
15+
}

0 commit comments

Comments
 (0)