Skip to content

Commit 937f91e

Browse files
donovanhide authored and rsc committed
strings: faster Count, Index
Slightly better benchmarks for when string and separator are equivalent and also less branching in inner loops.

benchmark                      old ns/op  new ns/op    delta
BenchmarkGenericNoMatch             3430       3442   +0.35%
BenchmarkGenericMatch1             23590      22855   -3.12%
BenchmarkGenericMatch2            108031     105025   -2.78%
BenchmarkSingleMaxSkipping          2969       2704   -8.93%
BenchmarkSingleLongSuffixFail       2826       2572   -8.99%
BenchmarkSingleMatch              205268     197832   -3.62%
BenchmarkByteByteNoMatch             987        921   -6.69%
BenchmarkByteByteMatch              2014       1749  -13.16%
BenchmarkByteStringMatch            3083       3050   -1.07%
BenchmarkHTMLEscapeNew               922        915   -0.76%
BenchmarkHTMLEscapeOld              1654       1570   -5.08%
BenchmarkByteByteReplaces          11897      11556   -2.87%
BenchmarkByteByteMap                4485       4255   -5.13%
BenchmarkIndexRune                   174        121  -30.46%
BenchmarkIndexRuneFastPath            41         41   -0.24%
BenchmarkIndex                        45         44   -0.22%
BenchmarkMapNoChanges                433        431   -0.46%
BenchmarkIndexHard1              4015336    3316490  -17.40%
BenchmarkIndexHard2              3976254    3395627  -14.60%
BenchmarkIndexHard3              3973158    3378329  -14.97%
BenchmarkCountHard1              4403549    3448512  -21.69%
BenchmarkCountHard2              4387437    3413059  -22.21%
BenchmarkCountHard3              4403891    3382661  -23.19%
BenchmarkIndexTorture              28354      25864   -8.78%
BenchmarkCountTorture              29625      27463   -7.30%
BenchmarkFields                 38752040   39169840   +1.08%
BenchmarkFieldsFunc             38797765   38888060   +0.23%

benchmark                       old MB/s   new MB/s  speedup
BenchmarkSingleMaxSkipping       3367.07    3697.62    1.10x
BenchmarkSingleLongSuffixFail     354.51     389.47    1.10x
BenchmarkSingleMatch               73.07      75.82    1.04x
BenchmarkFields                    27.06      26.77    0.99x
BenchmarkFieldsFunc                27.03      26.96    1.00x

R=dave, fullung, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7350045
1 parent 9704c80 commit 937f91e

File tree

2 files changed

+47
-28
lines changed

2 files changed

+47
-28
lines changed

src/pkg/strings/strings.go

Lines changed: 39 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -59,21 +59,26 @@ func hashstr(sep string) (uint32, uint32) {
5959

6060
// Count counts the number of non-overlapping instances of sep in s.
6161
func Count(s, sep string) int {
62-
if sep == "" {
63-
return utf8.RuneCountInString(s) + 1
64-
}
65-
c := sep[0]
6662
n := 0
67-
if len(sep) == 1 {
63+
// special cases
64+
switch {
65+
case len(sep) == 0:
66+
return utf8.RuneCountInString(s) + 1
67+
case len(sep) == 1:
6868
// special case worth making fast
69+
c := sep[0]
6970
for i := 0; i < len(s); i++ {
7071
if s[i] == c {
7172
n++
7273
}
7374
}
7475
return n
75-
}
76-
if len(sep) > len(s) {
76+
case len(sep) > len(s):
77+
return 0
78+
case len(sep) == len(s):
79+
if sep == s {
80+
return 1
81+
}
7782
return 0
7883
}
7984
hashsep, pow := hashstr(sep)
@@ -82,17 +87,19 @@ func Count(s, sep string) int {
8287
h = h*primeRK + uint32(s[i])
8388
}
8489
lastmatch := 0
85-
for i := len(sep); ; i++ {
86-
// Invariant: h = hash(s[i-l : i])
90+
if h == hashsep && s[:len(sep)] == sep {
91+
n++
92+
lastmatch = len(sep)
93+
}
94+
for i := len(sep); i < len(s); {
95+
h *= primeRK
96+
h += uint32(s[i])
97+
h -= pow * uint32(s[i-len(sep)])
98+
i++
8799
if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
88100
n++
89101
lastmatch = i
90102
}
91-
if i >= len(s) {
92-
break
93-
}
94-
h = h*primeRK + uint32(s[i])
95-
h -= pow * uint32(s[i-len(sep)])
96103
}
97104
return n
98105
}
@@ -115,21 +122,24 @@ func ContainsRune(s string, r rune) bool {
115122
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
116123
func Index(s, sep string) int {
117124
n := len(sep)
118-
if n == 0 {
125+
switch {
126+
case n == 0:
119127
return 0
120-
}
121-
c := sep[0]
122-
if n == 1 {
128+
case n == 1:
129+
c := sep[0]
123130
// special case worth making fast
124131
for i := 0; i < len(s); i++ {
125132
if s[i] == c {
126133
return i
127134
}
128135
}
129136
return -1
130-
}
131-
// n > 1
132-
if n > len(s) {
137+
case n == len(s):
138+
if sep == s {
139+
return 0
140+
}
141+
return -1
142+
case n > len(s):
133143
return -1
134144
}
135145
// Hash sep.
@@ -138,16 +148,17 @@ func Index(s, sep string) int {
138148
for i := 0; i < n; i++ {
139149
h = h*primeRK + uint32(s[i])
140150
}
141-
for i := n; ; i++ {
142-
// Invariant: h = hash(s[i-n : i])
151+
if h == hashsep && s[:n] == sep {
152+
return 0
153+
}
154+
for i := n; i < len(s); {
155+
h *= primeRK
156+
h += uint32(s[i])
157+
h -= pow * uint32(s[i-n])
158+
i++
143159
if h == hashsep && s[i-n:i] == sep {
144160
return i - n
145161
}
146-
if i >= len(s) {
147-
break
148-
}
149-
h = h*primeRK + uint32(s[i])
150-
h -= pow * uint32(s[i-n])
151162
}
152163
return -1
153164
}

src/pkg/strings/strings_test.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1052,6 +1052,14 @@ func BenchmarkCountTorture(b *testing.B) {
10521052
}
10531053
}
10541054

1055+
// BenchmarkCountTortureOverlapping times Count on a haystack of "ABC"
// repeated 1<<20 times with a separator of "ABC" repeated 1<<10 times.
// The separator lines up with the haystack at every third byte, so candidate
// matches overlap heavily even though Count tallies only non-overlapping ones.
func BenchmarkCountTortureOverlapping(b *testing.B) {
1056+
A := Repeat("ABC", 1<<20)
1057+
B := Repeat("ABC", 1<<10)
1058+
for i := 0; i < b.N; i++ {
1059+
// The count itself is discarded; only the running time is of interest.
Count(A, B)
1060+
}
1061+
}
1062+
10551063
var makeFieldsInput = func() string {
10561064
x := make([]byte, 1<<20)
10571065
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.

0 commit comments

Comments
 (0)