Skip to content

Commit 65a6e05

Browse files
charlievieth and gopherbot
authored and committed
bytes,strings: improve IndexRune performance by ~45%
Change IndexRune to search for the last byte of a multi-byte rune instead of using the first byte. This improves search performance by 45% on average when dealing with Unicode text. The rationale here is that the last byte of a UTF-8 encoded multi-byte rune is significantly more unique (evenly distributed) than the first byte which has a 78% chance of being [240, 243, 244]. This approach is typically much faster, but can be slower when there are a large number of false positives (see Han benchmarks) because the more even distribution of bytes can delay/prevent falling back to a brute-force search using bytealg.Index, which is particularly powerful on amd64/x86_64 (particularly Skylake, but less so with newer processors). bytes package benchmarks: goos: darwin goarch: arm64 pkg: bytes cpu: Apple M1 Max │ base.10.txt │ new.10.txt │ │ sec/op │ sec/op vs base │ IndexRune/10-10 9.784n ± 0% 8.470n ± 0% -13.43% (p=0.000 n=10) IndexRune/32-10 11.660n ± 0% 8.473n ± 0% -27.34% (p=0.000 n=10) IndexRune/4K-10 83.96n ± 0% 81.08n ± 0% -3.44% (p=0.000 n=10) IndexRune/4M-10 63.92µ ± 0% 64.67µ ± 0% +1.17% (p=0.000 n=10) IndexRune/64M-10 1.121m ± 1% 1.125m ± 1% ~ (p=0.218 n=10) IndexRuneUnicode/Latin/10-10 10.125n ± 0% 7.347n ± 0% -27.43% (p=0.000 n=10) IndexRuneUnicode/Latin/32-10 11.435n ± 0% 7.349n ± 0% -35.73% (p=0.000 n=10) IndexRuneUnicode/Latin/4K-10 882.6n ± 0% 334.9n ± 1% -62.06% (p=0.000 n=10) IndexRuneUnicode/Latin/4M-10 977.2µ ± 0% 370.9µ ± 1% -62.04% (p=0.000 n=10) IndexRuneUnicode/Latin/64M-10 15.649m ± 1% 6.028m ± 1% -61.48% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/10-10 10.070n ± 0% 8.701n ± 0% -13.59% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/32-10 19.045n ± 0% 8.704n ± 1% -54.30% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4K-10 2.734µ ± 0% 1.046µ ± 1% -61.75% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4M-10 2.671m ± 0% 1.143m ± 1% -57.22% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/64M-10 43.12m ± 1% 18.26m ± 1% -57.64% (p=0.000 n=10) IndexRuneUnicode/Han/10-10 10.10n ± 0% 
10.82n ± 1% +7.08% (p=0.000 n=10) IndexRuneUnicode/Han/32-10 38.29n ± 1% 10.87n ± 1% -71.62% (p=0.000 n=10) IndexRuneUnicode/Han/4K-10 1409.0n ± 0% 489.1n ± 1% -65.28% (p=0.000 n=10) IndexRuneUnicode/Han/4M-10 1338.4µ ± 0% 821.1µ ± 2% -38.65% (p=0.000 n=10) IndexRuneUnicode/Han/64M-10 21.42m ± 1% 13.42m ± 2% -37.34% (p=0.000 n=10) geomean 3.983µ 2.305µ -42.14% │ base.10.txt │ new.10.txt │ │ B/s │ B/s vs base │ IndexRune/10-10 974.8Mi ± 0% 1126.1Mi ± 0% +15.52% (p=0.000 n=10) IndexRune/32-10 2.556Gi ± 0% 3.517Gi ± 0% +37.62% (p=0.000 n=10) IndexRune/4K-10 45.43Gi ± 0% 47.05Gi ± 0% +3.56% (p=0.000 n=10) IndexRune/4M-10 61.12Gi ± 0% 60.41Gi ± 0% -1.16% (p=0.000 n=10) IndexRune/64M-10 55.74Gi ± 1% 55.57Gi ± 1% ~ (p=0.218 n=10) IndexRuneUnicode/Latin/10-10 942.0Mi ± 0% 1297.9Mi ± 0% +37.78% (p=0.000 n=10) IndexRuneUnicode/Latin/32-10 2.606Gi ± 0% 4.055Gi ± 0% +55.61% (p=0.000 n=10) IndexRuneUnicode/Latin/4K-10 4.322Gi ± 0% 11.392Gi ± 1% +163.57% (p=0.000 n=10) IndexRuneUnicode/Latin/4M-10 3.998Gi ± 0% 10.532Gi ± 1% +163.47% (p=0.000 n=10) IndexRuneUnicode/Latin/64M-10 3.994Gi ± 1% 10.369Gi ± 1% +159.61% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/10-10 947.2Mi ± 0% 1096.1Mi ± 0% +15.72% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/32-10 1.565Gi ± 0% 3.424Gi ± 1% +118.80% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4K-10 1.396Gi ± 0% 3.649Gi ± 1% +161.43% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4M-10 1.462Gi ± 0% 3.418Gi ± 1% +133.76% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/64M-10 1.450Gi ± 1% 3.422Gi ± 1% +136.08% (p=0.000 n=10) IndexRuneUnicode/Han/10-10 944.6Mi ± 0% 881.7Mi ± 1% -6.66% (p=0.000 n=10) IndexRuneUnicode/Han/32-10 797.0Mi ± 1% 2809.3Mi ± 1% +252.47% (p=0.000 n=10) IndexRuneUnicode/Han/4K-10 2.707Gi ± 0% 7.798Gi ± 1% +188.04% (p=0.000 n=10) IndexRuneUnicode/Han/4M-10 2.919Gi ± 0% 4.757Gi ± 2% +63.01% (p=0.000 n=10) IndexRuneUnicode/Han/64M-10 2.917Gi ± 1% 4.656Gi ± 2% +59.60% (p=0.000 n=10) geomean 3.036Gi 5.246Gi +72.82% goos: linux goarch: amd64 pkg: bytes │ 
old.txt │ new.txt │ │ sec/op │ sec/op vs base │ IndexRune/10-4 10.805n ± 0% 6.999n ± 0% -35.22% (p=0.000 n=10) IndexRune/32-4 12.515n ± 0% 7.539n ± 0% -39.76% (p=0.000 n=10) IndexRune/4K-4 71.69n ± 0% 68.39n ± 0% -4.60% (p=0.000 n=10) IndexRune/4M-4 125.19µ ± 2% 63.05µ ± 0% -49.63% (p=0.000 n=10) IndexRune/64M-4 1.050m ± 1% 1.053m ± 0% ~ (p=0.353 n=10) IndexRuneUnicode/Latin/10-4 9.471n ± 0% 6.144n ± 1% -35.13% (p=0.000 n=10) IndexRuneUnicode/Latin/32-4 12.540n ± 0% 6.655n ± 0% -46.93% (p=0.000 n=10) IndexRuneUnicode/Latin/4K-4 522.1n ± 0% 207.2n ± 0% -60.32% (p=0.000 n=10) IndexRuneUnicode/Latin/4M-4 626.1µ ± 0% 297.2µ ± 0% -52.54% (p=0.000 n=10) IndexRuneUnicode/Latin/64M-4 13.866m ± 3% 5.069m ± 4% -63.44% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/10-4 10.920n ± 0% 7.213n ± 0% -33.95% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/32-4 12.515n ± 0% 7.780n ± 0% -37.83% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4K-4 2650.0n ± 0% 621.5n ± 0% -76.55% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4M-4 2744.7µ ± 0% 723.2µ ± 0% -73.65% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/64M-4 44.18m ± 0% 14.22m ± 14% -67.82% (p=0.000 n=10) IndexRuneUnicode/Han/10-4 10.795n ± 0% 9.734n ± 1% -9.83% (p=0.000 n=10) IndexRuneUnicode/Han/32-4 12.79n ± 0% 10.42n ± 1% -18.46% (p=0.000 n=10) IndexRuneUnicode/Han/4K-4 519.7n ± 0% 288.4n ± 0% -44.51% (p=0.000 n=10) IndexRuneUnicode/Han/4M-4 498.2µ ± 0% 443.0µ ± 0% -11.07% (p=0.000 n=10) IndexRuneUnicode/Han/64M-4 9.654m ± 2% 12.223m ± 1% +26.61% (p=0.000 n=10) geomean 3.168µ 1.828µ -42.30% │ old.txt │ new.txt │ │ B/s │ B/s vs base │ IndexRune/10-4 882.5Mi ± 0% 1362.6Mi ± 0% +54.41% (p=0.000 n=10) IndexRune/32-4 2.381Gi ± 0% 3.953Gi ± 0% +66.00% (p=0.000 n=10) IndexRune/4K-4 53.21Gi ± 0% 55.77Gi ± 0% +4.82% (p=0.000 n=10) IndexRune/4M-4 31.20Gi ± 2% 61.95Gi ± 0% +98.55% (p=0.000 n=10) IndexRune/64M-4 59.54Gi ± 1% 59.37Gi ± 0% ~ (p=0.353 n=10) IndexRuneUnicode/Latin/10-4 1006.9Mi ± 0% 1552.3Mi ± 1% +54.17% (p=0.000 n=10) IndexRuneUnicode/Latin/32-4 2.376Gi 
± 0% 4.478Gi ± 0% +88.45% (p=0.000 n=10) IndexRuneUnicode/Latin/4K-4 7.306Gi ± 0% 18.411Gi ± 0% +152.01% (p=0.000 n=10) IndexRuneUnicode/Latin/4M-4 6.239Gi ± 0% 13.145Gi ± 0% +110.70% (p=0.000 n=10) IndexRuneUnicode/Latin/64M-4 4.507Gi ± 3% 12.329Gi ± 4% +173.54% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/10-4 873.0Mi ± 0% 1322.2Mi ± 0% +51.46% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/32-4 2.382Gi ± 0% 3.831Gi ± 0% +60.84% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4K-4 1.439Gi ± 0% 6.138Gi ± 0% +326.43% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/4M-4 1.423Gi ± 0% 5.401Gi ± 0% +279.52% (p=0.000 n=10) IndexRuneUnicode/Cyrillic/64M-4 1.415Gi ± 0% 4.396Gi ± 17% +210.79% (p=0.000 n=10) IndexRuneUnicode/Han/10-4 883.4Mi ± 0% 979.7Mi ± 1% +10.90% (p=0.000 n=10) IndexRuneUnicode/Han/32-4 2.331Gi ± 0% 2.858Gi ± 1% +22.61% (p=0.000 n=10) IndexRuneUnicode/Han/4K-4 7.340Gi ± 0% 13.226Gi ± 0% +80.19% (p=0.000 n=10) IndexRuneUnicode/Han/4M-4 7.841Gi ± 0% 8.817Gi ± 0% +12.44% (p=0.000 n=10) IndexRuneUnicode/Han/64M-4 6.474Gi ± 2% 5.113Gi ± 1% -21.02% (p=0.000 n=10) geomean 3.816Gi 6.614Gi +73.32% strings package benchmarks: goos: darwin goarch: arm64 pkg: strings │ base.index_rune.10.txt │ new.index_rune.10.txt │ │ sec/op │ sec/op vs base │ IndexRune-10 11.905n ± 5% 6.633n ± 6% -44.28% (p=0.000 n=10) IndexRuneLongString-10 13.800n ± 1% 7.330n ± 2% -46.88% (p=0.000 n=10) IndexRuneFastPath-10 3.477n ± 0% 3.481n ± 1% ~ (p=0.468 n=10) geomean 8.297n 5.531n -33.34% Change-Id: I59357fda1c8ac85315b759930f620dbce1ba4721 Reviewed-on: https://go-review.googlesource.com/c/go/+/539116 Reviewed-by: Ian Lance Taylor <[email protected]> Reviewed-by: Carlos Amedee <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Auto-Submit: Ian Lance Taylor <[email protected]>
1 parent 9becf40 commit 65a6e05

File tree

4 files changed

+264
-5
lines changed

4 files changed

+264
-5
lines changed

src/bytes/bytes.go

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ func LastIndexByte(s []byte, c byte) int {
137137
// If r is [utf8.RuneError], it returns the first instance of any
138138
// invalid UTF-8 byte sequence.
139139
func IndexRune(s []byte, r rune) int {
140+
const haveFastIndex = bytealg.MaxBruteForce > 0
140141
switch {
141142
case 0 <= r && r < utf8.RuneSelf:
142143
return IndexByte(s, byte(r))
@@ -152,9 +153,64 @@ func IndexRune(s []byte, r rune) int {
152153
case !utf8.ValidRune(r):
153154
return -1
154155
default:
156+
// Search for rune r using the last byte of its UTF-8 encoded form.
157+
// The distribution of the last byte is more uniform compared to the
158+
// first byte which has a 78% chance of being [240, 243, 244].
155159
var b [utf8.UTFMax]byte
156160
n := utf8.EncodeRune(b[:], r)
157-
return Index(s, b[:n])
161+
last := n - 1
162+
i := last
163+
fails := 0
164+
for i < len(s) {
165+
if s[i] != b[last] {
166+
o := IndexByte(s[i+1:], b[last])
167+
if o < 0 {
168+
return -1
169+
}
170+
i += o + 1
171+
}
172+
// Step backwards comparing bytes.
173+
for j := 1; j < n; j++ {
174+
if s[i-j] != b[last-j] {
175+
goto next
176+
}
177+
}
178+
return i - last
179+
next:
180+
fails++
181+
i++
182+
if (haveFastIndex && fails > bytealg.Cutover(i)) && i < len(s) ||
183+
(!haveFastIndex && fails >= 4+i>>4 && i < len(s)) {
184+
goto fallback
185+
}
186+
}
187+
return -1
188+
189+
fallback:
190+
// Switch to bytealg.Index, if available, or a brute force search when
191+
// IndexByte returns too many false positives.
192+
if haveFastIndex {
193+
if j := bytealg.Index(s[i-last:], b[:n]); j >= 0 {
194+
return i + j - last
195+
}
196+
} else {
197+
// If bytealg.Index is not available a brute force search is
198+
// ~1.5-3x faster than Rabin-Karp since n is small.
199+
c0 := b[last]
200+
c1 := b[last-1] // There are at least 2 chars to match
201+
loop:
202+
for ; i < len(s); i++ {
203+
if s[i] == c0 && s[i-1] == c1 {
204+
for k := 2; k < n; k++ {
205+
if s[i-k] != b[last-k] {
206+
continue loop
207+
}
208+
}
209+
return i - last
210+
}
211+
}
212+
}
213+
return -1
158214
}
159215
}
160216

src/bytes/bytes_test.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ var indexTests = []BinOpTest{
197197
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
198198
// test fallback to Rabin-Karp.
199199
{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
200+
// test fallback to IndexRune
201+
{"oxoxoxoxoxoxoxoxoxoxox☺", "☺", 22},
202+
// invalid UTF-8 byte sequence (must be longer than bytealg.MaxBruteForce to
203+
// test that we don't use IndexRune)
204+
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx\xed\x9f\xc0", "\xed\x9f\xc0", 105},
200205
}
201206

202207
var lastIndexTests = []BinOpTest{
@@ -445,6 +450,31 @@ func TestIndexRune(t *testing.T) {
445450
{"some_text=some_value", '=', 9},
446451
{"☺a", 'a', 3},
447452
{"a☻☺b", '☺', 4},
453+
{"𠀳𠀗𠀾𠁄𠀧𠁆𠁂𠀫𠀖𠀪𠀲𠀴𠁀𠀨𠀿", '𠀿', 56},
454+
455+
// 2 bytes
456+
{"ӆ", 'ӆ', 0},
457+
{"a", 'ӆ', -1},
458+
{" ӆ", 'ӆ', 2},
459+
{" a", 'ӆ', -1},
460+
{strings.Repeat("ц", 64) + "ӆ", 'ӆ', 128}, // test cutover
461+
{strings.Repeat("ц", 64), 'ӆ', -1},
462+
463+
// 3 bytes
464+
{"Ꚁ", 'Ꚁ', 0},
465+
{"a", 'Ꚁ', -1},
466+
{" Ꚁ", 'Ꚁ', 2},
467+
{" a", 'Ꚁ', -1},
468+
{strings.Repeat("Ꙁ", 64) + "Ꚁ", 'Ꚁ', 192}, // test cutover
469+
{strings.Repeat("Ꙁ", 64) + "Ꚁ", '䚀', -1}, // 'Ꚁ' and '䚀' share the same last two bytes
470+
471+
// 4 bytes
472+
{"𡌀", '𡌀', 0},
473+
{"a", '𡌀', -1},
474+
{" 𡌀", '𡌀', 2},
475+
{" a", '𡌀', -1},
476+
{strings.Repeat("𡋀", 64) + "𡌀", '𡌀', 256}, // test cutover
477+
{strings.Repeat("𡋀", 64) + "𡌀", '𣌀', -1}, // '𡌀' and '𣌀' share the same last two bytes
448478

449479
// RuneError should match any invalid UTF-8 byte sequence.
450480
{"�", '�', 0},
@@ -458,6 +488,13 @@ func TestIndexRune(t *testing.T) {
458488
{"a☺b☻c☹d\xe2\x98\xff\xed\xa0\x80", -1, -1},
459489
{"a☺b☻c☹d\xe2\x98\xff\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
460490
{"a☺b☻c☹d\xe2\x98\xff\xed\xa0\x80", utf8.MaxRune + 1, -1},
491+
492+
// Test the cutover to bytealg.Index when it is triggered in
493+
// the middle of a rune that contains consecutive runs of equal bytes.
494+
{"aaaaaKKKK\U000bc104", '\U000bc104', 17}, // cutover: (n + 16) / 8
495+
{"aaaaaKKKK鄄", '鄄', 17},
496+
{"aaKKKKKa\U000bc104", '\U000bc104', 18}, // cutover: 4 + n>>4
497+
{"aaKKKKKa鄄", '鄄', 18},
461498
}
462499
for _, tt := range tests {
463500
if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want {
@@ -605,6 +642,21 @@ func BenchmarkIndexRuneASCII(b *testing.B) {
605642
benchBytes(b, indexSizes, bmIndexRuneASCII(IndexRune))
606643
}
607644

645+
// BenchmarkIndexRuneUnicode runs one sub-benchmark per script (Latin,
// Cyrillic, Han). Each sub-benchmark passes the benchmark function built
// by bmIndexRuneUnicode for that script's range table and a needle rune
// to benchBytes, together with indexSizes.
func BenchmarkIndexRuneUnicode(b *testing.B) {
646+
b.Run("Latin", func(b *testing.B) {
647+
// Latin is mostly 1, 2, 3 byte runes.
648+
benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Latin, 'é'))
649+
})
650+
b.Run("Cyrillic", func(b *testing.B) {
651+
// Cyrillic is mostly 2 and 3 byte runes.
652+
benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Cyrillic, 'Ꙁ'))
653+
})
654+
b.Run("Han", func(b *testing.B) {
655+
// Han consists only of 3 and 4 byte runes.
656+
benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Han, '𠀿'))
657+
})
658+
}
659+
608660
func bmIndexRuneASCII(index func([]byte, rune) int) func(b *testing.B, n int) {
609661
return func(b *testing.B, n int) {
610662
buf := bmbuf[0:n]
@@ -635,6 +687,61 @@ func bmIndexRune(index func([]byte, rune) int) func(b *testing.B, n int) {
635687
}
636688
}
637689

690+
// bmIndexRuneUnicode returns a benchmark function that measures IndexRune
// on a haystack built from the runes of rt.
//
// Every rune listed in rt's R16 and R32 ranges, except needle, is collected
// and shuffled with a fixed seed. The returned function fills the first n
// bytes of bmbuf with repeated copies of those runes, places needle in the
// last utf8.RuneLen(needle) bytes, and fails the benchmark unless IndexRune
// reports the needle at exactly that offset.
func bmIndexRuneUnicode(rt *unicode.RangeTable, needle rune) func(b *testing.B, n int) {
691+
var rs []rune
692+
// Collect the 16-bit ranges, skipping the needle so that it can only
// occur where it is explicitly placed below.
for _, r16 := range rt.R16 {
693+
for r := rune(r16.Lo); r <= rune(r16.Hi); r += rune(r16.Stride) {
694+
if r != needle {
695+
rs = append(rs, rune(r))
696+
}
697+
}
698+
}
699+
// Same for the 32-bit ranges.
for _, r32 := range rt.R32 {
700+
for r := rune(r32.Lo); r <= rune(r32.Hi); r += rune(r32.Stride) {
701+
if r != needle {
702+
rs = append(rs, rune(r))
703+
}
704+
}
705+
}
706+
// Shuffle the runes so that they are not in the ascending order in
707+
// which they were collected. The shuffle uses a fixed seed, so it is
708+
// deterministic; benchmarks need to be repeatable.
709+
rr := rand.New(rand.NewSource(1))
710+
rr.Shuffle(len(rs), func(i, j int) {
711+
rs[i], rs[j] = rs[j], rs[i]
712+
})
713+
uchars := string(rs)
714+
715+
return func(b *testing.B, n int) {
716+
buf := bmbuf[0:n]
717+
// Tile buf with copies of uchars; the final copy may be cut short,
// possibly in the middle of a rune.
o := copy(buf, uchars)
718+
for o < len(buf) {
719+
o += copy(buf[o:], uchars)
720+
}
721+
722+
// Make space for the needle rune at the end of buf.
723+
m := utf8.RuneLen(needle)
724+
// Drop whole trailing runes until at least m bytes have been removed.
// The removed bytes are zeroed because buf[:n-m] below can re-extend
// over some of them when more than m bytes were dropped.
for o := m; o > 0; {
725+
_, sz := utf8.DecodeLastRune(buf)
726+
copy(buf[len(buf)-sz:], "\x00\x00\x00\x00")
727+
buf = buf[:len(buf)-sz]
728+
o -= sz
729+
}
730+
buf = utf8.AppendRune(buf[:n-m], needle)
731+
732+
n -= m // adjust for rune len
733+
for i := 0; i < b.N; i++ {
734+
j := IndexRune(buf, needle)
735+
if j != n {
736+
b.Fatal("bad index", j)
737+
}
738+
}
739+
// Zero the bytes that were written.
// NOTE(review): presumably because bmbuf is shared with other
// benchmarks — confirm against its declaration.
for i := range buf {
740+
buf[i] = '\x00'
741+
}
742+
}
743+
}
744+
638745
func BenchmarkEqual(b *testing.B) {
639746
b.Run("0", func(b *testing.B) {
640747
var buf [4]byte
@@ -2077,6 +2184,11 @@ func makeBenchInputHard() []byte {
20772184
var benchInputHard = makeBenchInputHard()
20782185

20792186
func benchmarkIndexHard(b *testing.B, sep []byte) {
2187+
n := Index(benchInputHard, sep)
2188+
if n < 0 {
2189+
n = len(benchInputHard)
2190+
}
2191+
b.SetBytes(int64(n))
20802192
for i := 0; i < b.N; i++ {
20812193
Index(benchInputHard, sep)
20822194
}

src/strings/strings.go

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ func IndexByte(s string, c byte) int {
125125
// If r is [utf8.RuneError], it returns the first instance of any
126126
// invalid UTF-8 byte sequence.
127127
func IndexRune(s string, r rune) int {
128+
const haveFastIndex = bytealg.MaxBruteForce > 0
128129
switch {
129130
case 0 <= r && r < utf8.RuneSelf:
130131
return IndexByte(s, byte(r))
@@ -138,7 +139,60 @@ func IndexRune(s string, r rune) int {
138139
case !utf8.ValidRune(r):
139140
return -1
140141
default:
141-
return Index(s, string(r))
142+
// Search for rune r using the last byte of its UTF-8 encoded form.
143+
// The distribution of the last byte is more uniform compared to the
144+
// first byte which has a 78% chance of being [240, 243, 244].
145+
rs := string(r)
146+
last := len(rs) - 1
147+
i := last
148+
fails := 0
149+
for i < len(s) {
150+
if s[i] != rs[last] {
151+
o := IndexByte(s[i+1:], rs[last])
152+
if o < 0 {
153+
return -1
154+
}
155+
i += o + 1
156+
}
157+
// Step backwards comparing bytes.
158+
for j := 1; j < len(rs); j++ {
159+
if s[i-j] != rs[last-j] {
160+
goto next
161+
}
162+
}
163+
return i - last
164+
next:
165+
fails++
166+
i++
167+
if (haveFastIndex && fails > bytealg.Cutover(i)) && i < len(s) ||
168+
(!haveFastIndex && fails >= 4+i>>4 && i < len(s)) {
169+
goto fallback
170+
}
171+
}
172+
return -1
173+
174+
fallback:
175+
// see comment in ../bytes/bytes.go
176+
if haveFastIndex {
177+
if j := bytealg.IndexString(s[i-last:], string(r)); j >= 0 {
178+
return i + j - last
179+
}
180+
} else {
181+
c0 := rs[last]
182+
c1 := rs[last-1]
183+
loop:
184+
for ; i < len(s); i++ {
185+
if s[i] == c0 && s[i-1] == c1 {
186+
for k := 2; k < len(rs); k++ {
187+
if s[i-k] != rs[last-k] {
188+
continue loop
189+
}
190+
}
191+
return i - last
192+
}
193+
}
194+
}
195+
return -1
142196
}
143197
}
144198

src/strings/strings_test.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ var indexTests = []IndexTest{
155155
// test fallback to Rabin-Karp.
156156
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
157157
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
158+
// test fallback to IndexRune
159+
{"oxoxoxoxoxoxoxoxoxoxox☺", "☺", 22},
160+
// invalid UTF-8 byte sequence (must be longer than bytealg.MaxBruteForce to
161+
// test that we don't use IndexRune)
162+
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx\xed\x9f\xc0", "\xed\x9f\xc0", 105},
158163
}
159164

160165
var lastIndexTests = []IndexTest{
@@ -326,20 +331,52 @@ func TestIndexRune(t *testing.T) {
326331
{"a☺b☻c☹d\xe2\x98\xff\xed\xa0\x80", -1, -1},
327332
{"a☺b☻c☹d\xe2\x98\xff\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
328333
{"a☺b☻c☹d\xe2\x98\xff\xed\xa0\x80", utf8.MaxRune + 1, -1},
334+
335+
// 2 bytes
336+
{"ӆ", 'ӆ', 0},
337+
{"a", 'ӆ', -1},
338+
{" ӆ", 'ӆ', 2},
339+
{" a", 'ӆ', -1},
340+
{Repeat("ц", 64) + "ӆ", 'ӆ', 128}, // test cutover
341+
{Repeat("Ꙁ", 64) + "Ꚁ", '䚀', -1}, // 'Ꚁ' and '䚀' share the same last two bytes
342+
343+
// 3 bytes
344+
{"Ꚁ", 'Ꚁ', 0},
345+
{"a", 'Ꚁ', -1},
346+
{" Ꚁ", 'Ꚁ', 2},
347+
{" a", 'Ꚁ', -1},
348+
{Repeat("Ꙁ", 64) + "Ꚁ", 'Ꚁ', 192}, // test cutover
349+
{Repeat("𡋀", 64) + "𡌀", '𣌀', -1}, // '𡌀' and '𣌀' share the same last two bytes
350+
351+
// 4 bytes
352+
{"𡌀", '𡌀', 0},
353+
{"a", '𡌀', -1},
354+
{" 𡌀", '𡌀', 2},
355+
{" a", '𡌀', -1},
356+
{Repeat("𡋀", 64) + "𡌀", '𡌀', 256}, // test cutover
357+
{Repeat("𡋀", 64), '𡌀', -1},
358+
359+
// Test the cutover to bytealg.IndexString when it is triggered in
360+
// the middle of a rune that contains consecutive runs of equal bytes.
361+
{"aaaaaKKKK\U000bc104", '\U000bc104', 17}, // cutover: (n + 16) / 8
362+
{"aaaaaKKKK鄄", '鄄', 17},
363+
{"aaKKKKKa\U000bc104", '\U000bc104', 18}, // cutover: 4 + n>>4
364+
{"aaKKKKKa鄄", '鄄', 18},
329365
}
330366
for _, tt := range tests {
331367
if got := IndexRune(tt.in, tt.rune); got != tt.want {
332368
t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
333369
}
334370
}
335371

336-
haystack := "test世界"
372+
// Make sure we trigger the cutover and string(rune) conversion.
373+
haystack := "test" + Repeat("𡋀", 32) + "𡌀"
337374
allocs := testing.AllocsPerRun(1000, func() {
338375
if i := IndexRune(haystack, 's'); i != 2 {
339376
t.Fatalf("'s' at %d; want 2", i)
340377
}
341-
if i := IndexRune(haystack, '世'); i != 4 {
342-
t.Fatalf("'世' at %d; want 4", i)
378+
if i := IndexRune(haystack, '𡌀'); i != 132 {
379+
t.Fatalf("'𡌀' at %d; want 4", i)
343380
}
344381
})
345382
if allocs != 0 && testing.CoverMode() == "" {

0 commit comments

Comments
 (0)