Skip to content

Commit bea39e6

Browse files
valyala authored and bradfitz committed
regexp: reduce mallocs in Regexp.Find* and Regexp.ReplaceAll*.
This improves Regexp.Find* and Regexp.ReplaceAll* speed:

name                  old time/op    new time/op    delta
Find-4                 345ns ± 1%     314ns ± 1%    -8.94%  (p=0.000 n=9+8)
FindString-4           341ns ± 1%     308ns ± 0%    -9.85%  (p=0.000 n=10+9)
FindSubmatch-4         440ns ± 1%     404ns ± 0%    -8.27%  (p=0.000 n=10+8)
FindStringSubmatch-4   426ns ± 0%     387ns ± 0%    -9.07%  (p=0.000 n=10+9)
ReplaceAll-4          1.75µs ± 1%    1.67µs ± 0%    -4.45%  (p=0.000 n=9+10)

name                  old alloc/op   new alloc/op   delta
Find-4                 16.0B ± 0%      0.0B ±NaN%  -100.00%  (p=0.000 n=10+10)
FindString-4           16.0B ± 0%      0.0B ±NaN%  -100.00%  (p=0.000 n=10+10)
FindSubmatch-4         80.0B ± 0%     48.0B ± 0%   -40.00%  (p=0.000 n=10+10)
FindStringSubmatch-4   64.0B ± 0%     32.0B ± 0%   -50.00%  (p=0.000 n=10+10)
ReplaceAll-4            152B ± 0%      104B ± 0%   -31.58%  (p=0.000 n=10+10)

name                  old allocs/op  new allocs/op  delta
Find-4                  1.00 ± 0%      0.00 ±NaN%  -100.00%  (p=0.000 n=10+10)
FindString-4            1.00 ± 0%      0.00 ±NaN%  -100.00%  (p=0.000 n=10+10)
FindSubmatch-4          2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.000 n=10+10)
FindStringSubmatch-4    2.00 ± 0%      1.00 ± 0%   -50.00%  (p=0.000 n=10+10)
ReplaceAll-4            8.00 ± 0%      5.00 ± 0%   -37.50%  (p=0.000 n=10+10)

Fixes #15643

Change-Id: I594fe51172373e2adb98d1d25c76ca2cde54ff48
Reviewed-on: https://go-review.googlesource.com/23030
Reviewed-by: Brad Fitzpatrick <[email protected]>
Run-TryBot: Brad Fitzpatrick <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 5923df1 commit bea39e6

File tree

3 files changed

+104
-28
lines changed

3 files changed

+104
-28
lines changed

src/regexp/all_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,72 @@ func TestSwitchBacktrack(t *testing.T) {
564564
re.Match(long[:1]) // triggers backtracker
565565
}
566566

567+
func BenchmarkFind(b *testing.B) {
568+
b.StopTimer()
569+
re := MustCompile("a+b+")
570+
wantSubs := "aaabb"
571+
s := []byte("acbb" + wantSubs + "dd")
572+
b.StartTimer()
573+
b.ReportAllocs()
574+
for i := 0; i < b.N; i++ {
575+
subs := re.Find(s)
576+
if string(subs) != wantSubs {
577+
b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
578+
}
579+
}
580+
}
581+
582+
func BenchmarkFindString(b *testing.B) {
583+
b.StopTimer()
584+
re := MustCompile("a+b+")
585+
wantSubs := "aaabb"
586+
s := "acbb" + wantSubs + "dd"
587+
b.StartTimer()
588+
b.ReportAllocs()
589+
for i := 0; i < b.N; i++ {
590+
subs := re.FindString(s)
591+
if subs != wantSubs {
592+
b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
593+
}
594+
}
595+
}
596+
597+
func BenchmarkFindSubmatch(b *testing.B) {
598+
b.StopTimer()
599+
re := MustCompile("a(a+b+)b")
600+
wantSubs := "aaabb"
601+
s := []byte("acbb" + wantSubs + "dd")
602+
b.StartTimer()
603+
b.ReportAllocs()
604+
for i := 0; i < b.N; i++ {
605+
subs := re.FindSubmatch(s)
606+
if string(subs[0]) != wantSubs {
607+
b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
608+
}
609+
if string(subs[1]) != "aab" {
610+
b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
611+
}
612+
}
613+
}
614+
615+
func BenchmarkFindStringSubmatch(b *testing.B) {
616+
b.StopTimer()
617+
re := MustCompile("a(a+b+)b")
618+
wantSubs := "aaabb"
619+
s := "acbb" + wantSubs + "dd"
620+
b.StartTimer()
621+
b.ReportAllocs()
622+
for i := 0; i < b.N; i++ {
623+
subs := re.FindStringSubmatch(s)
624+
if subs[0] != wantSubs {
625+
b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
626+
}
627+
if subs[1] != "aab" {
628+
b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
629+
}
630+
}
631+
}
632+
567633
func BenchmarkLiteral(b *testing.B) {
568634
x := strings.Repeat("x", 50) + "y"
569635
b.StopTimer()

src/regexp/exec.go

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,16 @@ func (m *machine) onepass(i input, pos int) bool {
405405
return m.matched
406406
}
407407

408-
// empty is a non-nil 0-element slice,
409-
// so doExecute can avoid an allocation
410-
// when 0 captures are requested from a successful match.
411-
var empty = make([]int, 0)
408+
// doMatch reports whether either r, b or s match the regexp.
409+
func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
410+
return re.doExecute(r, b, s, 0, 0, nil) != nil
411+
}
412412

413-
// doExecute finds the leftmost match in the input and returns
414-
// the position of its subexpressions.
415-
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int {
413+
// doExecute finds the leftmost match in the input, appends the position
414+
// of its subexpressions to dstCap and returns dstCap.
415+
//
416+
// nil is returned if no matches are found and non-nil if matches are found.
417+
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
416418
m := re.get()
417419
var i input
418420
var size int
@@ -445,12 +447,15 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
445447
return nil
446448
}
447449
}
448-
if ncap == 0 {
449-
re.put(m)
450-
return empty // empty but not nil
450+
dstCap = append(dstCap, m.matchcap...)
451+
if dstCap == nil {
452+
// Keep the promise of returning non-nil value on match.
453+
dstCap = arrayNoInts[:0]
451454
}
452-
cap := make([]int, len(m.matchcap))
453-
copy(cap, m.matchcap)
454455
re.put(m)
455-
return cap
456+
return dstCap
456457
}
458+
459+
// arrayNoInts is returned by doExecute match if nil dstCap is passed
460+
// to it with ncap=0.
461+
var arrayNoInts [0]int

src/regexp/regexp.go

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -408,17 +408,17 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
408408
// MatchReader reports whether the Regexp matches the text read by the
409409
// RuneReader.
410410
func (re *Regexp) MatchReader(r io.RuneReader) bool {
411-
return re.doExecute(r, nil, "", 0, 0) != nil
411+
return re.doMatch(r, nil, "")
412412
}
413413

414414
// MatchString reports whether the Regexp matches the string s.
415415
func (re *Regexp) MatchString(s string) bool {
416-
return re.doExecute(nil, nil, s, 0, 0) != nil
416+
return re.doMatch(nil, nil, s)
417417
}
418418

419419
// Match reports whether the Regexp matches the byte slice b.
420420
func (re *Regexp) Match(b []byte) bool {
421-
return re.doExecute(nil, b, "", 0, 0) != nil
421+
return re.doMatch(nil, b, "")
422422
}
423423

424424
// MatchReader checks whether a textual regular expression matches the text
@@ -502,8 +502,9 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
502502
nmatch = re.prog.NumCap
503503
}
504504

505+
var dstCap [2]int
505506
for searchPos <= endPos {
506-
a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
507+
a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
507508
if len(a) == 0 {
508509
break // no more matches
509510
}
@@ -641,7 +642,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
641642
}
642643

643644
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
644-
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
645+
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
645646
if len(matches) == 0 {
646647
break
647648
}
@@ -681,7 +682,8 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
681682
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
682683
// A return value of nil indicates no match.
683684
func (re *Regexp) Find(b []byte) []byte {
684-
a := re.doExecute(nil, b, "", 0, 2)
685+
var dstCap [2]int
686+
a := re.doExecute(nil, b, "", 0, 2, dstCap[:0])
685687
if a == nil {
686688
return nil
687689
}
@@ -693,7 +695,7 @@ func (re *Regexp) Find(b []byte) []byte {
693695
// b[loc[0]:loc[1]].
694696
// A return value of nil indicates no match.
695697
func (re *Regexp) FindIndex(b []byte) (loc []int) {
696-
a := re.doExecute(nil, b, "", 0, 2)
698+
a := re.doExecute(nil, b, "", 0, 2, nil)
697699
if a == nil {
698700
return nil
699701
}
@@ -706,7 +708,8 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
706708
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
707709
// necessary to distinguish these cases.
708710
func (re *Regexp) FindString(s string) string {
709-
a := re.doExecute(nil, nil, s, 0, 2)
711+
var dstCap [2]int
712+
a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0])
710713
if a == nil {
711714
return ""
712715
}
@@ -718,7 +721,7 @@ func (re *Regexp) FindString(s string) string {
718721
// itself is at s[loc[0]:loc[1]].
719722
// A return value of nil indicates no match.
720723
func (re *Regexp) FindStringIndex(s string) (loc []int) {
721-
a := re.doExecute(nil, nil, s, 0, 2)
724+
a := re.doExecute(nil, nil, s, 0, 2, nil)
722725
if a == nil {
723726
return nil
724727
}
@@ -731,7 +734,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
731734
// byte offset loc[0] through loc[1]-1.
732735
// A return value of nil indicates no match.
733736
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
734-
a := re.doExecute(r, nil, "", 0, 2)
737+
a := re.doExecute(r, nil, "", 0, 2, nil)
735738
if a == nil {
736739
return nil
737740
}
@@ -744,7 +747,8 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
744747
// comment.
745748
// A return value of nil indicates no match.
746749
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
747-
a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
750+
var dstCap [4]int
751+
a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0])
748752
if a == nil {
749753
return nil
750754
}
@@ -891,7 +895,7 @@ func extract(str string) (name string, num int, rest string, ok bool) {
891895
// in the package comment.
892896
// A return value of nil indicates no match.
893897
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
894-
return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
898+
return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil))
895899
}
896900

897901
// FindStringSubmatch returns a slice of strings holding the text of the
@@ -900,7 +904,8 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
900904
// package comment.
901905
// A return value of nil indicates no match.
902906
func (re *Regexp) FindStringSubmatch(s string) []string {
903-
a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
907+
var dstCap [4]int
908+
a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0])
904909
if a == nil {
905910
return nil
906911
}
@@ -919,7 +924,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
919924
// 'Index' descriptions in the package comment.
920925
// A return value of nil indicates no match.
921926
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
922-
return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
927+
return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil))
923928
}
924929

925930
// FindReaderSubmatchIndex returns a slice holding the index pairs
@@ -928,7 +933,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int {
928933
// by the 'Submatch' and 'Index' descriptions in the package comment. A
929934
// return value of nil indicates no match.
930935
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
931-
return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
936+
return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil))
932937
}
933938

934939
const startSize = 10 // The size at which to start a slice in the 'All' routines.

0 commit comments

Comments
 (0)