Skip to content

Commit 4b4f222

Browse files
benhoyt authored and bradfitz committed
bytes, strings: speed up TrimSpace 4-5x for common ASCII cases
This change adds a fast path for ASCII strings to both
strings.TrimSpace and bytes.TrimSpace. It doesn't slow down the
non-ASCII path much, if at all.

I added benchmarks for strings.TrimSpace as it didn't have any, and
I fleshed out the benchmarks for bytes.TrimSpace as it just had one
case (for ASCII). The benchmarks (and the code!) are now the same
between the two versions. Below are the benchmark results:

strings.TrimSpace:

name                      old time/op  new time/op  delta
TrimSpace/NoTrim-8        18.6ns ± 0%   3.8ns ± 0%  -79.53%  (p=0.000 n=5+4)
TrimSpace/ASCII-8         33.5ns ± 2%   6.0ns ± 3%  -82.05%  (p=0.008 n=5+5)
TrimSpace/SomeNonASCII-8  97.1ns ± 1%  88.6ns ± 1%   -8.68%  (p=0.008 n=5+5)
TrimSpace/JustNonASCII-8   144ns ± 0%   143ns ± 0%     ~     (p=0.079 n=4+5)

bytes.TrimSpace:

name                      old time/op  new time/op  delta
TrimSpace/NoTrim-8        18.9ns ± 1%   4.1ns ± 1%  -78.34%  (p=0.008 n=5+5)
TrimSpace/ASCII-8         29.9ns ± 0%   6.3ns ± 1%  -79.06%  (p=0.008 n=5+5)
TrimSpace/SomeNonASCII-8  91.5ns ± 0%  82.3ns ± 0%  -10.03%  (p=0.008 n=5+5)
TrimSpace/JustNonASCII-8   150ns ± 0%   150ns ± 0%     ~     (all equal)

Fixes #29122

Change-Id: Ica45cd86a219cadf60173ec9db260133cd1d7951
Reviewed-on: https://go-review.googlesource.com/c/go/+/152917
Reviewed-by: Daniel Martí <[email protected]>
Reviewed-by: Brad Fitzpatrick <[email protected]>
Run-TryBot: Daniel Martí <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 10aede2 commit 4b4f222

File tree

4 files changed

+91
-5
lines changed

4 files changed

+91
-5
lines changed

src/bytes/bytes.go

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,36 @@ func TrimRight(s []byte, cutset string) []byte {
759759
// TrimSpace returns a subslice of s by slicing off all leading and
760760
// trailing white space, as defined by Unicode.
761761
func TrimSpace(s []byte) []byte {
762-
return TrimFunc(s, unicode.IsSpace)
762+
// Fast path for ASCII: scan forward to the first ASCII non-space byte.
763+
start := 0
764+
for ; start < len(s); start++ {
765+
c := s[start]
766+
if c >= utf8.RuneSelf {
767+
// If we run into a non-ASCII byte, fall back to the
768+
// slower Unicode-aware method on the remaining bytes.
769+
return TrimFunc(s[start:], unicode.IsSpace)
770+
}
771+
if asciiSpace[c] == 0 {
772+
break
773+
}
774+
}
775+
776+
// Now scan backward from the end for the last ASCII non-space byte.
777+
stop := len(s)
778+
for ; stop > start; stop-- {
779+
c := s[stop-1]
780+
if c >= utf8.RuneSelf {
781+
// Non-ASCII byte at the tail: let TrimFunc finish s[start:stop].
return TrimFunc(s[start:stop], unicode.IsSpace)
782+
}
783+
if asciiSpace[c] == 0 {
784+
break
785+
}
786+
}
787+
788+
// At this point s[start:stop] is empty or starts and ends with
789+
// an ASCII non-space byte, so we're done. Non-ASCII cases have
790+
// already been handled above.
// NOTE(review): for all-space ASCII input this returns an empty
// subslice of s rather than a guaranteed nil; confirm this matches
// what the removed TrimFunc call returned for callers that test for nil.
791+
return s[start:stop]
763792
}
764793

765794
// Runes interprets s as a sequence of UTF-8-encoded code points.

src/bytes/bytes_test.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,9 +1617,21 @@ func BenchmarkFieldsFunc(b *testing.B) {
16171617
}
16181618

16191619
func BenchmarkTrimSpace(b *testing.B) {
1620-
s := []byte(" Some text. \n")
1621-
for i := 0; i < b.N; i++ {
1622-
TrimSpace(s)
1620+
// Benchmark inputs: nothing to trim, ASCII-only, ASCII mixed with
// non-ASCII characters, and non-ASCII characters only.
tests := []struct {
1621+
name string
1622+
input []byte
1623+
}{
1624+
{"NoTrim", []byte("typical")},
1625+
{"ASCII", []byte(" foo bar ")},
1626+
{"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")},
1627+
{"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
1628+
}
1629+
// Run each table entry as its own sub-benchmark, named after the entry.
for _, test := range tests {
1630+
b.Run(test.name, func(b *testing.B) {
1631+
for i := 0; i < b.N; i++ {
1632+
TrimSpace(test.input)
1633+
}
1634+
})
16231635
}
16241636
}
16251637

src/strings/strings.go

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -818,7 +818,36 @@ func TrimRight(s string, cutset string) string {
818818
// TrimSpace returns a slice of the string s, with all leading
819819
// and trailing white space removed, as defined by Unicode.
820820
func TrimSpace(s string) string {
821-
return TrimFunc(s, unicode.IsSpace)
821+
// Fast path for ASCII: scan forward to the first ASCII non-space byte.
822+
start := 0
823+
for ; start < len(s); start++ {
824+
c := s[start]
825+
if c >= utf8.RuneSelf {
826+
// If we run into a non-ASCII byte, fall back to the
827+
// slower Unicode-aware method on the remaining bytes.
828+
return TrimFunc(s[start:], unicode.IsSpace)
829+
}
830+
if asciiSpace[c] == 0 {
831+
break
832+
}
833+
}
834+
835+
// Now scan backward from the end for the last ASCII non-space byte.
836+
stop := len(s)
837+
for ; stop > start; stop-- {
838+
c := s[stop-1]
839+
if c >= utf8.RuneSelf {
840+
// Non-ASCII byte at the tail: let TrimFunc finish s[start:stop].
return TrimFunc(s[start:stop], unicode.IsSpace)
841+
}
842+
if asciiSpace[c] == 0 {
843+
break
844+
}
845+
}
846+
847+
// At this point s[start:stop] is empty or starts and ends with
848+
// an ASCII non-space byte, so we're done. Non-ASCII cases have
849+
// already been handled above.
850+
return s[start:stop]
822851
}
823852

824853
// TrimPrefix returns s without the provided leading prefix string.

src/strings/strings_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,3 +1731,19 @@ func BenchmarkJoin(b *testing.B) {
17311731
})
17321732
}
17331733
}
1734+
1735+
func BenchmarkTrimSpace(b *testing.B) {
1736+
// Benchmark inputs: nothing to trim, ASCII-only, ASCII mixed with
// non-ASCII characters, and non-ASCII characters only.
tests := []struct{ name, input string }{
1737+
{"NoTrim", "typical"},
1738+
{"ASCII", " foo bar "},
1739+
{"SomeNonASCII", " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
1740+
{"JustNonASCII", "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
1741+
}
1742+
// Run each table entry as its own sub-benchmark, named after the entry.
for _, test := range tests {
1743+
b.Run(test.name, func(b *testing.B) {
1744+
for i := 0; i < b.N; i++ {
1745+
TrimSpace(test.input)
1746+
}
1747+
})
1748+
}
1749+
}

0 commit comments

Comments
 (0)