Skip to content

Commit fd82718

Browse files
committed
internal/bytealg: correct alignment checks for compare/memequal on riscv64
On riscv64 we need 8 byte alignment for 8 byte loads - the existing check was only ensuring 4 byte alignment, which potentially results in unaligned loads being performed. Unaligned loads incur a significant performance penalty due to the resulting kernel traps and fix ups. Adjust BenchmarkCompareBytesBigUnaligned so that this issue would have been more readily visible. Updates #50615 name old time/op new time/op delta CompareBytesBigUnaligned/offset=1-4 6.98ms ± 5% 6.84ms ± 3% ~ (p=0.319 n=5+5) CompareBytesBigUnaligned/offset=2-4 6.75ms ± 1% 6.99ms ± 4% ~ (p=0.063 n=5+5) CompareBytesBigUnaligned/offset=3-4 6.84ms ± 1% 6.74ms ± 1% -1.48% (p=0.003 n=5+5) CompareBytesBigUnaligned/offset=4-4 146ms ± 1% 7ms ± 6% -95.08% (p=0.000 n=5+5) CompareBytesBigUnaligned/offset=5-4 7.05ms ± 5% 6.75ms ± 1% ~ (p=0.079 n=5+5) CompareBytesBigUnaligned/offset=6-4 7.11ms ± 5% 6.89ms ± 5% ~ (p=0.177 n=5+5) CompareBytesBigUnaligned/offset=7-4 7.14ms ± 5% 6.91ms ± 6% ~ (p=0.165 n=5+5) name old speed new speed delta CompareBytesBigUnaligned/offset=1-4 150MB/s ± 5% 153MB/s ± 3% ~ (p=0.336 n=5+5) CompareBytesBigUnaligned/offset=2-4 155MB/s ± 1% 150MB/s ± 4% ~ (p=0.058 n=5+5) CompareBytesBigUnaligned/offset=3-4 153MB/s ± 1% 156MB/s ± 1% +1.51% (p=0.004 n=5+5) CompareBytesBigUnaligned/offset=4-4 7.16MB/s ± 1% 145.79MB/s ± 6% +1936.23% (p=0.000 n=5+5) CompareBytesBigUnaligned/offset=5-4 149MB/s ± 5% 155MB/s ± 1% ~ (p=0.078 n=5+5) CompareBytesBigUnaligned/offset=6-4 148MB/s ± 5% 152MB/s ± 5% ~ (p=0.175 n=5+5) CompareBytesBigUnaligned/offset=7-4 147MB/s ± 5% 152MB/s ± 6% ~ (p=0.160 n=5+5) Change-Id: I2c859e061919db482318ce63b85b808aa973a9ba Reviewed-on: https://go-review.googlesource.com/c/go/+/431099 Reviewed-by: Meng Zhuo <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Run-TryBot: Joel Sing <[email protected]> Reviewed-by: Bryan Mills <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent ceffdc8 commit fd82718

File tree

3 files changed

+17
-8
lines changed

3 files changed

+17
-8
lines changed

src/bytes/compare_test.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package bytes_test
66

77
import (
88
. "bytes"
9+
"fmt"
910
"internal/testenv"
1011
"testing"
1112
)
@@ -213,22 +214,30 @@ func BenchmarkCompareBytesDifferentLength(b *testing.B) {
213214
}
214215
}
215216

216-
func BenchmarkCompareBytesBigUnaligned(b *testing.B) {
217+
func benchmarkCompareBytesBigUnaligned(b *testing.B, offset int) {
217218
b.StopTimer()
218219
b1 := make([]byte, 0, 1<<20)
219220
for len(b1) < 1<<20 {
220221
b1 = append(b1, "Hello Gophers!"...)
221222
}
222-
b2 := append([]byte("hello"), b1...)
223+
b2 := append([]byte("12345678")[:offset], b1...)
223224
b.StartTimer()
224-
for i := 0; i < b.N; i++ {
225-
if Compare(b1, b2[len("hello"):]) != 0 {
225+
for j := 0; j < b.N; j++ {
226+
if Compare(b1, b2[offset:]) != 0 {
226227
b.Fatal("b1 != b2")
227228
}
228229
}
229230
b.SetBytes(int64(len(b1)))
230231
}
231232

233+
func BenchmarkCompareBytesBigUnaligned(b *testing.B) {
234+
for i := 1; i < 8; i++ {
235+
b.Run(fmt.Sprintf("offset=%d", i), func(b *testing.B) {
236+
benchmarkCompareBytesBigUnaligned(b, i)
237+
})
238+
}
239+
}
240+
232241
func BenchmarkCompareBytesBig(b *testing.B) {
233242
b.StopTimer()
234243
b1 := make([]byte, 0, 1<<20)

src/internal/bytealg/compare_riscv64.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ use_a_len:
5858
BLT X5, X6, loop4_check
5959

6060
// Check alignment - if alignment differs we have to do one byte at a time.
61-
AND $3, X10, X7
62-
AND $3, X12, X8
61+
AND $7, X10, X7
62+
AND $7, X12, X8
6363
BNE X7, X8, loop4_check
6464
BEQZ X7, loop32_check
6565

src/internal/bytealg/equal_riscv64.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
4242
BLT X12, X23, loop4_check
4343

4444
// Check alignment - if alignment differs we have to do one byte at a time.
45-
AND $3, X10, X9
46-
AND $3, X11, X19
45+
AND $7, X10, X9
46+
AND $7, X11, X19
4747
BNE X9, X19, loop4_check
4848
BEQZ X9, loop32_check
4949

0 commit comments

Comments
 (0)