Skip to content

cmd/compile: performance of slice append is optimizable #36405

Closed
@lhonglwl

Description

@lhonglwl

What version of Go are you using (go version)?

$ go version
go version go1.13.5 linux/amd64

What did you do?

package main

import "testing"

func BenchmarkAppend(b *testing.B) {
    buf1 := make([]byte, 3, 10)
    buf1[0] = 0
    buf1[1] = 1
    buf1[2] = 2
    buf2 := []byte{'a', 'b', 'c', 'd', 'e', 'f', 'g'}
    for i := 0; i < b.N; i++ {
        buf1 = buf1[:3]
        buf1 = append(buf1, buf2...)
    }
}

func BenchmarkAppend2(b *testing.B) {
    buf1 := make([]byte, 3, 10)
    buf1[0] = 0
    buf1[1] = 1
    buf1[2] = 2
    buf2 := buf1[3:10]
    buf2[0] = 'a'
    buf2[1] = 'b'
    buf2[2] = 'c'
    buf2[3] = 'd'
    buf2[4] = 'e'
    buf2[5] = 'f'
    buf2[6] = 'g'
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        buf1 = buf1[:3]
        buf1 = append(buf1, buf2...)
    }
}

func BenchmarkAppend3(b *testing.B) {
    buf1 := make([]byte, 3, 10)
    buf1[0] = 0
    buf1[1] = 1
    buf1[2] = 2
    for i := 0; i < b.N; i++ {
        buf2 := []byte{'a', 'b', 'c', 'd', 'e', 'f', 'g'}
        buf1 = buf1[:3]
        buf1 = append(buf1, buf2...)
    }
}

func BenchmarkAppend4(b *testing.B) {
    buf1 := make([]byte, 3, 10)
    buf1[0] = 0
    buf1[1] = 1
    buf1[2] = 2
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        buf2 := buf1[3:10]
        buf2[0] = 'a'
        buf2[1] = 'b'
        buf2[2] = 'c'
        buf2[3] = 'd'
        buf2[4] = 'e'
        buf2[5] = 'f'
        buf2[6] = 'g'
        buf1 = buf1[:3]
        buf1 = append(buf1, buf2...)
    }
}

output:

BenchmarkAppend-8       1000000000               0.879 ns/op
BenchmarkAppend2-8      193146717                6.20 ns/op
BenchmarkAppend3-8      172225722                7.04 ns/op
BenchmarkAppend4-8      170996562                6.96 ns/op

What did you expect to see?

I expected BenchmarkAppend2 would be obviously much fastest than BenchmarkAppend as the memory of buf1 and buf2 is continuous. append operator would not has actually memmove in BenchmarkAppend2.

But the benchmark says BenchmarkAppend2 cost much more time than BenchmarkAppend, just as it has malloc memory for every loop because it cost almost equal to BenchmarkAppend3 and BenchmarkAppend3

I wonder it's there has something wrong or bug in append for memory copy, or will it be fix/optimize in future?

Metadata

Metadata

Assignees

No one assigned

    Labels

    FrozenDueToAgeWaitingForInfoIssue is not actionable because of missing required information, which needs to be provided.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions