GODRIVER-2914 bson: improve marshal/unmarshal performance by ~58% and ~29% #1313

Merged: 5 commits, Aug 16, 2023

Changes from all commits
142 changes: 142 additions & 0 deletions bson/benchmark_test.go
@@ -7,12 +7,15 @@
package bson

import (
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"sync"
"testing"
)

@@ -129,10 +132,20 @@ var nestedInstance = nestedtest1{

const extendedBSONDir = "../testdata/extended_bson"

var (
extJSONFiles map[string]map[string]interface{}
extJSONFilesMu sync.Mutex
)

// readExtJSONFile reads the GZIP-compressed extended JSON document from the given filename in the
// "extended BSON" test data directory (../testdata/extended_bson) and returns it as a
// map[string]interface{}. It panics on any errors.
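// Results are cached in extJSONFiles (guarded by extJSONFilesMu), so each file is
// read and decoded at most once per process across benchmark runs.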
func readExtJSONFile(filename string) map[string]interface{} {
extJSONFilesMu.Lock()
defer extJSONFilesMu.Unlock()
if v, ok := extJSONFiles[filename]; ok {
return v
}
filePath := path.Join(extendedBSONDir, filename)
file, err := os.Open(filePath)
if err != nil {
@@ -161,6 +174,10 @@ func readExtJSONFile(filename string) map[string]interface{} {
panic(fmt.Sprintf("error unmarshalling extended JSON: %s", err))
}

if extJSONFiles == nil {
extJSONFiles = make(map[string]map[string]interface{})
}
extJSONFiles[filename] = v
return v
}

@@ -305,3 +322,128 @@ func BenchmarkUnmarshal(b *testing.B) {
})
}
}

// The following benchmarks are copied from the Go standard library's
// encoding/json package.

type codeResponse struct {
Tree *codeNode `json:"tree"`
Username string `json:"username"`
}

type codeNode struct {
Name string `json:"name"`
Kids []*codeNode `json:"kids"`
CLWeight float64 `json:"cl_weight"`
Touches int `json:"touches"`
MinT int64 `json:"min_t"`
MaxT int64 `json:"max_t"`
MeanT int64 `json:"mean_t"`
}

var codeJSON []byte
var codeBSON []byte
var codeStruct codeResponse

func codeInit() {
f, err := os.Open("testdata/code.json.gz")
if err != nil {
panic(err)
}
defer f.Close()
gz, err := gzip.NewReader(f)
if err != nil {
panic(err)
}
data, err := io.ReadAll(gz)
if err != nil {
panic(err)
}

codeJSON = data

if err := json.Unmarshal(codeJSON, &codeStruct); err != nil {
panic("json.Unmarshal code.json: " + err.Error())
}

if data, err = json.Marshal(&codeStruct); err != nil {
panic("json.Marshal code.json: " + err.Error())
}

if codeBSON, err = Marshal(&codeStruct); err != nil {
panic("Marshal code.json: " + err.Error())
}

if !bytes.Equal(data, codeJSON) {
println("different lengths", len(data), len(codeJSON))
for i := 0; i < len(data) && i < len(codeJSON); i++ {
if data[i] != codeJSON[i] {
println("re-marshal: changed at byte", i)
println("orig: ", string(codeJSON[i-10:i+10]))
println("new: ", string(data[i-10:i+10]))
break
}
}
panic("re-marshal code.json: different result")
}
}

func BenchmarkCodeUnmarshal(b *testing.B) {
b.ReportAllocs()
if codeJSON == nil {
b.StopTimer()
codeInit()
b.StartTimer()
}
b.Run("BSON", func(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
var r codeResponse
if err := Unmarshal(codeBSON, &r); err != nil {
b.Fatal("Unmarshal:", err)
}
}
})
b.SetBytes(int64(len(codeBSON)))
})
b.Run("JSON", func(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
var r codeResponse
if err := json.Unmarshal(codeJSON, &r); err != nil {
b.Fatal("json.Unmarshal:", err)
}
}
})
b.SetBytes(int64(len(codeJSON)))
})
}

func BenchmarkCodeMarshal(b *testing.B) {
b.ReportAllocs()
if codeJSON == nil {
b.StopTimer()
codeInit()
b.StartTimer()
}
b.Run("BSON", func(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if _, err := Marshal(&codeStruct); err != nil {
b.Fatal("Marshal:", err)
}
}
})
b.SetBytes(int64(len(codeBSON)))
})
b.Run("JSON", func(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if _, err := json.Marshal(&codeStruct); err != nil {
b.Fatal("json.Marshal:", err)
}
}
})
b.SetBytes(int64(len(codeJSON)))

Collaborator:
Should this use the byte slice returned by json.Marshal instead?

charlievieth (Contributor, Author):
The output of json.Marshal and codeJSON are identical (we test for this when initializing these values):

if data, err = json.Marshal(&codeStruct); err != nil {
panic("json.Marshal code.json: " + err.Error())
}
if codeBSON, err = Marshal(&codeStruct); err != nil {
panic("Marshal code.json: " + err.Error())
}
if !bytes.Equal(data, codeJSON) {

Collaborator:
Ah, thanks for the update! Sounds good.

})
}
166 changes: 166 additions & 0 deletions bson/bsoncodec/codec_cache.go
@@ -0,0 +1,166 @@
// Copyright (C) MongoDB, Inc. 2017-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsoncodec

import (
"reflect"
"sync"
"sync/atomic"
)

// Runtime check that the kind encoder and decoder caches can store any valid
// reflect.Kind constant.
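// reflect.Kind.String returns "kind<N>" for values with no defined name, so if a
// future Go release adds a 28th kind (value 27), Kind(27).String() will no longer
// be "kind27" and this check will panic at package initialization.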
func init() {
if s := reflect.Kind(len(kindEncoderCache{}.entries)).String(); s != "kind27" {
panic("The capacity of kindEncoderCache is too small.\n" +
"This is due to a new type being added to reflect.Kind.")
}
}

// statically assert array size
var _ = (kindEncoderCache{}).entries[reflect.UnsafePointer]
var _ = (kindDecoderCache{}).entries[reflect.UnsafePointer]

type typeEncoderCache struct {
cache sync.Map // map[reflect.Type]ValueEncoder
}

func (c *typeEncoderCache) Store(rt reflect.Type, enc ValueEncoder) {
c.cache.Store(rt, enc)
}

func (c *typeEncoderCache) Load(rt reflect.Type) (ValueEncoder, bool) {
if v, _ := c.cache.Load(rt); v != nil {
return v.(ValueEncoder), true
}
return nil, false
}

func (c *typeEncoderCache) LoadOrStore(rt reflect.Type, enc ValueEncoder) ValueEncoder {
if v, loaded := c.cache.LoadOrStore(rt, enc); loaded {
enc = v.(ValueEncoder)
}
return enc
}

func (c *typeEncoderCache) Clone() *typeEncoderCache {
cc := new(typeEncoderCache)
c.cache.Range(func(k, v interface{}) bool {
if k != nil && v != nil {
cc.cache.Store(k, v)
}
return true
})
return cc
}

type typeDecoderCache struct {
cache sync.Map // map[reflect.Type]ValueDecoder
}

func (c *typeDecoderCache) Store(rt reflect.Type, dec ValueDecoder) {
c.cache.Store(rt, dec)
}

func (c *typeDecoderCache) Load(rt reflect.Type) (ValueDecoder, bool) {
if v, _ := c.cache.Load(rt); v != nil {
return v.(ValueDecoder), true
}
return nil, false
}

func (c *typeDecoderCache) LoadOrStore(rt reflect.Type, dec ValueDecoder) ValueDecoder {
if v, loaded := c.cache.LoadOrStore(rt, dec); loaded {
dec = v.(ValueDecoder)
}
return dec
}

func (c *typeDecoderCache) Clone() *typeDecoderCache {
cc := new(typeDecoderCache)
c.cache.Range(func(k, v interface{}) bool {
if k != nil && v != nil {
cc.cache.Store(k, v)
}
return true
})
return cc
}

// atomic.Value requires that all calls to Store() have the same concrete type
// so we wrap the ValueEncoder with a kindEncoderCacheEntry to ensure the type
// is always the same (since different concrete types may implement the
// ValueEncoder interface).
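// For example, if one call stored a concrete fooEncoder and a later call stored a
// different concrete barEncoder directly, atomic.Value.Store would panic with
// "store of inconsistently typed value"; wrapping every encoder in a
// *kindEncoderCacheEntry keeps the stored concrete type constant. (fooEncoder and
// barEncoder are hypothetical names used only for illustration.)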
type kindEncoderCacheEntry struct {
enc ValueEncoder
}

type kindEncoderCache struct {
entries [reflect.UnsafePointer + 1]atomic.Value // *kindEncoderCacheEntry

matthewdale (Collaborator), Jul 21, 2023:

Using an array sized by reflect.UnsafePointer risks becoming a problem if a future version of Go ever introduces a reflect kind that is greater than UnsafePointer. It doesn't seem to offer a significant performance improvement compared to using a sync.Map, so we should use a sync.Map here instead.

That suggestion goes for kindDecoderCache as well.

Here's the benchmark comparison between using a [reflect.UnsafePointer + 1]atomic.Value (old.txt) and a sync.Map (new.txt):

goos: darwin
goarch: arm64
pkg: go.mongodb.org/mongo-driver/bson
                      │   old.txt   │              new.txt               │
                      │   sec/op    │   sec/op     vs base               │
CodeUnmarshal/BSON-10   2.492m ± 4%   2.430m ± 5%       ~ (p=0.912 n=10)
CodeUnmarshal/JSON-10   3.078m ± 2%   3.038m ± 3%       ~ (p=0.075 n=10)
CodeMarshal/BSON-10     1.342m ± 3%   1.322m ± 4%       ~ (p=0.353 n=10)
CodeMarshal/JSON-10     497.1µ ± 1%   499.5µ ± 1%       ~ (p=0.218 n=10)
geomean                 1.504m        1.486m       -1.20%

                      │   old.txt    │               new.txt               │
                      │     B/s      │     B/s       vs base               │
CodeUnmarshal/BSON-10   742.7Mi ± 4%   761.5Mi ± 5%       ~ (p=0.912 n=10)
CodeUnmarshal/JSON-10   601.2Mi ± 2%   609.2Mi ± 2%       ~ (p=0.075 n=10)
CodeMarshal/BSON-10     1.346Gi ± 3%   1.367Gi ± 4%       ~ (p=0.353 n=10)
CodeMarshal/JSON-10     3.635Gi ± 1%   3.618Gi ± 1%       ~ (p=0.218 n=10)
geomean                 1.202Gi        1.216Gi       +1.22%

                      │   old.txt    │               new.txt               │
                      │     B/op     │     B/op      vs base               │
CodeUnmarshal/BSON-10   4.219Mi ± 0%   4.219Mi ± 0%       ~ (p=0.644 n=10)
CodeUnmarshal/JSON-10   2.904Mi ± 0%   2.904Mi ± 0%       ~ (p=0.304 n=10)
CodeMarshal/BSON-10     2.720Mi ± 2%   2.769Mi ± 2%       ~ (p=0.063 n=10)
CodeMarshal/JSON-10     1.855Mi ± 1%   1.863Mi ± 0%       ~ (p=0.105 n=10)
geomean                 2.804Mi        2.820Mi       +0.55%

                      │   old.txt   │               new.txt                │
                      │  allocs/op  │  allocs/op   vs base                 │
CodeUnmarshal/BSON-10   230.4k ± 0%   230.4k ± 0%       ~ (p=1.000 n=10) ¹
CodeUnmarshal/JSON-10   92.67k ± 0%   92.67k ± 0%       ~ (p=1.000 n=10) ¹
CodeMarshal/BSON-10     94.07k ± 0%   94.07k ± 0%  +0.00% (p=0.033 n=10)
CodeMarshal/JSON-10      1.000 ± 0%    1.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                 6.694k        6.694k       +0.00%
¹ all samples are equal

Edit: Here's a gist with the changes used to get those results.
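
For reference, the sync.Map-backed kind cache being discussed might look roughly like the following. This is an illustrative sketch that mirrors the typeEncoderCache defined earlier in this file; it is not the code from this PR or the linked gist:

	// Sketch of a sync.Map-backed alternative to the fixed-size array cache.
	type kindEncoderCacheSyncMap struct {
		cache sync.Map // map[reflect.Kind]ValueEncoder
	}

	func (c *kindEncoderCacheSyncMap) Store(k reflect.Kind, enc ValueEncoder) {
		if enc != nil {
			c.cache.Store(k, enc)
		}
	}

	func (c *kindEncoderCacheSyncMap) Load(k reflect.Kind) (ValueEncoder, bool) {
		// A nil check on the loaded value follows the Load pattern used by
		// typeEncoderCache in this file.
		if v, _ := c.cache.Load(k); v != nil {
			return v.(ValueEncoder), true
		}
		return nil, false
	}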

charlievieth (Contributor, Author), Jul 26, 2023:

I can go either way with this since we quickly store the result returned by the kind{Encoder,Decoder}Cache into Registry.type{Encoders,Decoders} (honestly, I wish we actually didn't need this and had a better way of handling lookupInterfaceEncoder).

That said, it is a lot faster when benchmarked in isolation, and we do test against a new kind being added. Adding one shouldn't be possible until Go 2.0, and the last time reflect.Kind was modified, the existing Kind values did not change.

$ go test -run '^$' -bench 'BenchmarkKindEncoderCacheLoad' -v
goos: darwin
goarch: arm64
pkg: go.mongodb.org/mongo-driver/bson/bsoncodec
BenchmarkKindEncoderCacheLoad
BenchmarkKindEncoderCacheLoad/Array
BenchmarkKindEncoderCacheLoad/Array-10         	541177398	         2.033 ns/op
BenchmarkKindEncoderCacheLoad/SyncMap
BenchmarkKindEncoderCacheLoad/SyncMap-10       	82671672	        14.64 ns/op
BenchmarkKindEncoderCacheLoad/Map
BenchmarkKindEncoderCacheLoad/Map-10           	207793422	         5.779 ns/op
PASS
ok  	go.mongodb.org/mongo-driver/bson/bsoncodec	4.638s
func BenchmarkKindEncoderCacheLoad(b *testing.B) {
	m := new(sync.Map)
	x := make(map[reflect.Kind]ValueEncoder)
	c := new(kindEncoderCache)
	for k := reflect.Kind(0); k <= reflect.UnsafePointer; k++ {
		c.Store(k, fakeCodec{})
		m.Store(k, fakeCodec{})
		x[k] = fakeCodec{}
	}
	b.Run("Array", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			kind := reflect.Kind(i) % (reflect.UnsafePointer + 1)
			_, ok := c.Load(kind)
			if !ok {
				b.Fatal("missing:", kind)
			}
		}
	})
	b.Run("SyncMap", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			kind := reflect.Kind(i) % (reflect.UnsafePointer + 1)
			_, ok := m.Load(kind)
			if !ok {
				b.Fatal("missing:", kind)
			}
		}
	})
	// Map listed here because this is potentially read-only and could be represented as a plain map.
	b.Run("Map", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			kind := reflect.Kind(i) % (reflect.UnsafePointer + 1)
			_, ok := x[kind]
			if !ok {
				b.Fatal("wat")
			}
		}
	})
}

Collaborator:

Yeah, [...]atomic.Value definitely benchmarks faster in isolation, and it does seem unlikely that Go 1.x will add new Kinds anytime soon. I also see the test now. I'm fine keeping it as-is.

Collaborator:

My concern is that a user could run an old driver against a new Go release that adds a reflect kind greater than UnsafePointer, and users can hardly be expected to guard against that themselves. I would suggest using a sync.Map as long as it performs better than the current implementation. Otherwise, we can move the capacity check into an init() in "codec_cache.go", like:

func init() {
	rt := reflect.Kind(len(kindEncoderCache{}.entries))
	s := rt.String()
	if !strings.Contains(s, strconv.Itoa(int(rt))) {
		panic("the capacity of kindEncoderCache is too small")
	}
}

charlievieth (Contributor, Author):

Fixed with: 924008c

}

func (c *kindEncoderCache) Store(rt reflect.Kind, enc ValueEncoder) {
if enc != nil && rt < reflect.Kind(len(c.entries)) {
c.entries[rt].Store(&kindEncoderCacheEntry{enc: enc})
}
}

func (c *kindEncoderCache) Load(rt reflect.Kind) (ValueEncoder, bool) {
if rt < reflect.Kind(len(c.entries)) {
if ent, ok := c.entries[rt].Load().(*kindEncoderCacheEntry); ok {
return ent.enc, ent.enc != nil
}
}
return nil, false
}

func (c *kindEncoderCache) Clone() *kindEncoderCache {
cc := new(kindEncoderCache)
for i, v := range c.entries {
if val := v.Load(); val != nil {
cc.entries[i].Store(val)
}
}
return cc
}

// atomic.Value requires that all calls to Store() have the same concrete type
// so we wrap the ValueDecoder with a kindDecoderCacheEntry to ensure the type
// is always the same (since different concrete types may implement the
// ValueDecoder interface).
type kindDecoderCacheEntry struct {
dec ValueDecoder
}

type kindDecoderCache struct {
entries [reflect.UnsafePointer + 1]atomic.Value // *kindDecoderCacheEntry
}

func (c *kindDecoderCache) Store(rt reflect.Kind, dec ValueDecoder) {
if rt < reflect.Kind(len(c.entries)) {
c.entries[rt].Store(&kindDecoderCacheEntry{dec: dec})
}
}

func (c *kindDecoderCache) Load(rt reflect.Kind) (ValueDecoder, bool) {
if rt < reflect.Kind(len(c.entries)) {
if ent, ok := c.entries[rt].Load().(*kindDecoderCacheEntry); ok {
return ent.dec, ent.dec != nil
}
}
return nil, false
}

func (c *kindDecoderCache) Clone() *kindDecoderCache {
cc := new(kindDecoderCache)
for i, v := range c.entries {
if val := v.Load(); val != nil {
cc.entries[i].Store(val)
}
}
return cc
}
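
For context, a hypothetical caller could layer the two kinds of cache roughly as follows. exampleRegistry, its fields, and lookupEncoder are illustrative names only and are not part of this diff:

	// exampleRegistry sketches how a registry-like type might combine the caches.
	type exampleRegistry struct {
		typeEncoders typeEncoderCache
		kindEncoders kindEncoderCache
	}

	func (r *exampleRegistry) lookupEncoder(t reflect.Type) (ValueEncoder, bool) {
		// Fast path: an encoder already resolved for this exact type.
		if enc, ok := r.typeEncoders.Load(t); ok {
			return enc, true
		}
		// Fall back to the per-kind cache and memoize the result for the type.
		// LoadOrStore avoids clobbering an encoder stored concurrently by
		// another goroutine looking up the same type.
		if enc, ok := r.kindEncoders.Load(t.Kind()); ok {
			return r.typeEncoders.LoadOrStore(t, enc), true
		}
		return nil, false
	}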