Skip to content

feat: add ChecksumAlgorithm option to decide which algorithm calculates checksums. #2197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
27 changes: 26 additions & 1 deletion badger/cmd/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package cmd
import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"io/fs"
"os"
Expand All @@ -21,6 +22,7 @@ import (

"github.com/dgraph-io/badger/v4"
"github.com/dgraph-io/badger/v4/options"
"github.com/dgraph-io/badger/v4/pb"
"github.com/dgraph-io/badger/v4/table"
"github.com/dgraph-io/badger/v4/y"
)
Expand All @@ -40,10 +42,14 @@ type flagOptions struct {
checksumVerificationMode string
discard bool
externalMagicVersion uint16
checksumAlgorithm string
}

var (
	// opt holds the values of the command-line flags registered on infoCmd.
	opt flagOptions

	// errInvalidChecksumAlgorithm is returned if the checksum algorithm is invalid.
	// The message is lowercase and unpunctuated so it composes cleanly when wrapped.
	errInvalidChecksumAlgorithm = errors.New("invalid checksum algorithm, supported values: crc32c, xxhash64")
)

func init() {
Expand All @@ -69,6 +75,8 @@ func init() {
infoCmd.Flags().StringVar(&opt.encryptionKey, "enc-key", "", "Use the provided encryption key")
infoCmd.Flags().StringVar(&opt.checksumVerificationMode, "cv-mode", "none",
"[none, table, block, tableAndBlock] Specifies when the db should verify checksum for SST.")
infoCmd.Flags().StringVar(&opt.checksumAlgorithm, "ct", "crc32c", "[crc32c,xxhash64] "+
"Specifies the checksum algorithm for SST.")
infoCmd.Flags().BoolVar(&opt.discard, "discard", false,
"Parse and print DISCARD file from value logs.")
infoCmd.Flags().Uint16Var(&opt.externalMagicVersion, "external-magic", 0,
Expand All @@ -89,14 +97,18 @@ to the Dgraph team.

func handleInfo(cmd *cobra.Command, args []string) error {
cvMode := checksumVerificationMode(opt.checksumVerificationMode)
ct, err := strToChecksumAlgorithm(opt.checksumAlgorithm)
y.Check(err)

bopt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(opt.readOnly).
WithBlockCacheSize(100 << 20).
WithIndexCacheSize(200 << 20).
WithEncryptionKey([]byte(opt.encryptionKey)).
WithChecksumVerificationMode(cvMode).
WithExternalMagic(opt.externalMagicVersion)
WithExternalMagic(opt.externalMagicVersion).
WithChecksumAlgorithm(ct)

if opt.discard {
ds, err := badger.InitDiscardStats(bopt)
Expand Down Expand Up @@ -515,6 +527,19 @@ func pluralFiles(count int) string {
return "files"
}

// When the checksum algorithm is invalid, strToChecksumAlgorithm returns the default checksum algorithm along with an error.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the benefit of returning the default checksum when we are returning the error along with it? I think we should return the default checksum with a nil error.

// strToChecksumAlgorithm maps the name of a checksum algorithm to its
// pb.Checksum_Algorithm value.
//
// It accepts "crc32c" and "xxhash64". For any other name it returns
// pb.Checksum_CRC32C together with a non-nil error built from
// errInvalidChecksumAlgorithm; the error context includes the rejected name.
func strToChecksumAlgorithm(ct string) (pb.Checksum_Algorithm, error) {
	switch ct {
	case "crc32c":
		return pb.Checksum_CRC32C, nil
	case "xxhash64":
		return pb.Checksum_XXHash64, nil
	default:
		// Quote the offending value so empty or whitespace-only input stays visible.
		return pb.Checksum_CRC32C, y.Wrap(errInvalidChecksumAlgorithm,
			fmt.Sprintf("parsing checksum algorithm %q", ct))
	}
}

func checksumVerificationMode(cvMode string) options.ChecksumVerificationMode {
switch cvMode {
case "none":
Expand Down
40 changes: 40 additions & 0 deletions badger/cmd/info_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cmd

import (
"fmt"
"testing"

"github.com/dgraph-io/badger/v4/pb"
"github.com/stretchr/testify/require"
)

func TestStrToChecksumAlgorithm(t *testing.T) {
ctCRC32, err := strToChecksumAlgorithm("crc32c")
require.True(t, ctCRC32 == pb.Checksum_CRC32C)
require.True(t, err == nil)

ctHash, err := strToChecksumAlgorithm("xxhash64")
require.True(t, ctHash == pb.Checksum_XXHash64)
require.True(t, err == nil)

ctOthers, err := strToChecksumAlgorithm("others")
fmt.Println(err)
require.True(t, ctOthers == pb.Checksum_CRC32C)
require.True(t, err != nil)
}
16 changes: 16 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"time"

"github.com/dgraph-io/badger/v4/options"
"github.com/dgraph-io/badger/v4/pb"
"github.com/dgraph-io/badger/v4/table"
"github.com/dgraph-io/badger/v4/y"
"github.com/dgraph-io/ristretto/v2/z"
Expand Down Expand Up @@ -93,6 +94,9 @@ type Options struct {
// ChecksumVerificationMode decides when db should verify checksums for SSTable blocks.
ChecksumVerificationMode options.ChecksumVerificationMode

// ChecksumAlgorithm decides which algorithm is used to calculate checksums.
ChecksumAlgorithm pb.Checksum_Algorithm

// DetectConflicts determines whether the transactions would be checked for
// conflicts. The transactions can be processed at a higher rate when
// conflict detection is disabled.
Expand Down Expand Up @@ -174,6 +178,7 @@ func DefaultOptions(path string) Options {
EncryptionKeyRotationDuration: 10 * 24 * time.Hour, // Default 10 days.
DetectConflicts: true,
NamespaceOffset: -1,
ChecksumAlgorithm: pb.Checksum_CRC32C,
}
}

Expand All @@ -188,6 +193,7 @@ func buildTableOptions(db *DB) table.Options {
BlockSize: opt.BlockSize,
BloomFalsePositive: opt.BloomFalsePositive,
ChkMode: opt.ChecksumVerificationMode,
ChkAlgo: opt.ChecksumAlgorithm,
Compression: opt.Compression,
ZSTDCompressionLevel: opt.ZSTDCompressionLevel,
BlockCache: db.blockCache,
Expand Down Expand Up @@ -669,6 +675,16 @@ func (opt Options) WithChecksumVerificationMode(cvMode options.ChecksumVerificat
return opt
}

// WithChecksumAlgorithm returns a new Options value with ChecksumAlgorithm set to the given value.
//
// ChecksumAlgorithm decides which algorithm is used to calculate checksums.
//
// The default value of ChecksumAlgorithm is pb.Checksum_CRC32C.
func (opt Options) WithChecksumAlgorithm(ct pb.Checksum_Algorithm) Options {
opt.ChecksumAlgorithm = ct
return opt
}

// WithBlockCacheSize returns a new Options value with BlockCacheSize set to the given value.
//
// This value specifies how much data cache should hold in memory. A small size
Expand Down
13 changes: 6 additions & 7 deletions table/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ func (b *Builder) finishBlock() {
b.append(y.U32SliceToBytes(b.curBlock.entryOffsets))
b.append(y.U32ToBytes(uint32(len(b.curBlock.entryOffsets))))

checksum := b.calculateChecksum(b.curBlock.data[:b.curBlock.end])
checksum := b.calculateChecksum(b.curBlock.data[:b.curBlock.end], b.opts.ChkAlgo)

// Append the block checksum and its length.
b.append(checksum)
Expand Down Expand Up @@ -443,27 +443,26 @@ func (b *Builder) Done() buildData {
index, err = b.encrypt(index)
y.Check(err)
}
checksum := b.calculateChecksum(index)
checksum := b.calculateChecksum(index, b.opts.ChkAlgo)

bd.index = index
bd.checksum = checksum
bd.Size = int(dataSize) + len(index) + len(checksum) + 4 + 4
return bd
}

func (b *Builder) calculateChecksum(data []byte) []byte {
func (b *Builder) calculateChecksum(data []byte, ct pb.Checksum_Algorithm) []byte {
// Build checksum for the index.
checksum := pb.Checksum{
// TODO: The checksum type should be configurable from the
// options.

// We chose to use CRC32 as the default option because
// it performed better compared to xxHash64.
// See the BenchmarkChecksum in table_test.go file
// Size => 1024 B 2048 B
// CRC32 => 63.7 ns/op 112 ns/op
// xxHash64 => 87.5 ns/op 158 ns/op
Sum: y.CalculateChecksum(data, pb.Checksum_CRC32C),
Algo: pb.Checksum_CRC32C,
Sum: y.CalculateChecksum(data, ct),
Algo: ct,
}

// Write checksum to the file.
Expand Down
3 changes: 3 additions & 0 deletions table/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ type Options struct {
// ChkMode is the checksum verification mode for Table.
ChkMode options.ChecksumVerificationMode

// ChkAlgo is the checksum algorithm used for Table.
ChkAlgo pb.Checksum_Algorithm

// Options for Table builder.

// BloomFalsePositive is the false positive probabiltiy of bloom filter.
Expand Down
Loading