From d5859be1504d272a59c40794ed6e89114ec7b16e Mon Sep 17 00:00:00 2001 From: yuanchao Date: Wed, 26 Aug 2020 23:12:15 +0800 Subject: [PATCH 1/4] shard fingerprints Signed-off-by: yuanchao --- pkg/ingester/index/index.go | 96 ++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 23 deletions(-) diff --git a/pkg/ingester/index/index.go b/pkg/ingester/index/index.go index 001dd1a817a..509dbbeb29d 100644 --- a/pkg/ingester/index/index.go +++ b/pkg/ingester/index/index.go @@ -1,6 +1,7 @@ package index import ( + "math" "sort" "sync" "unsafe" @@ -88,12 +89,68 @@ func (ii *InvertedIndex) Delete(labels labels.Labels, fp model.Fingerprint) { // NB slice entries are sorted in fp order. type indexEntry struct { name string - fps map[string]indexValueEntry + fps map[string]*indexValueEntry } type indexValueEntry struct { value string - fps []model.Fingerprint + shards [][]model.Fingerprint +} + + +const indexValueShards = 200 + +func newIndexValueEntry(value string) *indexValueEntry { + shards := make([][]model.Fingerprint, indexValueShards) + return &indexValueEntry{ + value: value, + shards: shards, + } +} + +func (c *indexValueEntry) fps() []model.Fingerprint { + var fps []model.Fingerprint + for _, shard := range c.shards { + fps = append(fps, shard...) + } + return fps +} + +func (c *indexValueEntry) delete(fp model.Fingerprint){ + num := c.shard(fp) + fps := c.shards[num] + j := sort.Search(len(fps), func(i int) bool { + return fps[i] >= fp + }) + c.shards[num] = fps[:j+copy(fps[j:], fps[j+1:])] + if len(c.shards[num]) == 0 { + c.shards[num] = []model.Fingerprint{} + } +} + +func (c *indexValueEntry) length() int { + var i int + for _, shard := range c.shards { + i += len(shard) + } + return i +} + +func (c *indexValueEntry) shard(fp model.Fingerprint) int { + return int(math.Floor(float64(len(c.shards)) * float64(fp) / math.MaxUint64)) +} + +func (c *indexValueEntry) add(fp model.Fingerprint){ + num := c.shard(fp) + fps := c.shards[num] + // Insert into the right position to keep fingerprints sorted + j := sort.Search(len(fps), func(i int) bool { + return fps[i] >= fp + }) + fps = append(fps, 0) + copy(fps[j+1:], fps[j:]) + fps[j] = fp + c.shards[num] = fps } type unlockIndex map[string]indexEntry @@ -126,23 +183,15 @@ func (shard *indexShard) add(metric []client.LabelAdapter, fp model.Fingerprint) if !ok { values = indexEntry{ name: copyString(pair.Name), - fps: map[string]indexValueEntry{}, + fps: map[string]*indexValueEntry{}, } shard.idx[values.name] = values } fingerprints, ok := values.fps[pair.Value] if !ok { - fingerprints = indexValueEntry{ - value: copyString(pair.Value), - } + fingerprints = newIndexValueEntry(copyString(pair.Value)) } - // Insert into the right position to keep fingerprints sorted - j := sort.Search(len(fingerprints.fps), func(i int) bool { - return fingerprints.fps[i] >= fp - }) - fingerprints.fps = append(fingerprints.fps, 0) - copy(fingerprints.fps[j+1:], fingerprints.fps[j:]) - fingerprints.fps[j] = fp + fingerprints.add(fp) values.fps[fingerprints.value] = fingerprints internedLabels[i] = labels.Label{Name: values.name, Value: fingerprints.value} } @@ -167,13 +216,18 @@ func (shard *indexShard) lookup(matchers []*labels.Matcher) []model.Fingerprint } var toIntersect model.Fingerprints if matcher.Type == labels.MatchEqual { - fps := values.fps[matcher.Value] - toIntersect = append(toIntersect, fps.fps...) // deliberate copy + fps, ok := values.fps[matcher.Value] + if ok { + toIntersect = append(toIntersect, fps.fps()...) // deliberate copy + } } else if matcher.Type == labels.MatchRegexp && len(chunk.FindSetMatches(matcher.Value)) > 0 { // The lookup is of the form `=~"a|b|c|d"` set := chunk.FindSetMatches(matcher.Value) for _, value := range set { - toIntersect = append(toIntersect, values.fps[value].fps...) + fps, ok := values.fps[value] + if ok { + toIntersect = append(toIntersect, fps.fps()...) + } } sort.Sort(toIntersect) } else { @@ -181,7 +235,7 @@ func (shard *indexShard) lookup(matchers []*labels.Matcher) []model.Fingerprint // then sort to maintain the invariant for value, fps := range values.fps { if matcher.Matches(value) { - toIntersect = append(toIntersect, fps.fps...) + toIntersect = append(toIntersect, fps.fps()...) } } sort.Sort(toIntersect) @@ -240,13 +294,9 @@ func (shard *indexShard) delete(labels labels.Labels, fp model.Fingerprint) { if !ok { continue } + fingerprints.delete(fp) - j := sort.Search(len(fingerprints.fps), func(i int) bool { - return fingerprints.fps[i] >= fp - }) - fingerprints.fps = fingerprints.fps[:j+copy(fingerprints.fps[j:], fingerprints.fps[j+1:])] - - if len(fingerprints.fps) == 0 { + if fingerprints.length() == 0 { delete(values.fps, value) } else { values.fps[value] = fingerprints From b1fad7e9045d7031b754fd67599d9d9cf88360be Mon Sep 17 00:00:00 2001 From: yuanchao Date: Thu, 27 Aug 2020 01:27:51 +0800 Subject: [PATCH 2/4] lint Signed-off-by: yuanchao --- pkg/ingester/index/index.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pkg/ingester/index/index.go b/pkg/ingester/index/index.go index 509dbbeb29d..3540f3ba2ad 100644 --- a/pkg/ingester/index/index.go +++ b/pkg/ingester/index/index.go @@ -93,11 +93,10 @@ type indexEntry struct { } type indexValueEntry struct { - value string + value string shards [][]model.Fingerprint } - const indexValueShards = 200 func newIndexValueEntry(value string) *indexValueEntry { @@ -116,7 +115,7 @@ func (c *indexValueEntry) fps() []model.Fingerprint { return fps } -func (c *indexValueEntry) delete(fp model.Fingerprint){ +func (c *indexValueEntry) delete(fp model.Fingerprint) { num := c.shard(fp) fps := c.shards[num] j := sort.Search(len(fps), func(i int) bool { @@ -140,7 +139,7 @@ func (c *indexValueEntry) shard(fp model.Fingerprint) int { return int(math.Floor(float64(len(c.shards)) * float64(fp) / math.MaxUint64)) } -func (c *indexValueEntry) add(fp model.Fingerprint){ +func (c *indexValueEntry) add(fp model.Fingerprint) { num := c.shard(fp) fps := c.shards[num] // Insert into the right position to keep fingerprints sorted From ad7eaa58cbc5472235af235bbf33004d905da4b8 Mon Sep 17 00:00:00 2001 From: storyicon Date: Thu, 27 Aug 2020 11:48:28 +0800 Subject: [PATCH 3/4] add unit test Signed-off-by: storyicon --- pkg/ingester/index/index_test.go | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pkg/ingester/index/index_test.go b/pkg/ingester/index/index_test.go index d2d5ffe745e..1fbb4a9a43e 100644 --- a/pkg/ingester/index/index_test.go +++ b/pkg/ingester/index/index_test.go @@ -2,6 +2,8 @@ package index import ( "fmt" + "math/rand" + "sort" "strconv" "strings" "testing" @@ -134,3 +136,43 @@ func mustParseMatcher(s string) []*labels.Matcher { } return ms } + +func TestIndexValueEntry(t *testing.T) { + const size = 100 + c := newIndexValueEntry("value") + var fps []model.Fingerprint + for i := 0; i < size; i++ { + fingerprint := model.Fingerprint(rand.Uint64()) + + // add element into fps + j := sort.Search(len(fps), func(i int) bool { + return fps[i] >= fingerprint + }) + fps = append(fps, 0) + copy(fps[j+1:], fps[j:]) + fps[j] = fingerprint + + c.add(fingerprint) + + assert.Equal(t, fps, c.fps()) + assert.Equal(t, len(c.fps()), c.length()) + } + + for len(fps) > 0 { + fp := fps[rand.Intn(len(fps))] + + // delete element in fps + j := sort.Search(len(fps), func(i int) bool { + return fps[i] >= fp + }) + fps = fps[:j+copy(fps[j:], fps[j+1:])] + + c.delete(fp) + + if len(fps) == 0 { + fps = nil + } + + assert.Equal(t, fps, c.fps()) + } +} From 401aeb83ea43a5af70d1c3aceb38ff3d89ff693f Mon Sep 17 00:00:00 2001 From: yuanchao Date: Sat, 29 Aug 2020 00:15:56 +0800 Subject: [PATCH 4/4] some modifications && benchmark Signed-off-by: yuanchao --- pkg/ingester/index/index.go | 20 ++++++++++------ pkg/ingester/index/index_test.go | 41 ++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/pkg/ingester/index/index.go b/pkg/ingester/index/index.go index 3540f3ba2ad..f39c3799feb 100644 --- a/pkg/ingester/index/index.go +++ b/pkg/ingester/index/index.go @@ -95,6 +95,7 @@ type indexEntry struct { type indexValueEntry struct { value string shards [][]model.Fingerprint + len int } const indexValueShards = 200 @@ -108,7 +109,7 @@ func newIndexValueEntry(value string) *indexValueEntry { } func (c *indexValueEntry) fps() []model.Fingerprint { - var fps []model.Fingerprint + fps := make([]model.Fingerprint, 0, c.length()) for _, shard := range c.shards { fps = append(fps, shard...) } @@ -121,22 +122,26 @@ func (c *indexValueEntry) delete(fp model.Fingerprint) { j := sort.Search(len(fps), func(i int) bool { return fps[i] >= fp }) + if len(fps) == j { + return + } c.shards[num] = fps[:j+copy(fps[j:], fps[j+1:])] if len(c.shards[num]) == 0 { c.shards[num] = []model.Fingerprint{} } + c.len-- } func (c *indexValueEntry) length() int { - var i int - for _, shard := range c.shards { - i += len(shard) - } - return i + return c.len } func (c *indexValueEntry) shard(fp model.Fingerprint) int { - return int(math.Floor(float64(len(c.shards)) * float64(fp) / math.MaxUint64)) + n := int(math.Floor(float64(len(c.shards)) * float64(fp) / math.MaxUint64)) + if n == len(c.shards) { + n = len(c.shards) - 1 + } + return n } func (c *indexValueEntry) add(fp model.Fingerprint) { @@ -150,6 +155,7 @@ func (c *indexValueEntry) add(fp model.Fingerprint) { copy(fps[j+1:], fps[j:]) fps[j] = fp c.shards[num] = fps + c.len++ } type unlockIndex map[string]indexEntry diff --git a/pkg/ingester/index/index_test.go b/pkg/ingester/index/index_test.go index 1fbb4a9a43e..901183631f4 100644 --- a/pkg/ingester/index/index_test.go +++ b/pkg/ingester/index/index_test.go @@ -137,6 +137,43 @@ func mustParseMatcher(s string) []*labels.Matcher { return ms } +func BenchmarkIndexValueEntry_Add(b *testing.B) { + var fingerprintsGen = func(size int) func(i int) model.Fingerprint { + fps := make([]model.Fingerprint, size) + for i := 0; i < size; i++ { + fps[i] = model.Fingerprint(rand.Uint64()) + } + return func(i int) model.Fingerprint { + if i >= size { + i = i % size + } + return fps[i] + } + } + getFingerprints := fingerprintsGen(100000) + + c := newIndexValueEntry("") + b.Run("shard", func(b *testing.B) { + for i := 0; i < b.N; i++ { + c.add(getFingerprints(i)) + } + }) + + b.Run("plain", func(b *testing.B) { + var fps []model.Fingerprint + for i := 0; i < b.N; i++ { + fingerprint := getFingerprints(i) + j := sort.Search(len(fps), func(i int) bool { + return fps[i] >= fingerprint + }) + fps = append(fps, 0) + copy(fps[j+1:], fps[j:]) + fps[j] = fingerprint + } + }) + +} + func TestIndexValueEntry(t *testing.T) { const size = 100 c := newIndexValueEntry("value") @@ -169,10 +206,6 @@ func TestIndexValueEntry(t *testing.T) { c.delete(fp) - if len(fps) == 0 { - fps = nil - } - assert.Equal(t, fps, c.fps()) } }