From 5e618a8d0328cbc19d9d592970a6b61fc3a61126 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Thu, 16 Apr 2020 16:27:04 +0100 Subject: [PATCH 1/3] Regex set optimisation when looking series in ingester Similar to https://github.com/cortexproject/cortex/pull/2446/ Super useful for templated grafana dashboards which send matchers such as =~"a|b|c|d|" Signed-off-by: Goutham Veeramachaneni --- pkg/chunk/chunk_store.go | 4 ++-- pkg/chunk/opts.go | 4 ++-- pkg/chunk/opts_test.go | 2 +- pkg/ingester/index/index.go | 8 ++++++++ pkg/ingester/index/index_test.go | 12 ++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pkg/chunk/chunk_store.go b/pkg/chunk/chunk_store.go index 02cc7da879b..6e904f6d85d 100644 --- a/pkg/chunk/chunk_store.go +++ b/pkg/chunk/chunk_store.go @@ -441,8 +441,8 @@ func (c *baseStore) lookupIdsByMetricNameMatcher(ctx context.Context, from, thro } else if matcher.Type == labels.MatchEqual { labelName = matcher.Name queries, err = c.schema.GetReadQueriesForMetricLabelValue(from, through, userID, metricName, matcher.Name, matcher.Value) - } else if matcher.Type == labels.MatchRegexp && len(findSetMatches(matcher.Value)) > 0 { - set := findSetMatches(matcher.Value) + } else if matcher.Type == labels.MatchRegexp && len(FindSetMatches(matcher.Value)) > 0 { + set := FindSetMatches(matcher.Value) for _, v := range set { var qs []IndexQuery qs, err = c.schema.GetReadQueriesForMetricLabelValue(from, through, userID, metricName, matcher.Name, v) diff --git a/pkg/chunk/opts.go b/pkg/chunk/opts.go index 10d07b012f7..3384ecd7a43 100644 --- a/pkg/chunk/opts.go +++ b/pkg/chunk/opts.go @@ -19,9 +19,9 @@ func init() { } } +// FindSetMatches returns list of values that can be equality matched on. // copied from Prometheus querier.go, removed check for Prometheus wrapper. -// Returns list of values that can regex matches. -func findSetMatches(pattern string) []string { +func FindSetMatches(pattern string) []string { escaped := false sets := []*strings.Builder{{}} for i := 0; i < len(pattern); i++ { diff --git a/pkg/chunk/opts_test.go b/pkg/chunk/opts_test.go index 282049f6696..01d05407e6d 100644 --- a/pkg/chunk/opts_test.go +++ b/pkg/chunk/opts_test.go @@ -44,7 +44,7 @@ func TestFindSetMatches(t *testing.T) { } for _, c := range cases { - matches := findSetMatches(c.pattern) + matches := FindSetMatches(c.pattern) require.Equal(t, c.exp, matches) } } diff --git a/pkg/ingester/index/index.go b/pkg/ingester/index/index.go index 7c1c46c5708..001dd1a817a 100644 --- a/pkg/ingester/index/index.go +++ b/pkg/ingester/index/index.go @@ -8,6 +8,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" + "github.com/cortexproject/cortex/pkg/chunk" "github.com/cortexproject/cortex/pkg/ingester/client" "github.com/cortexproject/cortex/pkg/util" ) @@ -168,6 +169,13 @@ func (shard *indexShard) lookup(matchers []*labels.Matcher) []model.Fingerprint if matcher.Type == labels.MatchEqual { fps := values.fps[matcher.Value] toIntersect = append(toIntersect, fps.fps...) // deliberate copy + } else if matcher.Type == labels.MatchRegexp && len(chunk.FindSetMatches(matcher.Value)) > 0 { + // The lookup is of the form `=~"a|b|c|d"` + set := chunk.FindSetMatches(matcher.Value) + for _, value := range set { + toIntersect = append(toIntersect, values.fps[value].fps...) + } + sort.Sort(toIntersect) } else { // accumulate the matching fingerprints (which are all distinct) // then sort to maintain the invariant diff --git a/pkg/ingester/index/index_test.go b/pkg/ingester/index/index_test.go index 9b3351c55f3..d09928ab10e 100644 --- a/pkg/ingester/index/index_test.go +++ b/pkg/ingester/index/index_test.go @@ -42,6 +42,18 @@ func TestIndex(t *testing.T) { {mustParseMatcher(`{foo="bar", flip="flap"}`), []model.Fingerprint{2}}, {mustParseMatcher(`{foo="baz", flip="flop"}`), []model.Fingerprint{1}}, {mustParseMatcher(`{foo="baz", flip="flap"}`), []model.Fingerprint{0}}, + + {mustParseMatcher(`{fizz=~"b.*"}`), []model.Fingerprint{}}, + + {mustParseMatcher(`{foo=~"bar.*"}`), []model.Fingerprint{2, 3}}, + {mustParseMatcher(`{foo=~"ba.*"}`), []model.Fingerprint{0, 1, 2, 3}}, + {mustParseMatcher(`{flip=~"flop|flap"}`), []model.Fingerprint{0, 1, 2, 3}}, + {mustParseMatcher(`{flip=~"flaps"}`), []model.Fingerprint{}}, + + {mustParseMatcher(`{foo=~"bar|bax", flip="flop"}`), []model.Fingerprint{3}}, + {mustParseMatcher(`{foo=~"bar|baz", flip="flap"}`), []model.Fingerprint{0, 2}}, + {mustParseMatcher(`{foo=~"baz.+", flip="flop"}`), []model.Fingerprint{}}, + {mustParseMatcher(`{foo=~"baz", flip="flap"}`), []model.Fingerprint{0}}, } { assert.Equal(t, tc.fps, index.Lookup(tc.matchers)) } From 59385d5b980bcf6b3c368f3984d5e1111622f05d Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 17 Apr 2020 08:38:29 +0100 Subject: [PATCH 2/3] Add CHANGELOG entry Signed-off-by: Goutham Veeramachaneni --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a095b7f75c6..34a734390ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ * [ENHANCEMENT] FIFO cache to support eviction based on memory usage. The `-.fifocache.size` CLI flag has been renamed to `-.fifocache.max-size-items` as well as its YAML config option `size` renamed to `max_size_items`. Added `-.fifocache.max-size-bytes` CLI flag and YAML config option `max_size_bytes` to specify memory limit of the cache. #2319 * [ENHANCEMENT] Single Binary: Added query-frontend to the single binary. Single binary users will now benefit from various query-frontend features. Primarily: sharding, parallelization, load shedding, additional caching (if configured), and query retries. #2437 * [ENHANCEMENT] Allow 1w (where w denotes week) and 1y (where y denotes year) when setting `-store.cache-lookups-older-than` and `-store.max-look-back-period`. #2454 -* [ENHANCEMENT] Optimize index queries for matchers using "a|b|c"-type regex. #2446 +* [ENHANCEMENT] Optimize index queries for matchers using "a|b|c"-type regex. #2446 #2475 * [BUGFIX] Fixes #2411, Ensure requests are properly routed to the prometheus api embedded in the query if `-server.path-prefix` is set. #2372 * [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. #2400 * [BUGFIX] Cassandra Storage: Fix endpoint TLS host verification. #2109 From b2023dac0c1e3b83688f5294a1bece0f1bb1bcf4 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 17 Apr 2020 10:04:07 +0100 Subject: [PATCH 3/3] Add benchmark for set optimisation Signed-off-by: Goutham Veeramachaneni --- pkg/ingester/index/index_test.go | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/pkg/ingester/index/index_test.go b/pkg/ingester/index/index_test.go index d09928ab10e..0ac06d5e090 100644 --- a/pkg/ingester/index/index_test.go +++ b/pkg/ingester/index/index_test.go @@ -1,6 +1,9 @@ package index import ( + "fmt" + "strconv" + "strings" "testing" "github.com/prometheus/common/model" @@ -63,6 +66,67 @@ func TestIndex(t *testing.T) { assert.Equal(t, []string{"flap", "flop"}, index.LabelValues("flip")) } +func BenchmarkSetRegexLookup(b *testing.B) { + // Prepare the benchmark. + seriesLabels := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"} + seriesPerLabel := 100000 + + idx := New() + for _, l := range seriesLabels { + for i := 0; i < seriesPerLabel; i++ { + lbls := labels.FromStrings("foo", l, "bar", strconv.Itoa(i)) + idx.Add(client.FromLabelsToLabelAdapters(lbls), model.Fingerprint(lbls.Hash())) + } + } + + selectionLabels := []string{} + for i := 0; i < 100; i++ { + selectionLabels = append(selectionLabels, strconv.Itoa(i)) + } + + tests := []struct { + name string + matcher string + }{ + { + name: "select all", + matcher: fmt.Sprintf(`{bar=~"%s"}`, strings.Join(selectionLabels, "|")), + }, + { + name: "select two", + matcher: fmt.Sprintf(`{bar=~"%s"}`, strings.Join(selectionLabels[:2], "|")), + }, + { + name: "select half", + matcher: fmt.Sprintf(`{bar=~"%s"}`, strings.Join(selectionLabels[:len(selectionLabels)/2], "|")), + }, + { + name: "select none", + matcher: `{bar=~"bleep|bloop"}`, + }, + { + name: "equality matcher", + matcher: `{bar="1"}`, + }, + { + name: "regex (non-set) matcher", + matcher: `{bar=~"1.*"}`, + }, + } + + b.ResetTimer() + + for _, tc := range tests { + b.Run(fmt.Sprintf("%s:%s", tc.name, tc.matcher), func(b *testing.B) { + matcher := mustParseMatcher(tc.matcher) + for n := 0; n < b.N; n++ { + idx.Lookup(matcher) + } + }) + } + +} + func mustParseMatcher(s string) []*labels.Matcher { ms, err := promql.ParseMetricSelector(s) if err != nil {