diff --git a/CHANGELOG.md b/CHANGELOG.md index 7da08bf6ead..b52f1e86958 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ * [ENHANCEMENT] Ingester: If a limit per label set entry doesn't have any label, use it as the default partition to catch all series that doesn't match any other label sets entries. #6435 * [ENHANCEMENT] Querier: Add new `cortex_querier_codec_response_size` metric to track the size of the encoded query responses from queriers. #6444 * [ENHANCEMENT] Distributor: Added `cortex_distributor_received_samples_per_labelset_total` metric to calculate ingestion rate per label set. #6443 +* [ENHANCEMENT] StoreGateway: Added `cortex_bucket_store_indexheader_load_duration_seconds` and `cortex_bucket_store_indexheader_download_duration_seconds` metrics to track the time taken to download and load index-header files. #6445 * [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 * [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326 * [BUGFIX] Ring: update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled. 
#6271 diff --git a/pkg/storegateway/bucket_store_metrics.go b/pkg/storegateway/bucket_store_metrics.go index 8319e6a70f8..4938e73b411 100644 --- a/pkg/storegateway/bucket_store_metrics.go +++ b/pkg/storegateway/bucket_store_metrics.go @@ -55,6 +55,8 @@ type BucketStoreMetrics struct { indexHeaderLazyUnloadCount *prometheus.Desc indexHeaderLazyUnloadFailedCount *prometheus.Desc indexHeaderLazyLoadDuration *prometheus.Desc + indexHeaderDownloadDuration *prometheus.Desc + indexHeaderLoadDuration *prometheus.Desc } func NewBucketStoreMetrics() *BucketStoreMetrics { @@ -205,6 +207,14 @@ func NewBucketStoreMetrics() *BucketStoreMetrics { "cortex_bucket_store_indexheader_lazy_load_duration_seconds", "Duration of the index-header lazy loading in seconds.", nil, nil), + indexHeaderDownloadDuration: prometheus.NewDesc( + "cortex_bucket_store_indexheader_download_duration_seconds", + "Duration of the index-header download from objstore in seconds.", + nil, nil), + indexHeaderLoadDuration: prometheus.NewDesc( + "cortex_bucket_store_indexheader_load_duration_seconds", + "Duration of the index-header loading in seconds.", + nil, nil), lazyExpandedPostingsCount: prometheus.NewDesc( "cortex_bucket_store_lazy_expanded_postings_total", @@ -272,6 +282,8 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) { out <- m.indexHeaderLazyUnloadCount out <- m.indexHeaderLazyUnloadFailedCount out <- m.indexHeaderLazyLoadDuration + out <- m.indexHeaderDownloadDuration + out <- m.indexHeaderLoadDuration out <- m.lazyExpandedPostingsCount out <- m.lazyExpandedPostingGroups @@ -323,6 +335,8 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) { data.SendSumOfCounters(out, m.indexHeaderLazyUnloadCount, "thanos_bucket_store_indexheader_lazy_unload_total") data.SendSumOfCounters(out, m.indexHeaderLazyUnloadFailedCount, "thanos_bucket_store_indexheader_lazy_unload_failed_total") data.SendSumOfHistograms(out, m.indexHeaderLazyLoadDuration, 
"thanos_bucket_store_indexheader_lazy_load_duration_seconds") + data.SendSumOfHistograms(out, m.indexHeaderDownloadDuration, "thanos_bucket_store_indexheader_download_duration_seconds") + data.SendSumOfHistograms(out, m.indexHeaderLoadDuration, "thanos_bucket_store_indexheader_load_duration_seconds") data.SendSumOfCounters(out, m.lazyExpandedPostingsCount, "thanos_bucket_store_lazy_expanded_postings_total") data.SendSumOfCountersWithLabels(out, m.lazyExpandedPostingGroups, "thanos_bucket_store_lazy_expanded_posting_groups_total", "reason") diff --git a/pkg/storegateway/bucket_store_metrics_test.go b/pkg/storegateway/bucket_store_metrics_test.go index 079f2017f07..ac4ff00df87 100644 --- a/pkg/storegateway/bucket_store_metrics_test.go +++ b/pkg/storegateway/bucket_store_metrics_test.go @@ -493,6 +493,22 @@ func TestBucketStoreMetrics(t *testing.T) { # HELP cortex_bucket_store_empty_postings_total Total number of empty postings when fetching block series. # TYPE cortex_bucket_store_empty_postings_total counter cortex_bucket_store_empty_postings_total 112595 + + # HELP cortex_bucket_store_indexheader_download_duration_seconds Duration of the index-header download from objstore in seconds. 
+ # TYPE cortex_bucket_store_indexheader_download_duration_seconds histogram + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.02"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.05"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.2"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.5"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="1"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="2"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="5"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_sum 2.25 + cortex_bucket_store_indexheader_download_duration_seconds_count 3 + # HELP cortex_bucket_store_postings_fetch_duration_seconds Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses. # TYPE cortex_bucket_store_postings_fetch_duration_seconds histogram cortex_bucket_store_postings_fetch_duration_seconds_bucket{le="0.001"} 0 @@ -543,6 +559,22 @@ func TestBucketStoreMetrics(t *testing.T) { # HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations. # TYPE cortex_bucket_store_indexheader_lazy_unload_total counter cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06 + + # HELP cortex_bucket_store_indexheader_load_duration_seconds Duration of the index-header loading in seconds. 
+ # TYPE cortex_bucket_store_indexheader_load_duration_seconds histogram + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.02"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.05"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.2"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.5"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="1"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="2"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="5"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_sum 2.55 + cortex_bucket_store_indexheader_load_duration_seconds_count 3 + # HELP cortex_bucket_store_lazy_expanded_posting_groups_total Total number of posting groups that are marked as lazy and corresponding reason. 
# TYPE cortex_bucket_store_lazy_expanded_posting_groups_total counter cortex_bucket_store_lazy_expanded_posting_groups_total{reason="keys_limit"} 202671 @@ -685,6 +717,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry { m.indexHeaderLazyUnloadCount.Add(62 * base) m.indexHeaderLazyUnloadFailedCount.Add(63 * base) m.indexHeaderLazyLoadDuration.Observe(0.65) + m.indexHeaderDownloadDuration.Observe(0.75) + m.indexHeaderLoadDuration.Observe(0.85) m.emptyPostingCount.Add(5 * base) @@ -737,6 +771,8 @@ type mockedBucketStoreMetrics struct { indexHeaderLazyUnloadCount prometheus.Counter indexHeaderLazyUnloadFailedCount prometheus.Counter indexHeaderLazyLoadDuration prometheus.Histogram + indexHeaderDownloadDuration prometheus.Histogram + indexHeaderLoadDuration prometheus.Histogram lazyExpandedPostingsCount prometheus.Counter lazyExpandedPostingGroups *prometheus.CounterVec @@ -913,6 +949,16 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe Help: "Duration of the index-header lazy loading in seconds.", Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, }) + m.indexHeaderDownloadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_indexheader_download_duration_seconds", + Help: "Duration of the index-header download from objstore in seconds.", + Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, + }) + m.indexHeaderLoadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_indexheader_load_duration_seconds", + Help: "Duration of the index-header loading in seconds.", + Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, + }) m.emptyPostingCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "thanos_bucket_store_empty_postings_total",