Skip to content

Commit 0b21bdc

Browse files
harry671003alexqyle
authored andcommitted
StoreGateway: Allow filtering out recent blocks during sync (cortexproject#5166)
* Allow filtering out recent blocks during sync Signed-off-by: 🌲 Harry 🌊 John 🏔 <[email protected]> * Fix build failures and comments Signed-off-by: 🌲 Harry 🌊 John 🏔 <[email protected]> --------- Signed-off-by: 🌲 Harry 🌊 John 🏔 <[email protected]> Signed-off-by: Alex Le <[email protected]>
1 parent 8d6adfa commit 0b21bdc

13 files changed

+162
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
* [FEATURE] Added zstd as an option for grpc compression #5092
3030
* [FEATURE] Ring: Add new kv store option `dynamodb`. #5026
3131
* [FEATURE] Cache: Support redis as backend for caching bucket and index cache. #5057
32+
* [FEATURE] Querier/Store-Gateway: Added `-blocks-storage.bucket-store.ignore-blocks-within` allowing to filter out the recently created blocks from being synced by queriers and store-gateways. #5166
3233
* [BUGFIX] Updated `golang.org/x/net` dependency to fix CVE-2022-27664. #5008
3334
* [BUGFIX] Fix panic when otel and xray tracing is enabled. #5044
3435

docs/blocks-storage/querier.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,15 @@ blocks_storage:
10841084
# CLI flag: -blocks-storage.bucket-store.ignore-deletion-marks-delay
10851085
[ignore_deletion_mark_delay: <duration> | default = 6h]
10861086
1087+
# The blocks created since `now() - ignore_blocks_within` will not be
1088+
# synced. This should be used together with `-querier.query-store-after` to
1089+
# filter out the blocks that are too new to be queried. A reasonable value
1090+
# for this flag would be `-querier.query-store-after -
1091+
# blocks-storage.bucket-store.bucket-index.max-stale-period` to give some
1092+
# buffer. 0 to disable.
1093+
# CLI flag: -blocks-storage.bucket-store.ignore-blocks-within
1094+
[ignore_blocks_within: <duration> | default = 0s]
1095+
10871096
bucket_index:
10881097
# True to enable querier and store-gateway to discover blocks in the
10891098
# storage via bucket index instead of bucket scanning.

docs/blocks-storage/store-gateway.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,15 @@ blocks_storage:
11611161
# CLI flag: -blocks-storage.bucket-store.ignore-deletion-marks-delay
11621162
[ignore_deletion_mark_delay: <duration> | default = 6h]
11631163
1164+
# The blocks created since `now() - ignore_blocks_within` will not be
1165+
# synced. This should be used together with `-querier.query-store-after` to
1166+
# filter out the blocks that are too new to be queried. A reasonable value
1167+
# for this flag would be `-querier.query-store-after -
1168+
# blocks-storage.bucket-store.bucket-index.max-stale-period` to give some
1169+
# buffer. 0 to disable.
1170+
# CLI flag: -blocks-storage.bucket-store.ignore-blocks-within
1171+
[ignore_blocks_within: <duration> | default = 0s]
1172+
11641173
bucket_index:
11651174
# True to enable querier and store-gateway to discover blocks in the
11661175
# storage via bucket index instead of bucket scanning.

docs/configuration/config-file-reference.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4024,6 +4024,15 @@ bucket_store:
40244024
# CLI flag: -blocks-storage.bucket-store.ignore-deletion-marks-delay
40254025
[ignore_deletion_mark_delay: <duration> | default = 6h]
40264026
4027+
# The blocks created since `now() - ignore_blocks_within` will not be synced.
4028+
# This should be used together with `-querier.query-store-after` to filter out
4029+
# the blocks that are too new to be queried. A reasonable value for this flag
4030+
# would be `-querier.query-store-after -
4031+
# blocks-storage.bucket-store.bucket-index.max-stale-period` to give some
4032+
# buffer. 0 to disable.
4033+
# CLI flag: -blocks-storage.bucket-store.ignore-blocks-within
4034+
[ignore_blocks_within: <duration> | default = 0s]
4035+
40274036
bucket_index:
40284037
# True to enable querier and store-gateway to discover blocks in the storage
40294038
# via bucket index instead of bucket scanning.

pkg/querier/blocks_finder_bucket_index.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ type BucketIndexBlocksFinderConfig struct {
2424
IndexLoader bucketindex.LoaderConfig
2525
MaxStalePeriod time.Duration
2626
IgnoreDeletionMarksDelay time.Duration
27+
IgnoreBlocksWithin time.Duration
2728
}
2829

2930
// BucketIndexBlocksFinder implements BlocksFinder interface and find blocks in the bucket

pkg/querier/blocks_finder_bucket_index_test.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,19 @@ func TestBucketIndexBlocksFinder_GetBlocks(t *testing.T) {
2525
bkt, _ := cortex_testutil.PrepareFilesystemBucket(t)
2626

2727
// Mock a bucket index.
28+
now := time.Now()
2829
block1 := &bucketindex.Block{ID: ulid.MustNew(1, nil), MinTime: 10, MaxTime: 15}
2930
block2 := &bucketindex.Block{ID: ulid.MustNew(2, nil), MinTime: 12, MaxTime: 20}
3031
block3 := &bucketindex.Block{ID: ulid.MustNew(3, nil), MinTime: 20, MaxTime: 30}
3132
block4 := &bucketindex.Block{ID: ulid.MustNew(4, nil), MinTime: 30, MaxTime: 40}
32-
block5 := &bucketindex.Block{ID: ulid.MustNew(5, nil), MinTime: 30, MaxTime: 40} // Time range overlaps with block4, but this block deletion mark is above the threshold.
33+
block5 := &bucketindex.Block{ID: ulid.MustNew(5, nil), MinTime: 30, MaxTime: 40} // Time range overlaps with block4, but this block deletion mark is above the threshold.
34+
block6 := &bucketindex.Block{ID: ulid.MustNew(6, nil), MinTime: now.Add(-2 * time.Hour).UnixMilli(), MaxTime: now.UnixMilli()} // This block is within ignoreBlocksWithin and shouldn't be loaded.
3335
mark3 := &bucketindex.BlockDeletionMark{ID: block3.ID, DeletionTime: time.Now().Unix()}
3436
mark5 := &bucketindex.BlockDeletionMark{ID: block5.ID, DeletionTime: time.Now().Add(-2 * time.Hour).Unix()}
3537

3638
require.NoError(t, bucketindex.WriteIndex(ctx, bkt, userID, nil, &bucketindex.Index{
3739
Version: bucketindex.IndexVersion1,
38-
Blocks: bucketindex.Blocks{block1, block2, block3, block4, block5},
40+
Blocks: bucketindex.Blocks{block1, block2, block3, block4, block5, block6},
3941
BlockDeletionMarks: bucketindex.BlockDeletionMarks{mark3, mark5},
4042
UpdatedAt: time.Now().Unix(),
4143
}))
@@ -102,6 +104,14 @@ func TestBucketIndexBlocksFinder_GetBlocks(t *testing.T) {
102104
block3.ID: mark3,
103105
},
104106
},
107+
"query range matching all blocks but should ignore non-queryable block": {
108+
minT: 0,
109+
maxT: block5.MaxTime,
110+
expectedBlocks: bucketindex.Blocks{block4, block3, block2, block1},
111+
expectedMarks: map[ulid.ULID]*bucketindex.BlockDeletionMark{
112+
block3.ID: mark3,
113+
},
114+
},
105115
}
106116

107117
for testName, testData := range tests {
@@ -209,6 +219,7 @@ func prepareBucketIndexBlocksFinder(t testing.TB, bkt objstore.Bucket) *BucketIn
209219
},
210220
MaxStalePeriod: time.Hour,
211221
IgnoreDeletionMarksDelay: time.Hour,
222+
IgnoreBlocksWithin: 10 * time.Hour,
212223
}
213224

214225
finder := NewBucketIndexBlocksFinder(cfg, bkt, nil, log.NewNopLogger(), nil)

pkg/querier/blocks_finder_bucket_scan.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ type BucketScanBlocksFinderConfig struct {
4242
CacheDir string
4343
ConsistencyDelay time.Duration
4444
IgnoreDeletionMarksDelay time.Duration
45+
IgnoreBlocksWithin time.Duration
4546
}
4647

4748
// BucketScanBlocksFinder is a BlocksFinder implementation periodically scanning the bucket to discover blocks.
@@ -378,6 +379,11 @@ func (d *BucketScanBlocksFinder) createMetaFetcher(userID string) (block.Metadat
378379
deletionMarkFilter := block.NewIgnoreDeletionMarkFilter(userLogger, userBucket, d.cfg.IgnoreDeletionMarksDelay, d.cfg.MetasConcurrency)
379380
filters := []block.MetadataFilter{deletionMarkFilter}
380381

382+
// Here we filter out the blocks that are too new to query.
383+
if d.cfg.IgnoreBlocksWithin > 0 {
384+
filters = append(filters, storegateway.NewIgnoreNonQueryableBlocksFilter(d.logger, d.cfg.IgnoreBlocksWithin))
385+
}
386+
381387
f, err := block.NewMetaFetcher(
382388
userLogger,
383389
d.cfg.MetasConcurrency,

pkg/querier/blocks_finder_bucket_scan_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,10 +383,12 @@ func TestBucketScanBlocksFinder_GetBlocks(t *testing.T) {
383383
ctx := context.Background()
384384
s, bucket, _, _ := prepareBucketScanBlocksFinder(t, prepareBucketScanBlocksFinderConfig())
385385

386+
now := time.Now()
386387
block1 := cortex_testutil.MockStorageBlock(t, bucket, "user-1", 10, 15)
387388
block2 := cortex_testutil.MockStorageBlock(t, bucket, "user-1", 12, 20)
388389
block3 := cortex_testutil.MockStorageBlock(t, bucket, "user-1", 20, 30)
389390
block4 := cortex_testutil.MockStorageBlock(t, bucket, "user-1", 30, 40)
391+
block5 := cortex_testutil.MockStorageBlock(t, bucket, "user-1", now.Add(-2*time.Hour).UnixMilli(), now.UnixMilli()) // This block is within ignoreBlocksWithin
390392
mark3 := bucketindex.BlockDeletionMarkFromThanosMarker(cortex_testutil.MockStorageDeletionMark(t, bucket, "user-1", block3))
391393

392394
require.NoError(t, services.StartAndAwaitRunning(ctx, s))
@@ -451,6 +453,14 @@ func TestBucketScanBlocksFinder_GetBlocks(t *testing.T) {
451453
block3.ULID: mark3,
452454
},
453455
},
456+
"query range matching all blocks but should ignore non-queryable block": {
457+
minT: 0,
458+
maxT: block5.MaxTime,
459+
expectedMetas: []tsdb.BlockMeta{block4, block3, block2, block1},
460+
expectedMarks: map[ulid.ULID]*bucketindex.BlockDeletionMark{
461+
block3.ULID: mark3,
462+
},
463+
},
454464
}
455465

456466
for testName, testData := range tests {
@@ -488,5 +498,6 @@ func prepareBucketScanBlocksFinderConfig() BucketScanBlocksFinderConfig {
488498
TenantsConcurrency: 10,
489499
MetasConcurrency: 10,
490500
IgnoreDeletionMarksDelay: time.Hour,
501+
IgnoreBlocksWithin: 10 * time.Hour, // All blocks created in the last 10 hour shoudn't be scanned.
491502
}
492503
}

pkg/querier/blocks_store_queryable.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa
198198
},
199199
MaxStalePeriod: storageCfg.BucketStore.BucketIndex.MaxStalePeriod,
200200
IgnoreDeletionMarksDelay: storageCfg.BucketStore.IgnoreDeletionMarksDelay,
201+
IgnoreBlocksWithin: storageCfg.BucketStore.IgnoreBlocksWithin,
201202
}, bucketClient, limits, logger, reg)
202203
} else {
203204
finder = NewBucketScanBlocksFinder(BucketScanBlocksFinderConfig{
@@ -206,6 +207,7 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa
206207
MetasConcurrency: storageCfg.BucketStore.MetaSyncConcurrency,
207208
CacheDir: storageCfg.BucketStore.SyncDir,
208209
IgnoreDeletionMarksDelay: storageCfg.BucketStore.IgnoreDeletionMarksDelay,
210+
IgnoreBlocksWithin: storageCfg.BucketStore.IgnoreBlocksWithin,
209211
}, bucketClient, limits, logger, reg)
210212
}
211213

pkg/storage/tsdb/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ type BucketStoreConfig struct {
239239
ChunksCache ChunksCacheConfig `yaml:"chunks_cache"`
240240
MetadataCache MetadataCacheConfig `yaml:"metadata_cache"`
241241
IgnoreDeletionMarksDelay time.Duration `yaml:"ignore_deletion_mark_delay"`
242+
IgnoreBlocksWithin time.Duration `yaml:"ignore_blocks_within"`
242243
BucketIndex BucketIndexConfig `yaml:"bucket_index"`
243244

244245
// Chunk pool.
@@ -282,6 +283,7 @@ func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet) {
282283
f.DurationVar(&cfg.IgnoreDeletionMarksDelay, "blocks-storage.bucket-store.ignore-deletion-marks-delay", time.Hour*6, "Duration after which the blocks marked for deletion will be filtered out while fetching blocks. "+
283284
"The idea of ignore-deletion-marks-delay is to ignore blocks that are marked for deletion with some delay. This ensures store can still serve blocks that are meant to be deleted but do not have a replacement yet. "+
284285
"Default is 6h, half of the default value for -compactor.deletion-delay.")
286+
f.DurationVar(&cfg.IgnoreBlocksWithin, "blocks-storage.bucket-store.ignore-blocks-within", 0, "The blocks created since `now() - ignore_blocks_within` will not be synced. This should be used together with `-querier.query-store-after` to filter out the blocks that are too new to be queried. A reasonable value for this flag would be `-querier.query-store-after - blocks-storage.bucket-store.bucket-index.max-stale-period` to give some buffer. 0 to disable.")
285287
f.IntVar(&cfg.PostingOffsetsInMemSampling, "blocks-storage.bucket-store.posting-offsets-in-mem-sampling", store.DefaultPostingOffsetInMemorySampling, "Controls what is the ratio of postings offsets that the store will hold in memory.")
286288
f.BoolVar(&cfg.IndexHeaderLazyLoadingEnabled, "blocks-storage.bucket-store.index-header-lazy-loading-enabled", false, "If enabled, store-gateway will lazily memory-map an index-header only once required by a query.")
287289
f.DurationVar(&cfg.IndexHeaderLazyLoadingIdleTimeout, "blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout", 20*time.Minute, "If index-header lazy loading is enabled and this setting is > 0, the store-gateway will release memory-mapped index-headers after 'idle timeout' inactivity.")

0 commit comments

Comments
 (0)