Skip to content

Commit 60282da

Browse files
authored
Merge pull request #743 from Altinity/list_objects_cache
Antalya: Cache the list objects operation on object storage using a TTL + prefix matching cache implementation
2 parents 4ffaf6c + 96cf2d2 commit 60282da

21 files changed

+773
-21
lines changed

programs/server/Server.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
#include <Storages/System/attachInformationSchemaTables.h>
8484
#include <Storages/Cache/ExternalDataSourceCache.h>
8585
#include <Storages/Cache/registerRemoteFileMetadatas.h>
86+
#include <Storages/Cache/ObjectStorageListObjectsCache.h>
8687
#include <AggregateFunctions/registerAggregateFunctions.h>
8788
#include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h>
8889
#include <Functions/registerFunctions.h>
@@ -318,6 +319,9 @@ namespace ServerSetting
318319
extern const ServerSettingsUInt64 max_prefixes_deserialization_thread_pool_free_size;
319320
extern const ServerSettingsUInt64 prefixes_deserialization_thread_pool_thread_pool_queue_size;
320321
extern const ServerSettingsUInt64 input_format_parquet_metadata_cache_max_size;
322+
extern const ServerSettingsUInt64 object_storage_list_objects_cache_size;
323+
extern const ServerSettingsUInt64 object_storage_list_objects_cache_max_entries;
324+
extern const ServerSettingsUInt64 object_storage_list_objects_cache_ttl;
321325
}
322326

323327
}
@@ -2327,6 +2331,10 @@ try
23272331
if (dns_cache_updater)
23282332
dns_cache_updater->start();
23292333

2334+
ObjectStorageListObjectsCache::instance().setMaxSizeInBytes(server_settings[ServerSetting::object_storage_list_objects_cache_size]);
2335+
ObjectStorageListObjectsCache::instance().setMaxCount(server_settings[ServerSetting::object_storage_list_objects_cache_max_entries]);
2336+
ObjectStorageListObjectsCache::instance().setTTL(server_settings[ServerSetting::object_storage_list_objects_cache_ttl]);
2337+
23302338
auto replicas_reconnector = ReplicasReconnector::init(global_context);
23312339
ParquetFileMetaDataCache::instance()->setMaxSizeInBytes(server_settings[ServerSetting::input_format_parquet_metadata_cache_max_size]);
23322340

src/Access/Common/AccessType.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,8 @@ enum class AccessType : uint8_t
182182
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
183183
M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
184184
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
185-
M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET METADATA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
185+
M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET METADATA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
186+
M(SYSTEM_DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE, "SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
186187
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
187188
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
188189
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \

src/Common/ProfileEvents.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -963,7 +963,11 @@ The server successfully detected this situation and will download merged part fr
963963
M(ParquetFetchWaitTimeMicroseconds, "Time of waiting fetching parquet data", ValueType::Microseconds) \
964964
\
965965
M(ParquetMetaDataCacheHits, "Number of times the read from filesystem cache hit the cache.", ValueType::Number) \
966-
M(ParquetMetaDataCacheMisses, "Number of times the read from filesystem cache miss the cache.", ValueType::Number) \
966+
M(ParquetMetaDataCacheMisses, "Number of times the read from filesystem cache miss the cache.", ValueType::Number) \
967+
M(ObjectStorageListObjectsCacheHits, "Number of times object storage list objects operation hit the cache.", ValueType::Number) \
968+
M(ObjectStorageListObjectsCacheMisses, "Number of times object storage list objects operation miss the cache.", ValueType::Number) \
969+
M(ObjectStorageListObjectsCacheExactMatchHits, "Number of times object storage list objects operation hit the cache with an exact match.", ValueType::Number) \
970+
M(ObjectStorageListObjectsCachePrefixMatchHits, "Number of times object storage list objects operation hit the cache using prefix matching.", ValueType::Number) \
967971

968972
#ifdef APPLY_FOR_EXTERNAL_EVENTS
969973
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)

src/Common/TTLCachePolicy.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,10 +243,10 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
243243
return res;
244244
}
245245

246-
private:
246+
protected:
247247
using Cache = std::unordered_map<Key, MappedPtr, HashFunction>;
248248
Cache cache;
249-
249+
private:
250250
/// TODO To speed up removal of stale entries, we could also add another container sorted on expiry times which maps keys to iterators
251251
/// into the cache. To insert an entry, add it to the cache + add the iterator to the sorted container. To remove stale entries, do a
252252
/// binary search on the sorted container and erase all left of the found key.

src/Core/ServerSettings.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,8 +1004,10 @@ namespace DB
10041004
```
10051005
)", 0) \
10061006
DECLARE(Bool, storage_shared_set_join_use_inner_uuid, false, "If enabled, an inner UUID is generated during the creation of SharedSet and SharedJoin. ClickHouse Cloud only", 0) \
1007-
DECLARE(UInt64, input_format_parquet_metadata_cache_max_size, 500000000, "Maximum size of parquet file metadata cache", 0) \
1008-
1007+
DECLARE(UInt64, input_format_parquet_metadata_cache_max_size, 500000000, "Maximum size of parquet file metadata cache", 0) \
1008+
DECLARE(UInt64, object_storage_list_objects_cache_size, 500000000, "Maximum size of ObjectStorage list objects cache in bytes. Zero means disabled.", 0) \
1009+
DECLARE(UInt64, object_storage_list_objects_cache_max_entries, 1000, "Maximum size of ObjectStorage list objects cache in entries. Zero means disabled.", 0) \
1010+
DECLARE(UInt64, object_storage_list_objects_cache_ttl, 3600, "Time to live of records in ObjectStorage list objects cache in seconds. Zero means unlimited", 0) \
10091011
// clang-format on
10101012

10111013
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below

src/Core/Settings.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6108,6 +6108,9 @@ Limit for hosts used for request in object storage cluster table functions - azu
61086108
Possible values:
61096109
- Positive integer.
61106110
- 0 — All hosts in cluster.
6111+
)", EXPERIMENTAL) \
6112+
DECLARE(Bool, use_object_storage_list_objects_cache, false, R"(
6113+
Cache the list of objects returned by list objects calls in object storage
61116114
)", EXPERIMENTAL) \
61126115
\
61136116
/* ####################################################### */ \

src/Core/SettingsChangesHistory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
7272
{"use_iceberg_metadata_files_cache", true, true, "New setting"},
7373
{"iceberg_timestamp_ms", 0, 0, "New setting."},
7474
{"iceberg_snapshot_id", 0, 0, "New setting."},
75+
/// Newly added setting: the previous and the new value are both its default (false), matching the other "New setting" entries.
{"use_object_storage_list_objects_cache", false, false, "New setting."},
7576
});
7677
addSettingsChanges(settings_changes_history, "24.12.2.20000",
7778
{

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ class AzureObjectStorage : public IObjectStorage
3131
const String & object_namespace_,
3232
const String & description_);
3333

34+
/// Reports that Azure Blob Storage supports the list-objects cache; always returns true.
bool supportsListObjectsCache() override { return true; }
35+
3436
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
3537

3638
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ class IObjectStorage
276276
#endif
277277

278278

279+
/// Whether this object storage supports the list-objects cache.
/// The base implementation returns false; storages that support it override this to return true.
virtual bool supportsListObjectsCache() { return false; }
280+
279281
private:
280282
mutable std::mutex throttlers_mutex;
281283
ThrottlerPtr remote_read_throttler;

src/Disks/ObjectStorages/S3/S3ObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ class S3ObjectStorage : public IObjectStorage
8181

8282
/// Identifies this storage as ObjectStorageType::S3.
ObjectStorageType getType() const override { return ObjectStorageType::S3; }
8383

84+
/// Reports that S3 object storage supports the list-objects cache; always returns true.
bool supportsListObjectsCache() override { return true; }
85+
8486
bool exists(const StoredObject & object) const override;
8587

8688
std::unique_ptr<ReadBufferFromFileBase> readObject( /// NOLINT

0 commit comments

Comments
 (0)