Skip to content

Commit 5e3869e

Browse files
Jiayue Baofacebook-github-bot
authored andcommitted
Move size distribution tracking logic out of size-controller
Summary: Use a separate worker to track and upload size distribution stats. Reviewed By: jaesoo-fb Differential Revision: D47028003 fbshipit-source-id: 931f17a09d79365a02771ac18dde165a56bd46e8
1 parent f75546b commit 5e3869e

File tree

5 files changed

+164
-66
lines changed

5 files changed

+164
-66
lines changed

cachelib/experimental/objcache2/ObjectCache-inl.h

Lines changed: 41 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,22 @@ void ObjectCache<AllocatorT>::init() {
103103
}
104104
}
105105

106+
initWorkers();
107+
}
108+
109+
template <typename AllocatorT>
110+
void ObjectCache<AllocatorT>::initWorkers() {
106111
if (config_.objectSizeTrackingEnabled &&
107112
config_.sizeControllerIntervalMs != 0) {
108-
startSizeController(
109-
std::chrono::milliseconds{config_.sizeControllerIntervalMs},
110-
config_.sizeControllerThrottlerConfig);
113+
startWorker(kSizeControllerName, sizeController_,
114+
std::chrono::milliseconds{config_.sizeControllerIntervalMs},
115+
config_.sizeControllerThrottlerConfig);
116+
}
117+
118+
if (config_.objectSizeTrackingEnabled &&
119+
config_.objectSizeDistributionTrackingEnabled) {
120+
startWorker(kSizeDistTrackerName, sizeDistTracker_,
121+
std::chrono::seconds{60} /*default interval to be 60s*/);
111122
}
112123
}
113124

@@ -304,7 +315,7 @@ uint32_t ObjectCache<AllocatorT>::getL1AllocSize(uint8_t maxKeySizeBytes) {
304315

305316
template <typename AllocatorT>
306317
ObjectCache<AllocatorT>::~ObjectCache() {
307-
stopSizeController();
318+
stopAllWorkers();
308319

309320
for (auto itr = this->l1Cache_->begin(); itr != this->l1Cache_->end();
310321
++itr) {
@@ -339,6 +350,10 @@ void ObjectCache<AllocatorT>::getObjectCacheCounters(
339350
if (sizeController_) {
340351
sizeController_->getCounters(visitor);
341352
}
353+
354+
if (sizeDistTracker_) {
355+
sizeDistTracker_->getCounters(visitor);
356+
}
342357
}
343358

344359
template <typename AllocatorT>
@@ -357,41 +372,44 @@ ObjectCache<AllocatorT>::serializeConfigParams() const {
357372
}
358373

359374
template <typename AllocatorT>
360-
bool ObjectCache<AllocatorT>::startSizeController(
361-
std::chrono::milliseconds interval, const util::Throttler::Config& config) {
362-
if (!stopSizeController()) {
363-
XLOG(ERR) << "Size controller is already running. Cannot start it again.";
375+
template <typename WorkerT, typename... Args>
376+
bool ObjectCache<AllocatorT>::startWorker(folly::StringPiece name,
377+
std::unique_ptr<WorkerT>& worker,
378+
std::chrono::milliseconds interval,
379+
Args&&... args) {
380+
if (!stopWorker(name, worker)) {
381+
XLOGF(ERR, "Worker '{}' is already running. Cannot start it again.", name);
364382
return false;
365383
}
366384

367-
sizeController_ =
368-
std::make_unique<ObjectCacheSizeController<AllocatorT>>(*this, config);
369-
bool ret = sizeController_->start(interval, "ObjectCache-SizeController");
385+
worker = std::make_unique<WorkerT>(*this, std::forward<Args>(args)...);
386+
bool ret = worker->start(interval, name);
370387
if (ret) {
371-
XLOG(DBG) << "Started ObjectCache SizeController";
388+
XLOGF(INFO, "Started worker '{}'", name);
372389
} else {
373-
XLOGF(
374-
ERR,
375-
"Couldn't start ObjectCache SizeController, interval: {} milliseconds",
376-
interval.count());
390+
XLOGF(ERR, "Couldn't start worker '{}', interval: {} milliseconds", name,
391+
interval.count());
377392
}
378393
return ret;
379394
}
380395

381396
template <typename AllocatorT>
382-
bool ObjectCache<AllocatorT>::stopSizeController(std::chrono::seconds timeout) {
383-
if (!sizeController_) {
397+
template <typename WorkerT>
398+
bool ObjectCache<AllocatorT>::stopWorker(folly::StringPiece name,
399+
std::unique_ptr<WorkerT>& worker,
400+
std::chrono::seconds timeout) {
401+
if (!worker) {
402+
XLOGF(INFO, "Worker '{}' has not been started. No need to stop.", name);
384403
return true;
385404
}
386405

387-
bool ret = sizeController_->stop(timeout);
406+
bool ret = worker->stop(timeout);
388407
if (ret) {
389-
XLOG(DBG) << "Stopped ObjectCache SizeController";
408+
XLOGF(INFO, "Stopped worker '{}'", name);
390409
} else {
391-
XLOGF(ERR, "Couldn't stop ObjectCache SizeController, timeout: {} seconds",
410+
XLOGF(ERR, "Couldn't stop worker '{}', timeout: {} seconds", name,
392411
timeout.count());
393412
}
394-
sizeController_.reset();
395413
return ret;
396414
}
397415

@@ -402,11 +420,7 @@ bool ObjectCache<AllocatorT>::persist() {
402420
}
403421

404422
// Stop all the other workers before persist
405-
if (!stopSizeController()) {
406-
return false;
407-
}
408-
409-
if (!this->l1Cache_->stopWorkers()) {
423+
if (!stopAllWorkers()) {
410424
return false;
411425
}
412426

cachelib/experimental/objcache2/ObjectCache.h

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "cachelib/experimental/objcache2/ObjectCacheBase.h"
3838
#include "cachelib/experimental/objcache2/ObjectCacheConfig.h"
3939
#include "cachelib/experimental/objcache2/ObjectCacheSizeController.h"
40+
#include "cachelib/experimental/objcache2/ObjectCacheSizeDistTracker.h"
4041
#include "cachelib/experimental/objcache2/persistence/Persistence.h"
4142
#include "cachelib/experimental/objcache2/persistence/gen-cpp2/persistent_data_types.h"
4243
#include "cachelib/experimental/objcache2/util/ThreadMemoryTracker.h"
@@ -159,6 +160,7 @@ class ObjectCache : public ObjectCacheBase<AllocatorT> {
159160
using Persistor = Persistor<ObjectCache<AllocatorT>>;
160161
using Restorer = Restorer<ObjectCache<AllocatorT>>;
161162
using EvictionIterator = typename AllocatorT::EvictionIterator;
163+
using AccessIterator = typename AllocatorT::AccessIterator;
162164

163165
enum class AllocStatus { kSuccess, kAllocError, kKeyAlreadyExists };
164166

@@ -271,6 +273,11 @@ class ObjectCache : public ObjectCacheBase<AllocatorT> {
271273
// This is only used in tests.
272274
AllocatorT& getL1Cache() { return *this->l1Cache_; }
273275

276+
// Get an iterator to iterate over all items in object-cache.
277+
AccessIterator begin() { return this->l1Cache_->begin(); }
278+
279+
AccessIterator end() { return this->l1Cache_->end(); }
280+
274281
// Get the default l1 allocation size in bytes.
275282
static uint32_t getL1AllocSize(uint8_t maxKeySizeBytes);
276283

@@ -386,6 +393,25 @@ class ObjectCache : public ObjectCacheBase<AllocatorT> {
386393
->objectSize;
387394
}
388395

396+
size_t getObjectSize(AccessIterator& itr) const {
397+
if (!itr.asHandle()) {
398+
return 0;
399+
}
400+
return reinterpret_cast<const ObjectCacheItem*>(itr.asHandle()->getMemory())
401+
->objectSize;
402+
}
403+
404+
// Stop all workers
405+
//
406+
// @return true if all workers have been successfully stopped
407+
bool stopAllWorkers(std::chrono::seconds timeout = std::chrono::seconds{0}) {
408+
bool success = true;
409+
success &= stopWorker(kSizeControllerName, sizeController_, timeout);
410+
success &= stopWorker(kSizeDistTrackerName, sizeDistTracker_, timeout);
411+
success &= this->l1Cache_->stopWorkers(timeout);
412+
return success;
413+
}
414+
389415
protected:
390416
// Serialize cache allocator config for exporting to Scuba
391417
std::map<std::string, std::string> serializeConfigParams() const override;
@@ -395,8 +421,14 @@ class ObjectCache : public ObjectCacheBase<AllocatorT> {
395421
static constexpr uint32_t kL1AllocSizeMin = 64;
396422
static constexpr const char* kPlaceholderKey = "_cl_ph";
397423

424+
// Names of periodic workers
425+
static constexpr folly::StringPiece kSizeControllerName{"SizeController"};
426+
static constexpr folly::StringPiece kSizeDistTrackerName{"SizeDistTracker"};
427+
398428
void init();
399429

430+
void initWorkers();
431+
400432
// Allocate an item handle from the interal cache allocator. This item's
401433
// storage is used to cache pointer to objects in object-cache.
402434
typename AllocatorT::WriteHandle allocateFromL1(folly::StringPiece key,
@@ -411,19 +443,29 @@ class ObjectCache : public ObjectCacheBase<AllocatorT> {
411443
// Returns the total number of placeholders
412444
size_t getNumPlaceholders() const { return placeholders_.size(); }
413445

414-
// Start size controller
446+
// Start a periodic worker
415447
//
448+
// @param name name of the worker
449+
// @param worker unique pointer of the worker to start
416450
// @param interval the period this worker fires
417-
// @param config throttling config
418-
// @return true if size controller has been successfully started
419-
bool startSizeController(std::chrono::milliseconds interval,
420-
const util::Throttler::Config& config);
421-
422-
// Stop size controller
451+
// @param args... the rest of the arguments to initialize the worker
452+
// @return true if the worker has been successfully started
453+
template <typename WorkerT, typename... Args>
454+
bool startWorker(folly::StringPiece name,
455+
std::unique_ptr<WorkerT>& worker,
456+
std::chrono::milliseconds interval,
457+
Args&&... args);
458+
459+
// Stop a periodic worker
423460
//
461+
// @param name name of the worker
462+
// @param worker unique pointer of the worker to stop
463+
// @param timeout timeout for the worker stopping
424464
// @return true if size controller has been successfully stopped
425-
bool stopSizeController(std::chrono::seconds timeout = std::chrono::seconds{
426-
0});
465+
template <typename WorkerT>
466+
bool stopWorker(folly::StringPiece name,
467+
std::unique_ptr<WorkerT>& worker,
468+
std::chrono::seconds timeout = std::chrono::seconds{0});
427469

428470
// Get a ReadHandle reference from the object shared_ptr
429471
template <typename T>
@@ -462,6 +504,10 @@ class ObjectCache : public ObjectCacheBase<AllocatorT> {
462504
// cache size limit
463505
std::unique_ptr<ObjectCacheSizeController<AllocatorT>> sizeController_;
464506

507+
// A periodic worker that tracks the object size distribution stats.
508+
std::unique_ptr<ObjectCacheSizeDistTracker<ObjectCache<AllocatorT>>>
509+
sizeDistTracker_;
510+
465511
// Actual object size in total
466512
std::atomic<size_t> totalObjectSizeBytes_{0};
467513

cachelib/experimental/objcache2/ObjectCacheSizeController-inl.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ void ObjectCacheSizeController<AllocatorT>::work() {
7272
totalObjSize, currentNumEntries, averageObjSize,
7373
newEntriesLimit, currentEntriesLimit_);
7474
}
75-
76-
if (objCache_.config_.objectSizeDistributionTrackingEnabled) {
77-
trackObjectSizeDistributionStats();
78-
}
7975
}
8076

8177
template <typename AllocatorT>
@@ -131,17 +127,6 @@ void ObjectCacheSizeController<AllocatorT>::getCounters(
131127
visitor("objcache.jemalloc_active_bytes", jemallocActiveBytes);
132128
visitor("objcache.jemalloc_allocated_bytes", jemallocAllocatedBytes);
133129
}
134-
if (objCache_.config_.objectSizeDistributionTrackingEnabled) {
135-
constexpr folly::StringPiece fmt = "{}_p{}";
136-
constexpr folly::StringPiece prefix =
137-
"objcache.size_distribution.object_size_bytes";
138-
139-
for (auto quantile : objectSizeBytesHist_.quantiles()) {
140-
visitor(
141-
folly::sformat(fmt, prefix, static_cast<uint32_t>(quantile * 100)),
142-
objectSizeBytesHist_.estimateQuantile(quantile));
143-
}
144-
}
145130
}
146131

147132
template <typename AllocatorT>

cachelib/experimental/objcache2/ObjectCacheSizeController.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#pragma once
1818

1919
#include <folly/memory/Malloc.h>
20-
#include <folly/stats/QuantileHistogram.h>
2120

2221
#include "cachelib/common/PeriodicWorker.h"
2322

@@ -58,18 +57,6 @@ class ObjectCacheSizeController : public PeriodicWorker {
5857
return {jemallocAllocatedBytes, jemallocActiveBytes};
5958
}
6059

61-
void trackObjectSizeDistributionStats() {
62-
// scan the cache to get the object size
63-
for (auto itr = objCache_.l1Cache_->begin();
64-
itr != objCache_.l1Cache_->end();
65-
++itr) {
66-
size_t objectSize = reinterpret_cast<const typename ObjectCache::Item*>(
67-
itr.asHandle()->getMemory())
68-
->objectSize;
69-
objectSizeBytesHist_.addValue(objectSize);
70-
}
71-
}
72-
7360
// threshold in percentage to determine whether the size-controller should do
7461
// the calculation
7562
const size_t kSizeControllerThresholdPct = 50;
@@ -81,8 +68,6 @@ class ObjectCacheSizeController : public PeriodicWorker {
8168

8269
// will be adjusted to control the cache size limit
8370
std::atomic<size_t> currentEntriesLimit_;
84-
85-
folly::QuantileHistogram<> objectSizeBytesHist_{};
8671
};
8772

8873
} // namespace objcache2
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <folly/stats/QuantileHistogram.h>
20+
21+
#include "cachelib/common/PeriodicWorker.h"
22+
23+
namespace facebook {
24+
namespace cachelib {
25+
namespace objcache2 {
26+
27+
template <typename ObjectCache>
28+
class ObjectCacheSizeDistTracker : public PeriodicWorker {
29+
public:
30+
explicit ObjectCacheSizeDistTracker(ObjectCache& objCache)
31+
: objCache_(objCache),
32+
objectSizeBytesHist_{std::make_shared<folly::QuantileHistogram<>>()} {}
33+
34+
void getCounters(const util::CounterVisitor& visitor) const {
35+
std::shared_ptr<folly::QuantileHistogram<>> curHist;
36+
{
37+
// lock the mutex before accessing objectSizeBytesHist_
38+
std::lock_guard<std::mutex> l(mutex_);
39+
curHist = objectSizeBytesHist_;
40+
}
41+
42+
for (auto quantile : curHist->quantiles()) {
43+
visitor(fmt::format("objcache.size_distribution.object_size_bytes_p{}",
44+
static_cast<uint32_t>(quantile * 100)),
45+
curHist->estimateQuantile(quantile));
46+
}
47+
}
48+
49+
private:
50+
void work() override final {
51+
auto newHist = std::make_shared<folly::QuantileHistogram<>>();
52+
// scan the cache to get the object size
53+
for (auto itr = objCache_.begin(); itr != objCache_.end(); ++itr) {
54+
newHist->addValue(objCache_.getObjectSize(itr));
55+
}
56+
// lock the mutex before updating objectSizeBytesHist_
57+
std::lock_guard<std::mutex> l(mutex_);
58+
objectSizeBytesHist_ = newHist;
59+
}
60+
61+
ObjectCache& objCache_;
62+
mutable std::mutex mutex_;
63+
std::shared_ptr<folly::QuantileHistogram<>> objectSizeBytesHist_{};
64+
};
65+
66+
} // namespace objcache2
67+
} // namespace cachelib
68+
} // namespace facebook

0 commit comments

Comments
 (0)