Commit 0726695

sanitizer_common: optimize memory drain
Currently we allocate a MemoryMapper per size class. MemoryMapper mmaps and munmaps its internal buffer, and this results in 50 mmap/munmap calls under the global allocator mutex. Reuse the MemoryMapper and its buffer for all size classes instead. This radically reduces the number of mmap/munmap calls. Smaller size classes tend to have more objects allocated, so it is highly likely that the buffer allocated for the first size class will be large enough for all subsequent size classes.

Reviewed By: melver

Differential Revision: https://reviews.llvm.org/D105778
1 parent b270c3f commit 0726695
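
The change boils down to one pattern: keep a single scratch mapping alive for the whole drain and grow it only when a request does not fit, instead of mmap'ing and munmap'ing a fresh buffer for every size class. The sketch below illustrates that grow-only reuse on its own; ReusableBuffer, Acquire() and the raw Linux/POSIX mmap/munmap calls are hypothetical stand-ins for the patch's MemoryMapper::MapPackedCounterArrayBuffer and the sanitizer's MmapOrDieOnFatalError/UnmapOrDie wrappers, not actual sanitizer code.

#include <sys/mman.h>

#include <cstddef>
#include <cstdio>
#include <cstring>

// Illustrative stand-in for the reusable buffer inside MemoryMapper:
// the mapping persists across Acquire() calls and is replaced only when a
// larger request arrives; equal or smaller requests reuse (and clear) it.
class ReusableBuffer {
 public:
  ~ReusableBuffer() {
    if (ptr_) munmap(ptr_, size_);
  }

  void *Acquire(size_t size) {
    if (size_ < size) {
      if (ptr_) munmap(ptr_, size_);  // old mapping is too small; drop it
      ptr_ = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (ptr_ == MAP_FAILED) {
        ptr_ = nullptr;
        size_ = 0;
        return nullptr;
      }
      size_ = size;  // fresh anonymous pages are already zeroed
    } else {
      memset(ptr_, 0, size);  // reuse the existing mapping: no syscall at all
    }
    return ptr_;
  }

 private:
  void *ptr_ = nullptr;
  size_t size_ = 0;
};

int main() {
  ReusableBuffer buf;
  // The first large request maps once; later, smaller requests reuse it,
  // mirroring how the first drained size class usually covers the rest.
  for (size_t request : {8192, 4096, 2048, 4096})
    std::printf("request %zu -> %p\n", request, buf.Acquire(request));
  return 0;
}

In the patch itself the buffer lives in the new top-level MemoryMapper<Allocator> class, which SizeClassAllocator64LocalCache::Drain constructs once and threads through ReturnToAllocator, MaybeReleaseToOS and ReleaseFreeMemoryToOS, so all size classes share one mapping and its release stats are read back via GetAndResetStats.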

File tree

3 files changed: +106 −87 lines changed

  compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h
  compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
  compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp

compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h

Lines changed: 13 additions & 6 deletions

@@ -17,6 +17,7 @@
 template <class SizeClassAllocator>
 struct SizeClassAllocator64LocalCache {
   typedef SizeClassAllocator Allocator;
+  typedef MemoryMapper<Allocator> MemoryMapper;
 
   void Init(AllocatorGlobalStats *s) {
     stats_.Init();
@@ -53,7 +54,7 @@ struct SizeClassAllocator64LocalCache {
     PerClass *c = &per_class_[class_id];
     InitCache(c);
     if (UNLIKELY(c->count == c->max_count))
-      Drain(c, allocator, class_id, c->max_count / 2);
+      Drain(c, allocator, class_id);
     CompactPtrT chunk = allocator->PointerToCompactPtr(
         allocator->GetRegionBeginBySizeClass(class_id),
         reinterpret_cast<uptr>(p));
@@ -62,10 +63,10 @@
   }
 
   void Drain(SizeClassAllocator *allocator) {
+    MemoryMapper memory_mapper(*allocator);
     for (uptr i = 1; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
-      while (c->count > 0)
-        Drain(c, allocator, i, c->count);
+      while (c->count > 0) Drain(&memory_mapper, c, allocator, i, c->count);
     }
   }
 
@@ -106,12 +107,18 @@ struct SizeClassAllocator64LocalCache {
     return true;
   }
 
-  NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator, uptr class_id,
-                      uptr count) {
+  NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator,
+                      uptr class_id) {
+    MemoryMapper memory_mapper(*allocator);
+    Drain(&memory_mapper, c, allocator, class_id, c->max_count / 2);
+  }
+
+  void Drain(MemoryMapper *memory_mapper, PerClass *c,
+             SizeClassAllocator *allocator, uptr class_id, uptr count) {
     CHECK_GE(c->count, count);
     const uptr first_idx_to_drain = c->count - count;
     c->count -= count;
-    allocator->ReturnToAllocator(&stats_, class_id,
+    allocator->ReturnToAllocator(memory_mapper, &stats_, class_id,
                                  &c->chunks[first_idx_to_drain], count);
   }
 };

compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h

Lines changed: 88 additions & 77 deletions
@@ -42,6 +42,60 @@ struct SizeClassAllocator64FlagMasks { // Bit masks.
   };
 };
 
+template <typename Allocator>
+class MemoryMapper {
+ public:
+  typedef typename Allocator::CompactPtrT CompactPtrT;
+
+  explicit MemoryMapper(const Allocator &allocator) : allocator_(allocator) {}
+
+  ~MemoryMapper() {
+    if (buffer_)
+      UnmapOrDie(buffer_, buffer_size_);
+  }
+
+  bool GetAndResetStats(uptr &ranges, uptr &bytes) {
+    ranges = released_ranges_count_;
+    released_ranges_count_ = 0;
+    bytes = released_bytes_;
+    released_bytes_ = 0;
+    return ranges != 0;
+  }
+
+  void *MapPackedCounterArrayBuffer(uptr buffer_size) {
+    // TODO(alekseyshl): The idea to explore is to check if we have enough
+    // space between num_freed_chunks*sizeof(CompactPtrT) and
+    // mapped_free_array to fit buffer_size bytes and use that space instead
+    // of mapping a temporary one.
+    if (buffer_size_ < buffer_size) {
+      if (buffer_)
+        UnmapOrDie(buffer_, buffer_size_);
+      buffer_ = MmapOrDieOnFatalError(buffer_size, "ReleaseToOSPageCounters");
+      buffer_size_ = buffer_size;
+    } else {
+      internal_memset(buffer_, 0, buffer_size);
+    }
+    return buffer_;
+  }
+
+  // Releases [from, to) range of pages back to OS.
+  void ReleasePageRangeToOS(uptr class_id, CompactPtrT from, CompactPtrT to) {
+    const uptr region_base = allocator_.GetRegionBeginBySizeClass(class_id);
+    const uptr from_page = allocator_.CompactPtrToPointer(region_base, from);
+    const uptr to_page = allocator_.CompactPtrToPointer(region_base, to);
+    ReleaseMemoryPagesToOS(from_page, to_page);
+    released_ranges_count_++;
+    released_bytes_ += to_page - from_page;
+  }
+
+ private:
+  const Allocator &allocator_;
+  uptr released_ranges_count_ = 0;
+  uptr released_bytes_ = 0;
+  void *buffer_ = nullptr;
+  uptr buffer_size_ = 0;
+};
+
 template <class Params>
 class SizeClassAllocator64 {
  public:
@@ -57,6 +111,7 @@ class SizeClassAllocator64 {
 
   typedef SizeClassAllocator64<Params> ThisT;
   typedef SizeClassAllocator64LocalCache<ThisT> AllocatorCache;
+  typedef MemoryMapper<ThisT> MemoryMapper;
 
   // When we know the size class (the region base) we can represent a pointer
   // as a 4-byte integer (offset from the region start shifted right by 4).
@@ -120,9 +175,10 @@ class SizeClassAllocator64 {
   }
 
   void ForceReleaseToOS() {
+    MemoryMapper memory_mapper(*this);
     for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
       BlockingMutexLock l(&GetRegionInfo(class_id)->mutex);
-      MaybeReleaseToOS(class_id, true /*force*/);
+      MaybeReleaseToOS(&memory_mapper, class_id, true /*force*/);
     }
   }
 
@@ -131,7 +187,8 @@ class SizeClassAllocator64 {
            alignment <= SizeClassMap::kMaxSize;
   }
 
-  NOINLINE void ReturnToAllocator(AllocatorStats *stat, uptr class_id,
+  NOINLINE void ReturnToAllocator(MemoryMapper *memory_mapper,
+                                  AllocatorStats *stat, uptr class_id,
                                   const CompactPtrT *chunks, uptr n_chunks) {
     RegionInfo *region = GetRegionInfo(class_id);
     uptr region_beg = GetRegionBeginBySizeClass(class_id);
@@ -154,7 +211,7 @@ class SizeClassAllocator64 {
     region->num_freed_chunks = new_num_freed_chunks;
     region->stats.n_freed += n_chunks;
 
-    MaybeReleaseToOS(class_id, false /*force*/);
+    MaybeReleaseToOS(memory_mapper, class_id, false /*force*/);
   }
 
   NOINLINE bool GetFromAllocator(AllocatorStats *stat, uptr class_id,
@@ -362,10 +419,10 @@ class SizeClassAllocator64 {
   // For the performance sake, none of the accessors check the validity of the
   // arguments, it is assumed that index is always in [0, n) range and the value
   // is not incremented past max_value.
-  template<class MemoryMapperT>
+  template <typename MemoryMapper>
   class PackedCounterArray {
    public:
-    PackedCounterArray(u64 num_counters, u64 max_value, MemoryMapperT *mapper)
+    PackedCounterArray(u64 num_counters, u64 max_value, MemoryMapper *mapper)
         : n(num_counters), memory_mapper(mapper) {
       CHECK_GT(num_counters, 0);
       CHECK_GT(max_value, 0);
@@ -389,11 +446,6 @@ class SizeClassAllocator64 {
       buffer = reinterpret_cast<u64*>(
           memory_mapper->MapPackedCounterArrayBuffer(buffer_size));
     }
-    ~PackedCounterArray() {
-      if (buffer) {
-        memory_mapper->UnmapPackedCounterArrayBuffer(buffer, buffer_size);
-      }
-    }
 
     bool IsAllocated() const {
       return !!buffer;
@@ -430,18 +482,21 @@ class SizeClassAllocator64 {
     u64 packing_ratio_log;
     u64 bit_offset_mask;
 
-    MemoryMapperT* const memory_mapper;
+    MemoryMapper *const memory_mapper;
     u64 buffer_size;
     u64* buffer;
   };
 
-  template<class MemoryMapperT>
+  template <class MemoryMapperT>
   class FreePagesRangeTracker {
    public:
-    explicit FreePagesRangeTracker(MemoryMapperT* mapper)
+    explicit FreePagesRangeTracker(MemoryMapperT *mapper, uptr class_id)
         : memory_mapper(mapper),
+          class_id(class_id),
           page_size_scaled_log(Log2(GetPageSizeCached() >> kCompactPtrScale)),
-          in_the_range(false), current_page(0), current_range_start_page(0) {}
+          in_the_range(false),
+          current_page(0),
+          current_range_start_page(0) {}
 
     void NextPage(bool freed) {
       if (freed) {
@@ -463,13 +518,14 @@ class SizeClassAllocator64 {
     void CloseOpenedRange() {
      if (in_the_range) {
        memory_mapper->ReleasePageRangeToOS(
-            current_range_start_page << page_size_scaled_log,
+            class_id, current_range_start_page << page_size_scaled_log,
            current_page << page_size_scaled_log);
        in_the_range = false;
      }
    }
 
-    MemoryMapperT* const memory_mapper;
+    MemoryMapperT *const memory_mapper;
+    const uptr class_id;
     const uptr page_size_scaled_log;
     bool in_the_range;
     uptr current_page;
@@ -480,11 +536,12 @@ class SizeClassAllocator64 {
   // chunks only and returns these pages back to OS.
   // allocated_pages_count is the total number of pages allocated for the
   // current bucket.
-  template<class MemoryMapperT>
+  template <class MemoryMapper>
   static void ReleaseFreeMemoryToOS(CompactPtrT *free_array,
                                     uptr free_array_count, uptr chunk_size,
                                     uptr allocated_pages_count,
-                                    MemoryMapperT *memory_mapper) {
+                                    MemoryMapper *memory_mapper,
+                                    uptr class_id) {
     const uptr page_size = GetPageSizeCached();
 
     // Figure out the number of chunks per page and whether we can take a fast
@@ -520,9 +577,8 @@ class SizeClassAllocator64 {
       UNREACHABLE("All chunk_size/page_size ratios must be handled.");
     }
 
-    PackedCounterArray<MemoryMapperT> counters(allocated_pages_count,
-                                               full_pages_chunk_count_max,
-                                               memory_mapper);
+    PackedCounterArray<MemoryMapper> counters(
+        allocated_pages_count, full_pages_chunk_count_max, memory_mapper);
     if (!counters.IsAllocated())
       return;
 
@@ -547,7 +603,7 @@ class SizeClassAllocator64 {
 
     // Iterate over pages detecting ranges of pages with chunk counters equal
    // to the expected number of chunks for the particular page.
-    FreePagesRangeTracker<MemoryMapperT> range_tracker(memory_mapper);
+    FreePagesRangeTracker<MemoryMapper> range_tracker(memory_mapper, class_id);
    if (same_chunk_count_per_page) {
      // Fast path, every page has the same number of chunks affecting it.
      for (uptr i = 0; i < counters.GetCount(); i++)
@@ -586,7 +642,7 @@ class SizeClassAllocator64 {
   }
 
  private:
-  friend class MemoryMapper;
+  friend class __sanitizer::MemoryMapper<ThisT>;
 
   ReservedAddressRange address_range;
 
@@ -820,57 +876,13 @@ class SizeClassAllocator64 {
     return true;
   }
 
-  class MemoryMapper {
-   public:
-    MemoryMapper(const ThisT& base_allocator, uptr class_id)
-        : allocator(base_allocator),
-          region_base(base_allocator.GetRegionBeginBySizeClass(class_id)),
-          released_ranges_count(0),
-          released_bytes(0) {
-    }
-
-    uptr GetReleasedRangesCount() const {
-      return released_ranges_count;
-    }
-
-    uptr GetReleasedBytes() const {
-      return released_bytes;
-    }
-
-    void *MapPackedCounterArrayBuffer(uptr buffer_size) {
-      // TODO(alekseyshl): The idea to explore is to check if we have enough
-      // space between num_freed_chunks*sizeof(CompactPtrT) and
-      // mapped_free_array to fit buffer_size bytes and use that space instead
-      // of mapping a temporary one.
-      return MmapOrDieOnFatalError(buffer_size, "ReleaseToOSPageCounters");
-    }
-
-    void UnmapPackedCounterArrayBuffer(void *buffer, uptr buffer_size) {
-      UnmapOrDie(buffer, buffer_size);
-    }
-
-    // Releases [from, to) range of pages back to OS.
-    void ReleasePageRangeToOS(CompactPtrT from, CompactPtrT to) {
-      const uptr from_page = allocator.CompactPtrToPointer(region_base, from);
-      const uptr to_page = allocator.CompactPtrToPointer(region_base, to);
-      ReleaseMemoryPagesToOS(from_page, to_page);
-      released_ranges_count++;
-      released_bytes += to_page - from_page;
-    }
-
-   private:
-    const ThisT& allocator;
-    const uptr region_base;
-    uptr released_ranges_count;
-    uptr released_bytes;
-  };
-
   // Attempts to release RAM occupied by freed chunks back to OS. The region is
   // expected to be locked.
   //
   // TODO(morehouse): Support a callback on memory release so HWASan can release
   // aliases as well.
-  void MaybeReleaseToOS(uptr class_id, bool force) {
+  void MaybeReleaseToOS(MemoryMapper *memory_mapper, uptr class_id,
+                        bool force) {
     RegionInfo *region = GetRegionInfo(class_id);
     const uptr chunk_size = ClassIdToSize(class_id);
     const uptr page_size = GetPageSizeCached();
@@ -894,17 +906,16 @@ class SizeClassAllocator64 {
       }
     }
 
-    MemoryMapper memory_mapper(*this, class_id);
-
-    ReleaseFreeMemoryToOS<MemoryMapper>(
+    ReleaseFreeMemoryToOS(
        GetFreeArray(GetRegionBeginBySizeClass(class_id)), n, chunk_size,
-        RoundUpTo(region->allocated_user, page_size) / page_size,
-        &memory_mapper);
+        RoundUpTo(region->allocated_user, page_size) / page_size, memory_mapper,
+        class_id);
 
-    if (memory_mapper.GetReleasedRangesCount() > 0) {
+    uptr ranges, bytes;
+    if (memory_mapper->GetAndResetStats(ranges, bytes)) {
      region->rtoi.n_freed_at_last_release = region->stats.n_freed;
-      region->rtoi.num_releases += memory_mapper.GetReleasedRangesCount();
-      region->rtoi.last_released_bytes = memory_mapper.GetReleasedBytes();
+      region->rtoi.num_releases += ranges;
+      region->rtoi.last_released_bytes = bytes;
    }
    region->rtoi.last_release_at_ns = MonotonicNanoTime();
   }

compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp

Lines changed: 5 additions & 4 deletions

@@ -1243,7 +1243,7 @@ class RangeRecorder {
             Log2(GetPageSizeCached() >> Allocator64::kCompactPtrScale)),
         last_page_reported(0) {}
 
-  void ReleasePageRangeToOS(u32 from, u32 to) {
+  void ReleasePageRangeToOS(u32 class_id, u32 from, u32 to) {
     from >>= page_size_scaled_log;
     to >>= page_size_scaled_log;
     ASSERT_LT(from, to);
@@ -1253,6 +1253,7 @@ class RangeRecorder {
     reported_pages.append(to - from, 'x');
     last_page_reported = to;
   }
+
  private:
   const uptr page_size_scaled_log;
   u32 last_page_reported;
@@ -1282,7 +1283,7 @@ TEST(SanitizerCommon, SizeClassAllocator64FreePagesRangeTracker) {
 
   for (auto test_case : test_cases) {
     RangeRecorder range_recorder;
-    RangeTracker tracker(&range_recorder);
+    RangeTracker tracker(&range_recorder, 1);
     for (int i = 0; test_case[i] != 0; i++)
       tracker.NextPage(test_case[i] == 'x');
     tracker.Done();
@@ -1308,7 +1309,7 @@ class ReleasedPagesTrackingMemoryMapper {
     free(buffer);
   }
 
-  void ReleasePageRangeToOS(u32 from, u32 to) {
+  void ReleasePageRangeToOS(u32 class_id, u32 from, u32 to) {
     uptr page_size_scaled =
         GetPageSizeCached() >> Allocator64::kCompactPtrScale;
     for (u32 i = from; i < to; i += page_size_scaled)
@@ -1352,7 +1353,7 @@ void TestReleaseFreeMemoryToOS() {
 
   Allocator::ReleaseFreeMemoryToOS(&free_array[0], free_array.size(),
                                    chunk_size, kAllocatedPagesCount,
-                                   &memory_mapper);
+                                   &memory_mapper, class_id);
 
   // Verify that there are no released pages touched by used chunks and all
   // ranges of free chunks big enough to contain the entire memory pages had
