@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -43,6 +43,11 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_decl_externc
 #endif

+#if !defined(_WIN32) && !defined(__wasi__)
+#define MI_USE_PTHREADS
+#include <pthread.h>
+#endif
+
 // "options.c"
 void       _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
 void       _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
@@ -78,7 +83,7 @@ bool _mi_os_unprotect(void* addr, size_t size);
 bool       _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
 bool       _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
 bool       _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
-bool       _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+// bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 size_t     _mi_os_good_alloc_size(size_t size);
 bool       _mi_os_has_overcommit(void);

@@ -90,6 +95,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed,
 // "segment-cache.c"
 void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
 bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
+void       _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
 void       _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void       _mi_segment_map_freed_at(const mi_segment_t* segment);

@@ -104,6 +110,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi
 uint8_t*   _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
 void       _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
 void       _mi_abandoned_await_readers(void);
+void       _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);



@@ -157,8 +164,8 @@ bool _mi_page_is_valid(mi_page_t* page);
 // ------------------------------------------------------

 #if defined(__GNUC__) || defined(__clang__)
-#define mi_unlikely(x)     __builtin_expect((x),0)
-#define mi_likely(x)       __builtin_expect((x),1)
+#define mi_unlikely(x)     __builtin_expect(!!(x),false)
+#define mi_likely(x)       __builtin_expect(!!(x),true)
 #else
 #define mi_unlikely(x)     (x)
 #define mi_likely(x)       (x)
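
Aside (not part of the diff): `__builtin_expect(exp, c)` only hints that `exp` most likely equals `c`; it does not coerce `exp` to a boolean. With the old form `__builtin_expect((x),1)`, passing a pointer or a non-0/1 integer compares it against 1 and the hint almost never holds; `!!(x)` folds any non-zero value to 1 first. A minimal sketch, assuming GCC or Clang, with `demo_likely`/`demo_unlikely` as illustrative stand-ins for `mi_likely`/`mi_unlikely`:

#include <stdbool.h>
#include <stddef.h>

#define demo_likely(x)    __builtin_expect(!!(x), true)
#define demo_unlikely(x)  __builtin_expect(!!(x), false)

static int use_hint(const void* p) {
  if (demo_likely(p)) {   // p is a pointer: !! folds any non-NULL value to 1 so the hint matches
    return 1;
  }
  return 0;
}
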
@@ -263,11 +270,6 @@ static inline size_t _mi_wsize_from_size(size_t size) {
   return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
 }

-// Does malloc satisfy the alignment constraints already?
-static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size) {
-  return (alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)));
-}
-
 // Overflow detecting multiply
 #if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
 #include <limits.h>   // UINT_MAX, ULONG_MAX
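
Aside (not part of the diff): the context above selects a built-in overflow-detecting multiply when available. A minimal sketch, assuming GCC >= 5 or Clang, using the generic `__builtin_mul_overflow`; the header's own wrapper (not shown in this hunk) also provides a portable fallback, and `demo_mul_overflow` is an illustrative name:

#include <stdbool.h>
#include <stddef.h>

// Returns true if count*size overflowed size_t; the (possibly wrapped) product is stored in *total.
static inline bool demo_mul_overflow(size_t count, size_t size, size_t* total) {
  return __builtin_mul_overflow(count, size, total);
}
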
@@ -318,7 +320,7 @@ We try to circumvent this in an efficient way:
 - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
   loader itself calls `malloc` even before the modules are initialized.
 - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
-- DragonFly: the uniqueid use is buggy but kept for reference.
+- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323)
 ------------------------------------------------------------------------------------------- */

 extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
@@ -335,16 +337,18 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
 // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
 // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
 #define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)
-#elif defined(__DragonFly__)
-#warning "mimalloc is not working correctly on DragonFly yet."
-//#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
+// #elif defined(__DragonFly__)
+// #warning "mimalloc is not working correctly on DragonFly yet."
+// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
+#elif defined(__ANDROID__)
+// See issue #381
+#define MI_TLS_PTHREAD
 #endif
 #endif

 #if defined(MI_TLS_SLOT)
 static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept;   // forward declaration
 #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
-#include <pthread.h>
 static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
   pthread_t self = pthread_self();
   #if defined(__DragonFly__)
@@ -356,7 +360,6 @@ static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
   return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
 }
 #elif defined(MI_TLS_PTHREAD)
-#include <pthread.h>
 extern pthread_key_t _mi_heap_default_key;
 #endif

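
Aside (not part of the diff): with MI_TLS_PTHREAD (now also selected on Android, issue #381) the default heap is looked up through a pthread key such as `_mi_heap_default_key` above. A minimal self-contained sketch of that pattern; `demo_key`, `demo_get`, and `demo_set` are illustrative names only, and the real key setup in mimalloc lives elsewhere and differs in detail:

#include <pthread.h>
#include <stddef.h>

static pthread_key_t  demo_key;
static pthread_once_t demo_once = PTHREAD_ONCE_INIT;

static void demo_make_key(void) {
  pthread_key_create(&demo_key, NULL);   // no destructor in this sketch
}

// Per-thread "default heap" slot emulated with pthread TLS.
static void* demo_get(void) {
  pthread_once(&demo_once, demo_make_key);
  return pthread_getspecific(demo_key);  // NULL until demo_set() has run in this thread
}

static void demo_set(void* heap) {
  pthread_once(&demo_once, demo_make_key);
  pthread_setspecific(demo_key, heap);
}
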
@@ -366,11 +369,15 @@ extern pthread_key_t _mi_heap_default_key;
 // However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
 extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from

-
 static inline mi_heap_t* mi_get_default_heap(void) {
 #if defined(MI_TLS_SLOT)
   mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
-  if (mi_unlikely(heap == NULL)) { heap = (mi_heap_t*)&_mi_heap_empty; } //_mi_heap_empty_get(); }
+  if (mi_unlikely(heap == NULL)) {
+    #ifdef __GNUC__
+    __asm(""); // prevent conditional load of the address of _mi_heap_empty
+    #endif
+    heap = (mi_heap_t*)&_mi_heap_empty;
+  }
   return heap;
 #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
   mi_heap_t* heap = *mi_tls_pthread_heap_slot();
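
Aside (not part of the diff): the new `__asm("")` is an empty inline-assembly statement that acts as an optimization barrier inside the unlikely branch, discouraging the compiler from if-converting it into a conditional move that would load the address of `_mi_heap_empty` on every call. A minimal sketch of the idiom, assuming GCC or Clang; `pick_heap` and `fallback` are hypothetical names:

static int fallback;   // stand-in for the read-only empty heap

static inline int* pick_heap(int* p) {
  if (__builtin_expect(p == NULL, 0)) {  // cold path
    __asm__("");                         // barrier: keep this as a real branch, not a cmov
    p = &fallback;
  }
  return p;
}
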
@@ -785,90 +792,105 @@ static inline size_t _mi_os_numa_node_count(void) {
 // -------------------------------------------------------------------
 // Getting the thread id should be performant as it is called in the
 // fast path of `_mi_free` and we specialize for various platforms.
+// We only require _mi_threadid() to return a unique id for each thread.
 // -------------------------------------------------------------------
 #if defined(_WIN32)
+
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
   // Windows: works on Intel and ARM in both 32- and 64-bit
   return (uintptr_t)NtCurrentTeb();
 }

-#elif defined(__GNUC__) && \
-      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
+// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
+// both the OS and libc implementation so we use specific tests for each main platform.
+// If you test on another platform and it works please send a PR :-)
+// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
+#elif defined(__GNUC__) && ( \
+           (defined(__GLIBC__)   && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
+        || (defined(__APPLE__)   && (defined(__x86_64__) || defined(__aarch64__))) \
+        || (defined(__BIONIC__)  && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
+        || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
+        || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
+      )

-// TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf
 static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
   void* res;
   const size_t ofs = (slot*sizeof(void*));
-#if defined(__i386__)
-  __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // 32-bit always uses GS
-#elif defined(__APPLE__) && defined(__x86_64__)
-  __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOSX uses GS
-#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
-  __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
-#elif defined(__x86_64__)
-  __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
-#elif defined(__arm__)
-  void** tcb; MI_UNUSED(ofs);
-  __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
-  res = tcb[slot];
-#elif defined(__aarch64__)
-  void** tcb; MI_UNUSED(ofs);
-#if defined(__APPLE__) // M1, issue #343
-  __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
-  tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
-#else
-  __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+  #if defined(__i386__)
+    __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86 32-bit always uses GS
+  #elif defined(__APPLE__) && defined(__x86_64__)
+    __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOSX uses GS
+  #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
+    __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
+  #elif defined(__x86_64__)
+    __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
+  #elif defined(__arm__)
+    void** tcb; MI_UNUSED(ofs);
+    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
+    res = tcb[slot];
+  #elif defined(__aarch64__)
+    void** tcb; MI_UNUSED(ofs);
+    #if defined(__APPLE__) // M1, issue #343
+    __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
+    #else
+    __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+    #endif
+    res = tcb[slot];
   #endif
-  res = tcb[slot];
-#endif
   return res;
 }

-// setting is only used on macOSX for now
+// setting a tls slot is only used on macOS for now
 static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
   const size_t ofs = (slot*sizeof(void*));
-#if defined(__i386__)
-  __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
-#elif defined(__APPLE__) && defined(__x86_64__)
-  __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOSX uses GS
-#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
-  __asm__("movl %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI
-#elif defined(__x86_64__)
-  __asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
-#elif defined(__arm__)
-  void** tcb; MI_UNUSED(ofs);
-  __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
-  tcb[slot] = value;
-#elif defined(__aarch64__)
-  void** tcb; MI_UNUSED(ofs);
-#if defined(__APPLE__) // M1, issue #343
-  __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
-  tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
-#else
-  __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+  #if defined(__i386__)
+    __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
+  #elif defined(__APPLE__) && defined(__x86_64__)
+    __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOS uses GS
+  #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
+    __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI
+  #elif defined(__x86_64__)
+    __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
+  #elif defined(__arm__)
+    void** tcb; MI_UNUSED(ofs);
+    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
+    tcb[slot] = value;
+  #elif defined(__aarch64__)
+    void** tcb; MI_UNUSED(ofs);
+    #if defined(__APPLE__) // M1, issue #343
+    __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
+    #else
+    __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+    #endif
+    tcb[slot] = value;
   #endif
-  tcb[slot] = value;
-#endif
 }

 static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
-#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__))
-  // on Android, slot 1 is the thread ID (pointer to pthread internal struct)
-  return (uintptr_t)mi_tls_slot(1);
-#else
-  // in all our other targets, slot 0 is the pointer to the thread control block
-  return (uintptr_t)mi_tls_slot(0);
-#endif
+  #if defined(__BIONIC__)
+    // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
+    // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
+    return (uintptr_t)mi_tls_slot(1);
+  #else
+    // in all our other targets, slot 0 is the thread id
+    // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
+    // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
+    return (uintptr_t)mi_tls_slot(0);
+  #endif
 }
+
 #else
-// otherwise use standard C
+
+// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
 static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
   return (uintptr_t)&_mi_heap_default;
 }
+
 #endif

+
 // -----------------------------------------------------------------------
 // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
 // -----------------------------------------------------------------------
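
Aside (not part of the diff): the portable fallback in the hunk above works because the address of a thread-local variable is distinct for every live thread, which is all `_mi_thread_id()` requires. A standalone C11 sketch of the same idea, with illustrative names rather than the mimalloc declarations:

#include <stdint.h>

static _Thread_local int tl_anchor;     // one instance per thread

static inline uintptr_t demo_thread_id(void) {
  return (uintptr_t)&tl_anchor;         // unique per live thread; may be reused after a thread exits
}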