 typedef volatile LONG atomic_int;
 typedef atomic_int atomic_bool;
+typedef atomic_int atomic_flag;
+
+#define ATOMIC_FLAG_INIT 0

 static void atomic_store(atomic_int * ptr, LONG val) {
     InterlockedExchange(ptr, val);
@@ -73,6 +76,12 @@ static LONG atomic_fetch_add(atomic_int * ptr, LONG inc) {
 static LONG atomic_fetch_sub(atomic_int * ptr, LONG dec) {
     return atomic_fetch_add(ptr, -(dec));
 }
+static atomic_bool atomic_flag_test_and_set(atomic_flag * ptr) {
+    return InterlockedExchange(ptr, 1);
+}
+static void atomic_flag_clear(atomic_flag * ptr) {
+    InterlockedExchange(ptr, 0);
+}

 typedef HANDLE pthread_t;

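Note: these Win32 shims work because InterlockedExchange atomically swaps in the new value and returns the previous one, which is exactly the contract of C11's atomic_flag_test_and_set. For comparison, a minimal sketch of the portable <stdatomic.h> API the shims emulate; the names spin_try_lock and spin_unlock are illustrative, not part of the patch:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_flag lock = ATOMIC_FLAG_INIT;

// test_and_set returns the flag's previous value, so a false result
// means the flag was clear and this caller now owns the lock.
static bool spin_try_lock(void) {
    return !atomic_flag_test_and_set(&lock);
}

static void spin_unlock(void) {
    atomic_flag_clear(&lock);
}
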
@@ -2883,24 +2892,20 @@ struct ggml_state {

 // global state
 static struct ggml_state g_state;
-static atomic_int g_state_barrier = 0;
+static atomic_flag g_state_critical = ATOMIC_FLAG_INIT;

 // barrier via spin lock
 inline static void ggml_critical_section_start(void) {
-    int processing = atomic_fetch_add(&g_state_barrier, 1);
-
-    while (processing > 0) {
-        // wait for other threads to finish
-        atomic_fetch_sub(&g_state_barrier, 1);
-        sched_yield(); // TODO: reconsider this
-        processing = atomic_fetch_add(&g_state_barrier, 1);
+    while (atomic_flag_test_and_set(&g_state_critical)) {
+        // spin
+        sched_yield();
     }
 }

 // TODO: make this somehow automatically executed
 // some sort of "sentry" mechanism
 inline static void ggml_critical_section_end(void) {
-    atomic_fetch_sub(&g_state_barrier, 1);
+    atomic_flag_clear(&g_state_critical);
 }

 #if defined(__gnu_linux__)
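Note: the old counter-based scheme had each waiter increment the counter, check whether anyone else was already inside, then decrement and yield before retrying. The replacement is the textbook test-and-set spin lock: the loop exits exactly when this thread is the one that flips the flag from clear to set, and atomic_flag_clear releases it. A self-contained sketch of the pattern using C11 atomics plus POSIX sched_yield (the main function is purely illustrative):

#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag critical = ATOMIC_FLAG_INIT;
static int shared_counter = 0; // protected by the flag above

static void critical_section_start(void) {
    // Spin until test_and_set observes the flag clear (returns false),
    // i.e. until this thread is the one that sets it.
    while (atomic_flag_test_and_set(&critical)) {
        sched_yield(); // give up the time slice instead of busy-burning it
    }
}

static void critical_section_end(void) {
    atomic_flag_clear(&critical);
}

int main(void) {
    critical_section_start();
    shared_counter++; // exclusive access while the flag is held
    critical_section_end();
    printf("counter = %d\n", shared_counter);
    return 0;
}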