
Commit 8824130

lock instead of spinlock

1 parent 6e7801d
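The change replaces the busy-wait spinlock in the graph compute thread pool with a pthread mutex plus condition variable, so idle worker threads sleep instead of burning a core while polling. A minimal standalone sketch of the before/after wait styles (illustrative names, not code from this commit; compiles as a translation unit with -pthread):

// spin_vs_cond.c — illustrative sketch, not from ggml.c
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cnd = PTHREAD_COND_INITIALIZER;
static bool has_work = false;

// Before: poll a shared flag in a tight loop, keeping the CPU busy.
static void wait_spinning(volatile bool * flag) {
    while (!*flag) { /* spin */ }
}

// After: sleep on a condition variable until a producer signals the change.
static void wait_sleeping(void) {
    pthread_mutex_lock(&mtx);
    while (!has_work) {                 // loop guards against spurious wakeups
        pthread_cond_wait(&cnd, &mtx);  // atomically unlock, sleep, relock
    }
    pthread_mutex_unlock(&mtx);
}

static void post_work(void) {
    pthread_mutex_lock(&mtx);
    has_work = true;
    pthread_cond_broadcast(&cnd);       // wake every waiting worker
    pthread_mutex_unlock(&mtx);
}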


ggml.c

Lines changed: 160 additions & 64 deletions
@@ -9112,6 +9112,19 @@ typedef pthread_t ggml_thread_t;
 #define ggml_thread_create pthread_create
 #define ggml_thread_join   pthread_join

+typedef pthread_mutex_t ggml_mutex_t;
+typedef pthread_cond_t  ggml_cond_t;
+
+#define ggml_mutex_init    pthread_mutex_init
+#define ggml_mutex_destroy pthread_mutex_destroy
+#define ggml_cond_init     pthread_cond_init
+#define ggml_cond_destroy  pthread_cond_destroy
+
+#define ggml_mutex_lock     pthread_mutex_lock
+#define ggml_mutex_unlock   pthread_mutex_unlock
+#define ggml_cond_broadcast pthread_cond_broadcast
+#define ggml_cond_wait      pthread_cond_wait
+
 #else

 //typedef pthread_spinlock_t ggml_lock_t;
@@ -9135,17 +9148,31 @@ typedef pthread_t ggml_thread_t;
 #define ggml_thread_create pthread_create
 #define ggml_thread_join   pthread_join

+typedef pthread_mutex_t ggml_mutex_t;
+typedef pthread_cond_t  ggml_cond_t;
+
+#define ggml_mutex_init    pthread_mutex_init
+#define ggml_mutex_destroy pthread_mutex_destroy
+#define ggml_cond_init     pthread_cond_init
+#define ggml_cond_destroy  pthread_cond_destroy
+
+#define ggml_mutex_lock     pthread_mutex_lock
+#define ggml_mutex_unlock   pthread_mutex_unlock
+#define ggml_cond_broadcast pthread_cond_broadcast
+#define ggml_cond_wait      pthread_cond_wait
+
 #endif

 struct ggml_compute_state_shared {
-    ggml_lock_t spin;

     int n_threads;

     // synchronization primitives
-    atomic_int  n_ready;
-    atomic_bool has_work;
-    atomic_bool stop; // stop all threads
+    int  n_ready;
+    bool has_work;
+    bool stop; // stop all threads
+    ggml_mutex_t mutex;
+    ggml_cond_t  cond;
 };

 struct ggml_compute_state {
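With the mutex in place, the shared fields drop their atomic types: n_ready, has_work, and stop become plain int/bool that must only be touched with mutex held, and cond is broadcast after every change so sleeping threads re-check their guards. A self-contained sketch of that access discipline (the stand-in struct and helper name are illustrative, not from the commit):

#include <pthread.h>
#include <stdbool.h>

// Stand-in for the fields this commit adds to ggml_compute_state_shared.
struct shared_state {
    int             n_ready;
    bool            has_work;
    bool            stop;
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
};

// Illustrative helper: every write happens under the mutex and is followed
// by a broadcast, so any thread blocked in cond_wait re-evaluates its guard.
static void set_has_work(struct shared_state * s, bool value) {
    pthread_mutex_lock(&s->mutex);
    s->has_work = value;
    pthread_cond_broadcast(&s->cond);
    pthread_mutex_unlock(&s->mutex);
}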
@@ -9161,43 +9188,57 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;

     const int n_threads = state->shared->n_threads;
-
     while (true) {
-        if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) {
-            atomic_store(&state->shared->has_work, false);
+        ggml_mutex_lock(&state->shared->mutex);
+        if (state->shared->n_ready++ == n_threads - 1) {
+            state->shared->has_work = false;
+            ggml_cond_broadcast(&state->shared->cond);
         } else {
-            while (atomic_load(&state->shared->has_work)) {
-                if (atomic_load(&state->shared->stop)) {
+            while (state->shared->has_work) {
+                if (state->shared->stop) {
+                    ggml_mutex_unlock(&state->shared->mutex);
+                    return 0;
+                }
+                ggml_cond_wait(&state->shared->cond, &state->shared->mutex);
+                if (state->shared->stop) {
+                    ggml_mutex_unlock(&state->shared->mutex);
                     return 0;
                 }
-                ggml_lock_lock  (&state->shared->spin);
-                ggml_lock_unlock(&state->shared->spin);
             }
         }
+        ggml_mutex_unlock(&state->shared->mutex);

-        atomic_fetch_sub(&state->shared->n_ready, 1);
+        ggml_mutex_lock(&state->shared->mutex);
+        state->shared->n_ready--;
+        ggml_cond_broadcast(&state->shared->cond);
+        ggml_mutex_unlock(&state->shared->mutex);

         // wait for work
-        while (!atomic_load(&state->shared->has_work)) {
-            if (atomic_load(&state->shared->stop)) {
-                return 0;
+        ggml_mutex_lock(&state->shared->mutex);
+        while (!state->shared->has_work && !state->shared->stop) {
+            if (state->shared->stop) {
+                ggml_mutex_unlock(&state->shared->mutex);
+                return 0;
             }
-            ggml_lock_lock  (&state->shared->spin);
-            ggml_lock_unlock(&state->shared->spin);
+            ggml_cond_wait(&state->shared->cond, &state->shared->mutex);
         }
+        ggml_mutex_unlock(&state->shared->mutex);

         // check if we should stop
-        if (atomic_load(&state->shared->stop)) {
+        ggml_mutex_lock(&state->shared->mutex);
+        if (state->shared->stop) {
+            ggml_mutex_unlock(&state->shared->mutex);
             break;
         }
+        ggml_mutex_unlock(&state->shared->mutex);

         if (state->node) {
             if (state->params.ith < state->params.nth) {
                 ggml_compute_forward(&state->params, state->node);
             }
-
             state->node = NULL;
         } else {
+            ggml_mutex_unlock(&state->shared->mutex);
             break;
         }
     }
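The rewritten worker loop is an ad-hoc barrier: the last of n_threads arrivals clears has_work and broadcasts, while earlier arrivals sleep until the flag clears (or stop is raised). A condensed, self-contained sketch of that rendezvous pattern (simplified from the hunk above; the real loop also handles stop and the per-node work):

#include <pthread.h>
#include <stdbool.h>

struct rendezvous {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    int             n_ready;
    bool            has_work;
};

// Each thread calls this once per iteration. The final arrival clears the
// flag and wakes everyone; the others sleep until the flag is cleared.
static void arrive_and_wait(struct rendezvous * r, int n_threads) {
    pthread_mutex_lock(&r->mutex);
    if (r->n_ready++ == n_threads - 1) {
        r->has_work = false;
        pthread_cond_broadcast(&r->cond);
    } else {
        while (r->has_work) {
            pthread_cond_wait(&r->cond, &r->mutex);
        }
    }
    pthread_mutex_unlock(&r->mutex);
}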
@@ -9209,19 +9250,32 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
     const int n_threads = cgraph->n_threads;

     struct ggml_compute_state_shared state_shared = {
-        /*.spin      =*/ GGML_LOCK_INITIALIZER,
         /*.n_threads =*/ n_threads,
         /*.n_ready   =*/ 0,
         /*.has_work  =*/ false,
         /*.stop      =*/ false,
+        /*.mutex     =*/ {0},
+        /*.cond      =*/ {0},
     };
+    {
+        int rc = ggml_mutex_init(&state_shared.mutex, NULL);
+        GGML_ASSERT(rc == 0);
+        UNUSED(rc);
+    }
+    {
+        int rc = ggml_cond_init(&state_shared.cond, NULL);
+        GGML_ASSERT(rc == 0);
+        UNUSED(rc);
+    }
     struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL;

     // create thread pool
     if (n_threads > 1) {
-        ggml_lock_init(&state_shared.spin);

-        atomic_store(&state_shared.has_work, true);
+        ggml_mutex_lock(&state_shared.mutex);
+        state_shared.has_work = true;
+        ggml_cond_broadcast(&state_shared.cond);
+        ggml_mutex_unlock(&state_shared.mutex);

         for (int j = 0; j < n_threads - 1; j++) {
             workers[j] = (struct ggml_compute_state) {
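Note the two-step initialization: the {0} entries in the designated initializer only zero-fill the pthread objects; the real setup is done by the checked ggml_mutex_init/ggml_cond_init (i.e. pthread_mutex_init/pthread_cond_init) calls that follow. A minimal sketch of the same checked-init pattern (helper name is illustrative):

#include <assert.h>
#include <pthread.h>

// Illustrative: initialize a mutex/cond pair with checked return codes,
// mirroring the two blocks added after the struct initializer above.
static void init_sync_pair(pthread_mutex_t * mtx, pthread_cond_t * cnd) {
    int rc = pthread_mutex_init(mtx, NULL);  // NULL = default attributes
    assert(rc == 0);
    rc = pthread_cond_init(cnd, NULL);
    assert(rc == 0);
    (void) rc;  // keep -Wunused quiet when asserts are compiled out
}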
@@ -9477,14 +9531,18 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

         // COMPUTE
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

             // launch thread pool
             for (int j = 0; j < n_threads - 1; j++) {
@@ -9498,48 +9556,68 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                 workers[j].node = node;
             }

-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready--;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.n_ready) > 0) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready > 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            atomic_store(&state_shared.has_work, true);
+
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.has_work = true;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
         }

         params.type = GGML_TASK_COMPUTE;
         ggml_compute_forward(&params, node);

         // wait for thread pool
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready--;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.n_ready) != 0) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready != 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
         }

         // FINALIZE
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

             // launch thread pool
             for (int j = 0; j < n_threads - 1; j++) {
@@ -9553,36 +9631,51 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                 workers[j].node = node;
             }

-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready -= 1;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.n_ready) > 0) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready > 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            atomic_store(&state_shared.has_work, true);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.has_work = true;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
         }

         params.type = GGML_TASK_FINALIZE;
         ggml_compute_forward(&params, node);

         // wait for thread pool
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);

-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready--;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);

-            while (atomic_load(&state_shared.n_ready) != 0) {
-                ggml_lock_lock  (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready != 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
         }

         // performance stats (node)
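On the coordinator side each phase ends with the same countdown: decrement n_ready, broadcast, then sleep until every worker has checked in. A hedged sketch of that drain, merged into one critical section for brevity (the commit keeps the decrement and the wait in separate lock/unlock pairs):

#include <pthread.h>

struct countdown {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    int             n_ready;
};

// Record our own arrival, then sleep until the counter drains to zero.
static void drain(struct countdown * c) {
    pthread_mutex_lock(&c->mutex);
    c->n_ready--;
    pthread_cond_broadcast(&c->cond);
    while (c->n_ready > 0) {
        pthread_cond_wait(&c->cond, &c->mutex);
    }
    pthread_mutex_unlock(&c->mutex);
}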
@@ -9598,16 +9691,19 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

     // join thread pool
     if (n_threads > 1) {
-        atomic_store(&state_shared.stop, true);
-        atomic_store(&state_shared.has_work, true);
-
+        ggml_mutex_lock(&state_shared.mutex);
+        state_shared.stop = true;
+        state_shared.has_work = true;
+        ggml_cond_broadcast(&state_shared.cond);
+        ggml_mutex_unlock(&state_shared.mutex);
         for (int j = 0; j < n_threads - 1; j++) {
             int rc = ggml_thread_join(workers[j].thrd, NULL);
             GGML_ASSERT(rc == 0);
             UNUSED(rc);
         }

-        ggml_lock_destroy(&state_shared.spin);
+        ggml_cond_destroy(&state_shared.cond);
+        ggml_mutex_destroy(&state_shared.mutex);
     }

     // performance stats (graph)
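Teardown follows the standard order: raise stop (and has_work, so threads parked waiting for work also wake), broadcast under the lock, join, and only then destroy the primitives; destroying a mutex or condition variable a thread might still be blocked on is undefined behavior. A compact illustrative sketch (struct and function names are stand-ins, not from the commit):

#include <pthread.h>
#include <stdbool.h>

struct pool {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    bool            stop;
    bool            has_work;
    pthread_t *     threads;
    int             n_threads;
};

// Illustrative shutdown: wake all workers with stop set, join them,
// then destroy the primitives once no thread can still touch them.
static void pool_shutdown(struct pool * p) {
    pthread_mutex_lock(&p->mutex);
    p->stop     = true;
    p->has_work = true;  // also unblock workers waiting for work
    pthread_cond_broadcast(&p->cond);
    pthread_mutex_unlock(&p->mutex);

    for (int i = 0; i < p->n_threads; i++) {
        pthread_join(p->threads[i], NULL);
    }
    pthread_cond_destroy(&p->cond);
    pthread_mutex_destroy(&p->mutex);
}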
