Commit 316d873

lock instead of spinlock

1 parent cbef542
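
The change swaps the thread pool's spin/busy-wait synchronization (atomic flags polled inside a ggml_lock_lock/ggml_lock_unlock loop) for a pthread mutex plus condition variable, so idle threads sleep instead of burning a core. Below is a minimal sketch of the before/after pattern; it is illustrative only, not the committed code, and shared_state, wait_for_work, and signal_work are made-up names:

// Illustrative sketch only (not the committed code): the general shape of the
// change, from busy-waiting on a flag to a pthread mutex + condition variable.
#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    bool            has_work;
} shared_state;

// Before (spin style): the waiter loops until the flag changes, e.g.
//
//     while (!atomic_load(&shared->has_work)) {
//         ggml_lock_lock  (&shared->spin);
//         ggml_lock_unlock(&shared->spin);
//     }

// After (mutex + condition variable): the waiter blocks in pthread_cond_wait,
// which atomically releases the mutex while sleeping and re-acquires it
// before returning.
static void wait_for_work(shared_state * shared) {
    pthread_mutex_lock(&shared->mutex);
    while (!shared->has_work) {              // loop guards against spurious wakeups
        pthread_cond_wait(&shared->cond, &shared->mutex);
    }
    pthread_mutex_unlock(&shared->mutex);
}

// The producer side flips the flag under the same mutex and wakes all waiters.
static void signal_work(shared_state * shared) {
    pthread_mutex_lock(&shared->mutex);
    shared->has_work = true;
    pthread_cond_broadcast(&shared->cond);
    pthread_mutex_unlock(&shared->mutex);
}

In the diff below, the same lock/check/wait/unlock shape is applied to every place that previously polled has_work, n_ready, or stop.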

File tree: 1 file changed, +160 -64 lines

ggml.c
Lines changed: 160 additions & 64 deletions
@@ -8954,6 +8954,19 @@ typedef pthread_t ggml_thread_t;
 #define ggml_thread_create pthread_create
 #define ggml_thread_join pthread_join
 
+typedef pthread_mutex_t ggml_mutex_t;
+typedef pthread_cond_t ggml_cond_t;
+
+#define ggml_mutex_init pthread_mutex_init
+#define ggml_mutex_destroy pthread_mutex_destroy
+#define ggml_cond_init pthread_cond_init
+#define ggml_cond_destroy pthread_cond_destroy
+
+#define ggml_mutex_lock pthread_mutex_lock
+#define ggml_mutex_unlock pthread_mutex_unlock
+#define ggml_cond_broadcast pthread_cond_broadcast
+#define ggml_cond_wait pthread_cond_wait
+
 #else
 
 //typedef pthread_spinlock_t ggml_lock_t;
@@ -8977,17 +8990,31 @@ typedef pthread_t ggml_thread_t;
 #define ggml_thread_create pthread_create
 #define ggml_thread_join pthread_join
 
+typedef pthread_mutex_t ggml_mutex_t;
+typedef pthread_cond_t ggml_cond_t;
+
+#define ggml_mutex_init pthread_mutex_init
+#define ggml_mutex_destroy pthread_mutex_destroy
+#define ggml_cond_init pthread_cond_init
+#define ggml_cond_destroy pthread_cond_destroy
+
+#define ggml_mutex_lock pthread_mutex_lock
+#define ggml_mutex_unlock pthread_mutex_unlock
+#define ggml_cond_broadcast pthread_cond_broadcast
+#define ggml_cond_wait pthread_cond_wait
+
 #endif
 
 struct ggml_compute_state_shared {
-    ggml_lock_t spin;
 
     int n_threads;
 
     // synchronization primitives
-    atomic_int n_ready;
-    atomic_bool has_work;
-    atomic_bool stop; // stop all threads
+    int n_ready;
+    bool has_work;
+    bool stop; // stop all threads
+    ggml_mutex_t mutex;
+    ggml_cond_t cond;
 };
 
 struct ggml_compute_state {
@@ -9003,43 +9030,57 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
 
     const int n_threads = state->shared->n_threads;
-
     while (true) {
-        if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) {
-            atomic_store(&state->shared->has_work, false);
+        ggml_mutex_lock(&state->shared->mutex);
+        if (state->shared->n_ready++ == n_threads - 1) {
+            state->shared->has_work = false;
+            ggml_cond_broadcast(&state->shared->cond);
         } else {
-            while (atomic_load(&state->shared->has_work)) {
-                if (atomic_load(&state->shared->stop)) {
+            while (state->shared->has_work) {
+                if (state->shared->stop) {
+                    ggml_mutex_unlock(&state->shared->mutex);
+                    return 0;
+                }
+                ggml_cond_wait(&state->shared->cond, &state->shared->mutex);
+                if (state->shared->stop) {
+                    ggml_mutex_unlock(&state->shared->mutex);
                     return 0;
                 }
-                ggml_lock_lock (&state->shared->spin);
-                ggml_lock_unlock(&state->shared->spin);
             }
         }
+        ggml_mutex_unlock(&state->shared->mutex);
 
-        atomic_fetch_sub(&state->shared->n_ready, 1);
+        ggml_mutex_lock(&state->shared->mutex);
+        state->shared->n_ready--;
+        ggml_cond_broadcast(&state->shared->cond);
+        ggml_mutex_unlock(&state->shared->mutex);
 
        // wait for work
-        while (!atomic_load(&state->shared->has_work)) {
-            if (atomic_load(&state->shared->stop)) {
-                return 0;
+        ggml_mutex_lock(&state->shared->mutex);
+        while (!state->shared->has_work && !state->shared->stop) {
+            if (state->shared->stop) {
+                ggml_mutex_unlock(&state->shared->mutex);
+                return 0;
            }
-            ggml_lock_lock (&state->shared->spin);
-            ggml_lock_unlock(&state->shared->spin);
+            ggml_cond_wait(&state->shared->cond, &state->shared->mutex);
        }
+        ggml_mutex_unlock(&state->shared->mutex);
 
        // check if we should stop
-        if (atomic_load(&state->shared->stop)) {
+        ggml_mutex_lock(&state->shared->mutex);
+        if (state->shared->stop) {
+            ggml_mutex_unlock(&state->shared->mutex);
            break;
        }
+        ggml_mutex_unlock(&state->shared->mutex);
 
        if (state->node) {
            if (state->params.ith < state->params.nth) {
                ggml_compute_forward(&state->params, state->node);
            }
-
            state->node = NULL;
        } else {
+            ggml_mutex_unlock(&state->shared->mutex);
            break;
        }
    }
@@ -9051,19 +9092,32 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
     const int n_threads = cgraph->n_threads;
 
     struct ggml_compute_state_shared state_shared = {
-        /*.spin      =*/ GGML_LOCK_INITIALIZER,
         /*.n_threads =*/ n_threads,
         /*.n_ready   =*/ 0,
         /*.has_work  =*/ false,
         /*.stop      =*/ false,
+        /*.mutex     =*/ {0},
+        /*.cond      =*/ {0},
     };
+    {
+        int rc = ggml_mutex_init(&state_shared.mutex, NULL);
+        GGML_ASSERT(rc == 0);
+        UNUSED(rc);
+    }
+    {
+        int rc = ggml_cond_init(&state_shared.cond, NULL);
+        GGML_ASSERT(rc == 0);
+        UNUSED(rc);
+    }
     struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL;
 
     // create thread pool
     if (n_threads > 1) {
-        ggml_lock_init(&state_shared.spin);
 
-        atomic_store(&state_shared.has_work, true);
+        ggml_mutex_lock(&state_shared.mutex);
+        state_shared.has_work = true;
+        ggml_cond_broadcast(&state_shared.cond);
+        ggml_mutex_unlock(&state_shared.mutex);
 
         for (int j = 0; j < n_threads - 1; j++) {
             workers[j] = (struct ggml_compute_state) {
@@ -9319,14 +9373,18 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
 
         // COMPUTE
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
             // launch thread pool
             for (int j = 0; j < n_threads - 1; j++) {
@@ -9340,48 +9398,68 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                 workers[j].node = node;
             }
 
-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready--;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.n_ready) > 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready > 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            atomic_store(&state_shared.has_work, true);
+
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.has_work = true;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
         }
 
         params.type = GGML_TASK_COMPUTE;
         ggml_compute_forward(&params, node);
 
         // wait for thread pool
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready--;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.n_ready) != 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready != 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
         }
 
         // FINALIZE
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
             // launch thread pool
             for (int j = 0; j < n_threads - 1; j++) {
@@ -9395,36 +9473,51 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                 workers[j].node = node;
             }
 
-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready -= 1;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.n_ready) > 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready > 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            atomic_store(&state_shared.has_work, true);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.has_work = true;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
         }
 
         params.type = GGML_TASK_FINALIZE;
         ggml_compute_forward(&params, node);
 
         // wait for thread pool
         if (node->n_tasks > 1) {
-            if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
-                atomic_store(&state_shared.has_work, false);
+            ggml_mutex_lock(&state_shared.mutex);
+            if (state_shared.n_ready++ == n_threads - 1) {
+                state_shared.has_work = false;
+                ggml_cond_broadcast(&state_shared.cond);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.has_work) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            atomic_fetch_sub(&state_shared.n_ready, 1);
+            ggml_mutex_lock(&state_shared.mutex);
+            state_shared.n_ready--;
+            ggml_cond_broadcast(&state_shared.cond);
+            ggml_mutex_unlock(&state_shared.mutex);
 
-            while (atomic_load(&state_shared.n_ready) != 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
+            ggml_mutex_lock(&state_shared.mutex);
+            while (state_shared.n_ready != 0) {
+                ggml_cond_wait(&state_shared.cond, &state_shared.mutex);
             }
+            ggml_mutex_unlock(&state_shared.mutex);
         }
 
         // performance stats (node)
@@ -9440,16 +9533,19 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
 
     // join thread pool
     if (n_threads > 1) {
-        atomic_store(&state_shared.stop, true);
-        atomic_store(&state_shared.has_work, true);
-
+        ggml_mutex_lock(&state_shared.mutex);
+        state_shared.stop = true;
+        state_shared.has_work = true;
+        ggml_cond_broadcast(&state_shared.cond);
+        ggml_mutex_unlock(&state_shared.mutex);
        for (int j = 0; j < n_threads - 1; j++) {
            int rc = ggml_thread_join(workers[j].thrd, NULL);
            GGML_ASSERT(rc == 0);
            UNUSED(rc);
        }
 
-        ggml_lock_destroy(&state_shared.spin);
+        ggml_cond_destroy(&state_shared.cond);
+        ggml_mutex_destroy(&state_shared.mutex);
     }
 
     // performance stats (graph)
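
A detail the diff relies on throughout: pthread_cond_wait must be called with the mutex held, it releases the mutex while blocked and re-acquires it before returning, and it may wake spuriously, which is why every wait above sits inside a while loop that re-checks its predicate (has_work, n_ready, stop). A small illustrative sketch of that idiom using the same n_ready handshake idea; the names and globals here are hypothetical, not from ggml.c:

// Illustrative sketch (not from ggml.c): the predicate-loop idiom for
// condition variables, applied to a "wait until all workers are idle" counter.
#include <pthread.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv  = PTHREAD_COND_INITIALIZER;
static int n_ready = 0;

// Main thread: block until every worker has checked in and n_ready is 0.
static void wait_until_idle(void) {
    pthread_mutex_lock(&mtx);
    while (n_ready != 0) {               // predicate re-checked after each wakeup
        pthread_cond_wait(&cv, &mtx);
    }
    pthread_mutex_unlock(&mtx);
}

// Worker thread: decrement the counter and wake any waiter that may now proceed.
static void mark_done(void) {
    pthread_mutex_lock(&mtx);
    n_ready--;
    pthread_cond_broadcast(&cv);
    pthread_mutex_unlock(&mtx);
}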
