Skip to content

Commit e771674

Browse files
threadpool: proper handling for non-specified cpumask
1 parent: ef1b87d · commit: e771674

File tree

1 file changed

+8
-18
lines changed

1 file changed

+8
-18
lines changed

ggml.c

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1834,6 +1834,7 @@ struct ggml_compute_threadpool {
18341834
struct ggml_compute_state {
18351835
ggml_thread_t thrd;
18361836
bool cpumask[GGML_N_CORES_MAX];
1837+
bool mask_specified;
18371838
int ith;
18381839
struct ggml_compute_threadpool * threadpool;
18391840
enum ggml_status ec;
@@ -19472,13 +19473,6 @@ static bool __thread_priority(int32_t prio) {
1947219473

1947319474
#endif
1947419475

19475-
static void __init_stack(size_t size) {
19476-
void* ptr = alloca(size);
19477-
if (ptr) {
19478-
memset(ptr, 0, size);
19479-
}
19480-
}
19481-
1948219476
#ifdef __aarch64__
1948319477

1948419478
static inline void __cpu_relax(void) {
@@ -19553,8 +19547,6 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
1955319547

1955419548
threadpool->workers = workers;
1955519549

19556-
__init_stack(2ULL * 1024 * 1024);
19557-
1955819550
int cpumask_iter = 0;
1955919551

1956019552
__process_priority(tpp->prio);
@@ -19566,12 +19558,12 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
1956619558
.ith = j,
1956719559
.threadpool = threadpool,
1956819560
.ec = GGML_STATUS_SUCCESS,
19561+
.mask_specified = false
1956919562
};
1957019563

1957119564
if (tpp->mask_specified) {
1957219565
__cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19573-
} else {
19574-
workers[j].cpumask[j] = true;
19566+
workers[j].mask_specified = true;
1957519567
}
1957619568

1957719569
// Spin threads for all secondary workers
@@ -19841,12 +19833,9 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
1984119833
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
1984219834
struct ggml_compute_threadpool * threadpool = state->threadpool;
1984319835

19844-
#ifndef __aarch64__
19845-
__init_stack(2ULL * 1024 * 1024);
19846-
#endif
19847-
1984819836
__thread_priority(threadpool->prio);
19849-
__thread_affinity(state->cpumask);
19837+
if (state->mask_specified)
19838+
__thread_affinity(state->cpumask);
1985019839

1985119840
// Indicate that we're ready to go
1985219841
atomic_fetch_add(&threadpool->n_ready, 1);
@@ -20096,7 +20085,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
2009620085
bool disposable_threadpool = false;
2009720086

2009820087
if (threadpool == NULL) {
20099-
//GGML_PRINT("NOTE: Threadpool is not specified. Will create a disposable threadpool\n");
20088+
// GGML_PRINT("NOTE: Threadpool is not specified. Will create a disposable threadpool\n");
2010020089
struct ggml_threadpool_params tpp = {
2010120090
.mask_specified = false,
2010220091
.n_threads = n_threads,
@@ -20118,7 +20107,8 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
2011820107
}
2011920108

2012020109
// Update main thread affinity to match the current threadpool
20121-
__thread_affinity(threadpool->workers[0].cpumask);
20110+
if (threadpool->workers[0].mask_specified)
20111+
__thread_affinity(threadpool->workers[0].cpumask);
2012220112

2012320113
// Set up work
2012420114
threadpool->cgraph = cgraph;

0 commit comments

Comments
 (0)