Skip to content

Commit 492b76d

Browse files
committed
Address review comments
1 parent 316df55 commit 492b76d

File tree

4 files changed

+30
-17
lines changed

4 files changed

+30
-17
lines changed

Makefile

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -219,6 +219,11 @@ ifdef LLAMA_DISABLE_LOGS
219219
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
220220
endif # LLAMA_DISABLE_LOGS
221221

222+
# disable ggml.c's use of sgemm.cpp
223+
ifdef LLAMA_NO_LLAMAFILE
224+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
225+
endif
226+
222227
# warnings
223228
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
224229
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \

common/common.cpp

Lines changed: 13 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -109,11 +109,11 @@ int32_t get_num_physical_cores() {
109109

110110
static void cpuid(unsigned leaf, unsigned subleaf,
111111
unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
112-
__asm__("movq\t%%rbx,%%rsi\n\t"
113-
"cpuid\n\t"
114-
"xchgq\t%%rbx,%%rsi"
115-
: "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
116-
: "0"(leaf), "2"(subleaf));
112+
__asm__("movq\t%%rbx,%%rsi\n\t"
113+
"cpuid\n\t"
114+
"xchgq\t%%rbx,%%rsi"
115+
: "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
116+
: "0"(leaf), "2"(subleaf));
117117
}
118118

119119
static int pin_cpu(int cpu) {
@@ -140,10 +140,12 @@ static bool is_running_on_efficiency_core(void) {
140140
static int count_math_cpus(int cpu_count) {
141141
int result = 0;
142142
for (int cpu = 0; cpu < cpu_count; ++cpu) {
143-
if (pin_cpu(cpu))
143+
if (pin_cpu(cpu)) {
144144
return -1;
145-
if (is_running_on_efficiency_core())
145+
}
146+
if (is_running_on_efficiency_core()) {
146147
continue; // efficiency cores harm lockstep threading
148+
}
147149
++cpu; // hyperthreading isn't useful for linear algebra
148150
++result;
149151
}
@@ -158,15 +160,17 @@ static int count_math_cpus(int cpu_count) {
158160
int get_math_cpu_count() {
159161
#if defined(__x86_64__) && defined(__linux__)
160162
int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
161-
if (cpu_count < 1)
163+
if (cpu_count < 1) {
162164
return get_num_physical_cores();
165+
}
163166
if (is_hybrid_cpu()) {
164167
cpu_set_t affinity;
165168
if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) {
166169
int result = count_math_cpus(cpu_count);
167170
pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
168-
if (result > 0)
171+
if (result > 0) {
169172
return result;
173+
}
170174
}
171175
}
172176
#endif

ggml.c

Lines changed: 10 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -33,6 +33,10 @@
3333
#include <unistd.h>
3434
#endif
3535

36+
#ifndef GGML_USE_LLAMAFILE
37+
#define GGML_USE_LLAMAFILE 1
38+
#endif
39+
3640
#if defined(_MSC_VER)
3741
// disable "possible loss of data" to avoid hundreds of casts
3842
// we should just be careful :)
@@ -10811,7 +10815,8 @@ static void ggml_compute_forward_mul_mat(
1081110815
}
1081210816
#endif
1081310817

10814-
if (src1_cont) {
10818+
#if GGML_USE_LLAMAFILE
10819+
if (nb10 == ggml_type_size(src1->type)) {
1081510820
for (int64_t j = 0; j < ne13; j++)
1081610821
for (int64_t i = 0; i < ne12; i++)
1081710822
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
@@ -10830,6 +10835,7 @@ static void ggml_compute_forward_mul_mat(
1083010835
return;
1083110836
}
1083210837
UseGgmlGemm1:;
10838+
#endif
1083310839

1083410840
if (params->type == GGML_TASK_TYPE_INIT) {
1083510841
if (ith != 0) {
@@ -10862,7 +10868,8 @@ UseGgmlGemm1:;
1086210868
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
1086310869
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
1086410870

10865-
if (src1_cont) {
10871+
#if GGML_USE_LLAMAFILE
10872+
if (nb10 == ggml_type_size(src1->type) || src1->type != vec_dot_type) {
1086610873
for (int64_t j = 0; j < ne13; j++)
1086710874
for (int64_t i = 0; i < ne12; i++)
1086810875
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
@@ -10882,6 +10889,7 @@ UseGgmlGemm1:;
1088210889
return;
1088310890
}
1088410891
UseGgmlGemm2:;
10892+
#endif
1088510893

1088610894
const int64_t nr0 = ne01; // src0 rows
1088710895
const int64_t nr1 = ne1*ne12*ne13; // src1 rows

sgemm.cpp

Lines changed: 2 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -1079,10 +1079,8 @@ bool llamafile_sgemm(int m, int n, int k, const void *A, int lda, const void *B,
10791079
}
10801080

10811081
case GGML_TYPE_Q8_0: {
1082-
if (k % 32)
1083-
return false;
1084-
if (Btype != GGML_TYPE_Q8_0)
1085-
return false;
1082+
if (Btype != GGML_TYPE_Q8_0)
1083+
return false;
10861084
#if defined(__AVX2__) || defined(__AVX512F__)
10871085
tinyBLAS_Q0_AVX2<block_q8_0, block_q8_0, float> tb{
10881086
k, (const block_q8_0 *)A, lda,
@@ -1105,8 +1103,6 @@ bool llamafile_sgemm(int m, int n, int k, const void *A, int lda, const void *B,
11051103
}
11061104

11071105
case GGML_TYPE_Q4_0: {
1108-
if (k % 32)
1109-
return false;
11101106
if (Btype != GGML_TYPE_Q8_0)
11111107
return false;
11121108
#if defined(__AVX2__) || defined(__AVX512F__)

0 commit comments

Comments (0)