
Commit a456d83

add fallback for m chip & fix compiler bugs (ggml-org#4)
1 parent e44f640 commit a456d83

File tree: CMakeLists.txt · ggml.c · llama.cpp

3 files changed: +29 -16 lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ endif()
 #
 
 if (APPLE)
-    set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_METAL_DEFAULT OFF) # metal has not been supported on Apple Silicon yet
 else()
     set(LLAMA_METAL_DEFAULT OFF)
 endif()

ggml.c

Lines changed: 25 additions & 12 deletions

@@ -146,7 +146,7 @@ void ggml_print_backtrace(void) {
 }
 #endif
 
-#define GGML_PERF
+// #define GGML_PERF
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
@@ -14436,6 +14436,7 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14448,10 +14449,11 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-    // for (i = 0; i < dst->ne[0]; i++) {
-    //     res[i] += tmp[i];
-    // }
-
+#else
+    for (i = 0; i < dst->ne[0]; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_dense_lock);
 
 }
@@ -14586,6 +14588,7 @@ static void ggml_compute_forward_mul_mat_axpy(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14598,8 +14601,11 @@ static void ggml_compute_forward_mul_mat_axpy(
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-
-
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_lock);
 }
 
@@ -14733,7 +14739,7 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
 
     // compute the number of remaining elements
     int remainder = ne00 % 8;
-
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8)
     {
@@ -14748,6 +14754,11 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
     {
         res[i] += tmp[i];
     }
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_lock);
 }
 
@@ -14869,6 +14880,7 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14881,10 +14893,11 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-    // for (i = 0; i < ne00; i++) {
-    //     res[i] = tmp[i];
-    // }
-
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_head_lock);
 
 }
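
Taken together, the ggml.c hunks wrap each AVX-vectorized accumulation in an #if defined(__AVX2__) guard and add a plain scalar loop as the fallback, so the axpy kernels build and accumulate correctly on targets without AVX2 (such as Apple M-series chips), where the previously unconditional AVX intrinsics failed to compile. A minimal, self-contained sketch of that pattern (the helper name accumulate and its arguments are illustrative, not the actual ggml.c code):

#if defined(__AVX2__)
#include <immintrin.h>
#endif

// Add tmp[0..n) into res[0..n): 8-wide AVX2 adds when available, scalar loop otherwise.
static void accumulate(float * res, const float * tmp, int n) {
#if defined(__AVX2__)
    int remainder = n % 8;
    int i;
    for (i = 0; i < n - remainder; i += 8) {
        __m256 res_vec = _mm256_loadu_ps(res + i);  // load 8 floats from res
        __m256 tmp_vec = _mm256_loadu_ps(tmp + i);  // load 8 floats from tmp
        _mm256_storeu_ps(res + i, _mm256_add_ps(res_vec, tmp_vec));
    }
    for (i = n - remainder; i < n; i++) {           // scalar tail
        res[i] += tmp[i];
    }
#else
    for (int i = 0; i < n; i++) {                   // fallback path, e.g. on Apple Silicon
        res[i] += tmp[i];
    }
#endif
}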

llama.cpp

Lines changed: 3 additions & 3 deletions
@@ -2737,7 +2737,7 @@ struct llama_mlp_model_loader {
         offset = (offset + 31) & -32;
         file.seek(offset, SEEK_SET);
         // point to the mmaped mlp model file
-        mlp_tensor -> data = mapping -> addr + static_cast<std::streamoff>(offset);
+        mlp_tensor -> data = (void *) (static_cast<char *>(mapping -> addr) + offset);
         file.seek(tensor_data_size, SEEK_CUR);
         return mlp_tensor;
     }
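
The first llama.cpp change removes pointer arithmetic on a void *: mapping->addr is a void *, and adding an offset to it is not standard C++ (GCC accepts it only as an extension), which breaks the build on stricter compilers. Casting to char * first makes the byte offset well-defined. A small sketch of the idiom (offset_into and its parameters are illustrative names, not the loader code itself):

#include <cstddef>

// Return the address `offset` bytes past `base`.
// Arithmetic on void * is non-standard, so step through char * (1-byte elements).
static void * offset_into(void * base, std::size_t offset) {
    return static_cast<void *>(static_cast<char *>(base) + offset);
}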
@@ -2757,7 +2757,7 @@ struct llama_augmentation_model_loader {
         // const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + 5120*40*4 * ggml_tensor_overhead() + (int64_t)13824*5120*40*4);
         int model_layer = model->layers.size();
         int ffn_dim = model->layers[0].ffn_up->ne[1];
-        const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
+        const size_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
         printf("augmentation buffer: %ld\n", ggml_aux_tensor_size);
         struct ggml_init_params params = {
             /*.mem_size =*/ ggml_aux_tensor_size,
@@ -2974,7 +2974,7 @@ static void llm_load_tensors(
     auto create_tensor = [&] (const std::string & name, const std::vector<int64_t> & ne, ggml_backend_type backend) -> ggml_tensor * {
         ggml_tensor * created_tensor = ml.create_tensor(ctx, name, ne, backend);
         if (created_tensor == nullptr) {
-            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name);
+            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name.c_str());
             return nullptr;
         }
         if (created_tensor->backend == GGML_BACKEND_GPU || created_tensor->backend == GGML_BACKEND_GPU_SPLIT) {
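
The final hunk fixes the log call: name is a std::string, and passing a non-trivial class type through a C-style variadic %s argument is undefined behavior (recent clang rejects it outright), so the fix passes name.c_str(). A minimal illustration using plain printf rather than the LLAMA_LOG_ERROR macro (report_missing_tensor is a hypothetical helper):

#include <cstdio>
#include <string>

static void report_missing_tensor(const std::string & name) {
    // std::printf("failed to create tensor '%s'\n", name);      // wrong: std::string through varargs
    std::printf("failed to create tensor '%s'\n", name.c_str()); // correct: %s expects const char *
}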
