
Commit c918fe8

metal : create autorelease pool during library build (#4970)
* metal : create autorelease pool during library build (ggml-ci)
* test : simplify (ggml-ci)
1 parent 0f83e72 commit c918fe8
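Context for the change below: Objective-C convenience constructors return autoreleased objects, and if the Metal shader library is compiled on a thread that never drains an autorelease pool (for example, a worker thread created by the caller), those temporaries are not released promptly. This commit wraps the library build in @autoreleasepool so their lifetime is bounded by the block, and adds a regression test that loads a model inside a std::thread (see tests/test-autorelease.cpp below).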

File tree: 6 files changed (+45, -11 lines)

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -105,3 +105,4 @@ poetry.toml
 /tests/test-tokenizer-1-bpe
 /tests/test-rope
 /tests/test-backend-ops
+/tests/test-autorelease

Makefile

Lines changed: 4 additions & 1 deletion
@@ -9,7 +9,7 @@ TEST_TARGETS = \
     tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
     tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
     tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
-    tests/test-backend-ops
+    tests/test-backend-ops tests/test-autorelease

 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -747,3 +747,6 @@ tests/test-c.o: tests/test-c.c llama.h

 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
     $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+    $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

ci/run.sh

Lines changed: 2 additions & 0 deletions
@@ -179,6 +179,8 @@ function gg_run_open_llama_3b_v2 {

     wiki_test_60="${path_wiki}/wiki.test-60.raw"

+    ./bin/test-autorelease ${model_f16}
+
     ./bin/quantize ${model_f16} ${model_q8_0} q8_0
     ./bin/quantize ${model_f16} ${model_q4_0} q4_0
     ./bin/quantize ${model_f16} ${model_q4_1} q4_1

ggml-metal.m

Lines changed: 9 additions & 10 deletions
@@ -303,22 +303,21 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
             return NULL;
         }

-        // dictionary of preprocessor macros
-        NSMutableDictionary * prep = [NSMutableDictionary dictionary];
+        @autoreleasepool {
+            // dictionary of preprocessor macros
+            NSMutableDictionary * prep = [NSMutableDictionary dictionary];

 #ifdef GGML_QKK_64
-        prep[@"QK_K"] = @(64);
+            prep[@"QK_K"] = @(64);
 #endif

-        MTLCompileOptions* options = [MTLCompileOptions new];
-        options.preprocessorMacros = prep;
+            MTLCompileOptions* options = [MTLCompileOptions new];
+            options.preprocessorMacros = prep;

-        //[options setFastMathEnabled:false];
+            //[options setFastMathEnabled:false];

-        ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
-
-        [options release];
-        [prep release];
+            ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
+        }
     }

     if (error) {
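For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of @autoreleasepool. It is illustrative only and not part of the commit; the dictionary and key simply mirror the ones in the diff above.

#import <Foundation/Foundation.h>

int main(void) {
    @autoreleasepool {
        // [NSMutableDictionary dictionary] returns an autoreleased object;
        // without an enclosing pool on this thread it would linger until
        // some outer pool (if any exists) drains.
        NSMutableDictionary * prep = [NSMutableDictionary dictionary];
        prep[@"QK_K"] = @(64);
        NSLog(@"macros: %@", prep);
    } // pool drains here, releasing the autoreleased temporaries

    return 0;
}

The diff applies the same structure: the macro dictionary, the compile options, and any temporaries created while building the Metal library are now scoped to the pool instead of being cleaned up by hand.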

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
+llama_build_and_test_executable(test-autorelease.cpp)

 llama_build_and_test_executable(test-rope.cpp)

tests/test-autorelease.cpp

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+// ref: https://github.com/ggerganov/llama.cpp/issues/4952#issuecomment-1892864763
+
+#include <cstdio>
+#include <string>
+#include <thread>
+
+#include "llama.h"
+
+// This creates a new context inside a pthread and then tries to exit cleanly.
+int main(int argc, char ** argv) {
+    if (argc < 2) {
+        printf("Usage: %s model.gguf\n", argv[0]);
+        return 0; // intentionally return success
+    }
+
+    const std::string fname = argv[1];
+
+    std::thread([&fname]() {
+        llama_backend_init(false);
+        auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        llama_free(ctx);
+        llama_free_model(model);
+        llama_backend_free();
+    }).join();
+
+    return 0;
+}
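A note on the test's design: the model and context are created and freed inside a std::thread rather than on the main thread, since the issue referenced above reportedly manifests when llama.cpp's Metal initialization runs on a secondary thread without its own autorelease pool. Joining the thread and returning 0 verifies that teardown completes cleanly; the CI hook in ci/run.sh runs it against the f16 model before quantization.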
