Commit a38f4a2

llama : avoid ggml include in llama-util.h
1 parent 0fe4b00 commit a38f4a2

Showing 2 changed files with 5 additions and 7 deletions.

llama-util.h

Lines changed: 3 additions & 5 deletions
@@ -16,8 +16,6 @@
 #include <vector>
 #include <stdexcept>
 
-#include "ggml.h"
-
 #ifdef __has_include
 #if __has_include(<unistd.h>)
 #include <unistd.h>
@@ -174,12 +172,12 @@ struct llama_mmap {
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
         // prefetch/readahead impairs performance on NUMA systems
-        if (ggml_is_numa()) { prefetch = 0; }
+        if (numa) { prefetch = 0; }
 #ifdef __linux__
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
@@ -195,7 +193,7 @@ struct llama_mmap {
                         strerror(errno));
             }
         }
-        if (ggml_is_numa()) {
+        if (numa) {
             // advise the kernel not to use readahead
             // (because the next page might not belong on the same node)
             if (madvise(addr, file->size, MADV_RANDOM)) {
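
The change itself is a small dependency inversion: instead of llama-util.h asking ggml whether the machine is NUMA (which forced the #include "ggml.h"), the answer is computed by the caller and injected as a plain bool, with a default of false so existing call sites keep compiling. Below is a minimal standalone sketch of the same pattern, with hypothetical names (Mapper, runtime_is_numa) that are not part of llama.cpp:

// header-only utility: no heavy include needed any more
#include <cstdio>

struct Mapper {
    // the NUMA decision is injected; defaulting to false keeps old callers working
    explicit Mapper(bool numa = false) {
        if (numa) {
            // mirrors the real change: disable prefetch/readahead on NUMA systems
            std::printf("caller reports NUMA: prefetch disabled\n");
        }
    }
};

// caller side: the only place that needs the heavier dependency
static bool runtime_is_numa() { return false; }  // stand-in for ggml_is_numa()

int main() {
    Mapper mapping(/* numa = */ runtime_is_numa());
    return 0;
}
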

llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -774,7 +774,7 @@ struct llama_model_loader {
         }
 
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
+            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2903,7 +2903,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
 
         // maybe this should in llama_model_loader
         if (model_loader->use_mmap) {
-            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
+            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
         }
     }
 
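
On the caller side, llama.cpp already includes ggml.h, so both construction sites simply forward ggml_is_numa(): the model loader keeps its computed prefetch size, while the LoRA path keeps prefetch disabled. As a rough illustration only, a new caller of the updated constructor might look like the sketch below; the model path and variable names are made up, while llama_file, llama_mmap, and ggml_is_numa are the real identifiers from the diff:

#include <cstddef>
#include "ggml.h"
#include "llama-util.h"

int main() {
    // illustrative path; llama_file throws std::runtime_error if the file cannot be opened
    llama_file file("models/7B/ggml-model.bin", "rb");

    // the third argument is the new part: the caller, not the header, asks ggml about NUMA
    llama_mmap mapping(&file, /* prefetch */ (size_t) -1, ggml_is_numa());

    // mapping.addr and mapping.size now describe the memory-mapped file
    return 0;
}
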
