
Commit 71e3e4c

Update llama.cpp

1 parent 2b37d8e

File tree: 2 files changed, +35 -8 lines changed


llama_cpp/llama_cpp.py (+34 -7)
@@ -98,7 +98,7 @@ def _load_shared_library(lib_base_name: str):
 # llama.h bindings

 _lib.llama_max_devices.argtypes = []
-_lib.llama_max_devices.restype = ctypes.c_int32
+_lib.llama_max_devices.restype = ctypes.c_size_t

 LLAMA_MAX_DEVICES = _lib.llama_max_devices()
@@ -390,7 +390,7 @@ class llama_model_kv_override(Structure):
 # //   LLAMA_SPLIT_LAYER: ignored
 # int32_t main_gpu;

-# // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
 # const float * tensor_split;

 # // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
@@ -417,7 +417,7 @@ class llama_model_params(Structure):
         n_gpu_layers (int): number of layers to store in VRAM
         split_mode (int): how to split the model across multiple GPUs
         main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored
-        tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+        tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
         progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
         kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
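
Since the docstring now points at llama_max_devices() for the expected length of tensor_split, a short sketch of sizing that array from the binding itself (hypothetical usage; assumes the module imports as llama_cpp.llama_cpp and that llama_model_default_params() is available in these bindings):

    import ctypes

    from llama_cpp import llama_cpp

    # Ask the library how many devices it was built for, then build a
    # matching c_float array with an even split across devices.
    n_devices = llama_cpp.llama_max_devices()
    tensor_split = (ctypes.c_float * n_devices)(*([1.0 / n_devices] * n_devices))

    params = llama_cpp.llama_model_default_params()  # assumed helper
    # ctypes converts the array to the struct's float pointer; keep
    # tensor_split referenced for as long as params is in use.
    params.tensor_split = tensor_split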
@@ -760,16 +760,43 @@ def llama_time_us() -> int:
 _lib.llama_time_us.restype = ctypes.c_int64


-# LLAMA_API int32_t llama_max_devices(void);
+# LLAMA_API size_t llama_max_devices(void);
 def llama_max_devices() -> int:
     return _lib.llama_max_devices()


 _lib.llama_max_devices.argtypes = []
-_lib.llama_max_devices.restype = ctypes.c_int32
+_lib.llama_max_devices.restype = ctypes.c_size_t


-# LLAMA_API bool llama_mmap_supported (void);
+# LLAMA_API bool llama_supports_mmap (void);
+def llama_supports_mmap() -> bool:
+    return _lib.llama_supports_mmap()
+
+
+_lib.llama_supports_mmap.argtypes = []
+_lib.llama_supports_mmap.restype = c_bool
+
+
+# LLAMA_API bool llama_supports_mlock (void);
+def llama_supports_mlock() -> bool:
+    return _lib.llama_supports_mlock()
+
+
+_lib.llama_supports_mlock.argtypes = []
+_lib.llama_supports_mlock.restype = c_bool
+
+
+# LLAMA_API bool llama_supports_gpu_offload(void);
+def llama_supports_gpu_offload() -> bool:
+    return _lib.llama_supports_gpu_offload()
+
+
+_lib.llama_supports_gpu_offload.argtypes = []
+_lib.llama_supports_gpu_offload.restype = c_bool
+
+
+# LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
 def llama_mmap_supported() -> bool:
     return _lib.llama_mmap_supported()
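
For illustration, the three new probes can be called directly through the bindings; each returns a Python bool because its restype is declared as c_bool. A hedged sketch, assuming the module imports as llama_cpp.llama_cpp:

    from llama_cpp import llama_cpp

    # Runtime capability checks exposed by this commit's new wrappers.
    print("mmap support:       ", llama_cpp.llama_supports_mmap())
    print("mlock support:      ", llama_cpp.llama_supports_mlock())
    print("GPU offload support:", llama_cpp.llama_supports_gpu_offload())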

@@ -778,7 +805,7 @@ def llama_mmap_supported() -> bool:
 _lib.llama_mmap_supported.restype = c_bool


-# LLAMA_API bool llama_mlock_supported(void);
+# LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 def llama_mlock_supported() -> bool:
     return _lib.llama_mlock_supported()
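
Callers migrating off the deprecated names can fall back gracefully on bindings older than this commit; a hypothetical compatibility shim:

    from llama_cpp import llama_cpp

    # Prefer the new probe; fall back to the deprecated wrapper if the
    # installed bindings predate this commit.
    if hasattr(llama_cpp, "llama_supports_mlock"):
        mlock_ok = llama_cpp.llama_supports_mlock()
    else:
        mlock_ok = llama_cpp.llama_mlock_supported()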

vendor/llama.cpp (+1 -1, submodule pointer update)
