diff --git a/Makefile b/Makefile index ba73f063709c7..6aec13adcdc07 100644 --- a/Makefile +++ b/Makefile @@ -854,3 +854,6 @@ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o te tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-gguf-meta: tests/test-gguf-meta.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 9129052a223bb..a1ac3eab97249 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -6,6 +6,7 @@ #include "ggml.h" #include "ggml-alloc.h" #include "ggml-backend.h" +#include "llama.h" #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" @@ -148,24 +149,6 @@ static std::string get_ftype(int ftype) { return ggml_type_name(static_cast(ftype)); } -static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) { - switch (type) { - case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]); - case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]); - case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]); - case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]); - case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]); - case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]); - case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]); - case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]); - case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]); - case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]); - case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false"; - default: return format("unknown type %d", type); - } -} - - static void replace_all(std::string & s, const std::string & search, const std::string & replace) { std::string result; for (size_t pos = 0; ; pos += search.length()) { @@ -180,43 +163,6 @@ static void replace_all(std::string & s, const std::string & search, const std:: s = std::move(result); } -static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { - const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); - - switch (type) { - case GGUF_TYPE_STRING: - return gguf_get_val_str(ctx_gguf, i); - case GGUF_TYPE_ARRAY: - { - const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); - int arr_n = gguf_get_arr_n(ctx_gguf, i); - const void * data = gguf_get_arr_data(ctx_gguf, i); - std::stringstream ss; - ss << "["; - for (int j = 0; j < arr_n; j++) { - if (arr_type == GGUF_TYPE_STRING) { - std::string val = gguf_get_arr_str(ctx_gguf, i, j); - // escape quotes - replace_all(val, "\\", "\\\\"); - replace_all(val, "\"", "\\\""); - ss << '"' << val << '"'; - } else if (arr_type == GGUF_TYPE_ARRAY) { - ss << "???"; - } else { - ss << gguf_data_to_str(arr_type, data, j); - } - if (j < arr_n - 1) { - ss << ", "; - } - } - ss << "]"; - return ss.str(); - } - default: - return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); - } -} - static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") { size_t tensor_size = ggml_nbytes(tensor); printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n", @@ -784,11 +730,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { const char * name = gguf_get_key(ctx, i); const enum gguf_type type = gguf_get_kv_type(ctx, i); const std::string type_name = - type == GGUF_TYPE_ARRAY + type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i)) : gguf_type_name(type); - std::string value = gguf_kv_to_str(ctx, i); + char * v = gguf_kv_to_c_str(ctx, i, name); + std::string value = v; const size_t MAX_VALUE_LEN = 40; if (value.size() > MAX_VALUE_LEN) { value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); diff --git a/ggml.c b/ggml.c index b9ec0c981b630..735d68d419a4f 100644 --- a/ggml.c +++ b/ggml.c @@ -19284,8 +19284,9 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = { [GGUF_TYPE_INT64] = sizeof(int64_t), [GGUF_TYPE_FLOAT64] = sizeof(double), [GGUF_TYPE_ARRAY] = 0, // undefined + [GGUF_TYPE_OBJ] = 0, // undefined }; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); +static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14"); static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = { [GGUF_TYPE_UINT8] = "u8", @@ -19301,8 +19302,9 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = { [GGUF_TYPE_UINT64] = "u64", [GGUF_TYPE_INT64] = "i64", [GGUF_TYPE_FLOAT64] = "f64", + [GGUF_TYPE_OBJ] = "obj", }; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); +static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14"); union gguf_value { uint8_t uint8; @@ -19525,6 +19527,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break; + case GGUF_TYPE_OBJ: case GGUF_TYPE_ARRAY: { ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); @@ -19571,7 +19574,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset); } } break; - case GGUF_TYPE_ARRAY: + case GGUF_TYPE_OBJ: + case GGUF_TYPE_ARRAY: break; default: GGML_ASSERT(false && "invalid type"); break; } } break; @@ -19778,7 +19782,7 @@ void gguf_free(struct gguf_context * ctx) { } } - if (kv->type == GGUF_TYPE_ARRAY) { + if (kv->type == GGUF_TYPE_ARRAY || kv->type == GGUF_TYPE_OBJ) { if (kv->value.arr.data) { if (kv->value.arr.type == GGUF_TYPE_STRING) { for (uint64_t j = 0; j < kv->value.arr.n; ++j) { @@ -19863,7 +19867,7 @@ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) { enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) { GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); + GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ); return ctx->kv[key_id].value.arr.type; } @@ -19875,7 +19879,7 @@ const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) { const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); + GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ); struct gguf_kv * kv = &ctx->kv[key_id]; struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; return str->data; @@ -19883,7 +19887,7 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) { GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); - GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); + GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ); return ctx->kv[key_id].value.arr.n; } @@ -19962,6 +19966,7 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) { const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) { GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY); + GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_OBJ); GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING); return &ctx->kv[key_id].value; } @@ -20106,6 +20111,10 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty ctx->kv[idx].type = GGUF_TYPE_ARRAY; ctx->kv[idx].value.arr.type = type; ctx->kv[idx].value.arr.n = n; + if (data == NULL) { + ctx->kv[idx].value.arr.data = NULL; + return; + } ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type)); memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type)); } @@ -20124,6 +20133,38 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** } } +void gguf_set_arr_obj(struct gguf_context * ctx, const char * key, int n) { + const int idx = gguf_get_or_add_key(ctx, key); + + ctx->kv[idx].type = GGUF_TYPE_ARRAY; + ctx->kv[idx].value.arr.type = GGUF_TYPE_OBJ; + ctx->kv[idx].value.arr.n = n; + ctx->kv[idx].value.arr.data = NULL; +} + +void gguf_set_arr_arr(struct gguf_context * ctx, const char * key, int n) { + const int idx = gguf_get_or_add_key(ctx, key); + + ctx->kv[idx].type = GGUF_TYPE_ARRAY; + ctx->kv[idx].value.arr.type = GGUF_TYPE_ARRAY; + ctx->kv[idx].value.arr.n = n; + ctx->kv[idx].value.arr.data = NULL; +} + +void gguf_set_obj_str(struct gguf_context * ctx, const char * key, const char ** data, int n) { + const int idx = gguf_get_or_add_key(ctx, key); + + ctx->kv[idx].type = GGUF_TYPE_OBJ; + ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING; + ctx->kv[idx].value.arr.n = n; + ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str)); + for (int i = 0; i < n; i++) { + struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i]; + str->n = strlen(data[i]); + str->data = strdup(data[i]); + } +} + // set or add KV pairs from another context void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { for (uint32_t i = 0; i < src->header.n_kv; i++) { @@ -20140,6 +20181,15 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; + case GGUF_TYPE_OBJ: + { + const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *)); + for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) { + data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data; + } + gguf_set_obj_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); + free((void *)data); + } break; case GGUF_TYPE_ARRAY: { if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) { @@ -20149,8 +20199,6 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { } gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); GGML_FREE((void *)data); - } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) { - GGML_ASSERT(false && "nested arrays not supported"); } else { gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); } @@ -20304,6 +20352,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break; + case GGUF_TYPE_OBJ: case GGUF_TYPE_ARRAY: { gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); @@ -20330,7 +20379,8 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]); } } break; - case GGUF_TYPE_ARRAY: + case GGUF_TYPE_OBJ: + case GGUF_TYPE_ARRAY: break; default: GGML_ASSERT(false && "invalid type"); break; } } break; diff --git a/ggml.h b/ggml.h index e0a4799f3bd0a..448e794b26b74 100644 --- a/ggml.h +++ b/ggml.h @@ -2141,6 +2141,7 @@ extern "C" { GGUF_TYPE_UINT64 = 10, GGUF_TYPE_INT64 = 11, GGUF_TYPE_FLOAT64 = 12, + GGUF_TYPE_OBJ = 13, GGUF_TYPE_COUNT, // marks the end of the enum }; @@ -2212,6 +2213,9 @@ extern "C" { GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n); GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n); + GGML_API void gguf_set_arr_obj (struct gguf_context * ctx, const char * key, int n); + GGML_API void gguf_set_arr_arr (struct gguf_context * ctx, const char * key, int n); + GGML_API void gguf_set_obj_str (struct gguf_context * ctx, const char * key, const char ** data, int n); // set or add KV pairs from another context GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src); diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index ed8e26f83e6a9..e5212559286a3 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -3,6 +3,7 @@ import sys from enum import Enum, IntEnum, auto from typing import Any +import numpy as np # # constants @@ -550,6 +551,64 @@ class GGUFValueType(IntEnum): UINT64 = 10 INT64 = 11 FLOAT64 = 12 + OBJ = 13 + + @staticmethod + def get_type_ex(val: Any) -> GGUFValueType: + if isinstance(val, (str, bytes, bytearray)): + return GGUFValueType.STRING + elif isinstance(val, list): + return GGUFValueType.ARRAY + elif isinstance(val, np.float32): + return GGUFValueType.FLOAT32 + elif isinstance(val, np.float64): + return GGUFValueType.FLOAT64 + elif isinstance(val, float): + return GGUFValueType.FLOAT32 + elif isinstance(val, bool): + return GGUFValueType.BOOL + elif isinstance(val, np.uint8): + return GGUFValueType.UINT8 + elif isinstance(val, np.uint16): + return GGUFValueType.UINT16 + elif isinstance(val, np.uint32): + return GGUFValueType.UINT32 + elif isinstance(val, np.uint64): + return GGUFValueType.UINT64 + elif isinstance(val, np.int8): + return GGUFValueType.INT8 + elif isinstance(val, np.int16): + return GGUFValueType.INT16 + elif isinstance(val, np.int32): + return GGUFValueType.INT32 + elif isinstance(val, np.int64): + return GGUFValueType.INT64 + elif isinstance(val, int): + if val >=0 and val <= np.iinfo(np.uint8).max: + return GGUFValueType.UINT8 + elif val >=0 and val <= np.iinfo(np.uint16).max: + return GGUFValueType.UINT16 + elif val >=0 and val <= np.iinfo(np.uint32).max: + return GGUFValueType.UINT32 + elif val >=0 and val <= np.iinfo(np.uint64).max: + return GGUFValueType.UINT64 + elif val >=np.iinfo(np.int8).min and val <= np.iinfo(np.int8).max: + return GGUFValueType.INT8 + elif val >=np.iinfo(np.int16).min and val <= np.iinfo(np.int16).max: + return GGUFValueType.INT16 + elif val >=np.iinfo(np.int32).min and val <= np.iinfo(np.int32).max: + return GGUFValueType.INT32 + elif val >=np.iinfo(np.int64).min and val <= np.iinfo(np.int64).max: + return GGUFValueType.INT64 + else: + print("The integer exceed limit:", val) + sys.exit() + elif isinstance(val, dict): + return GGUFValueType.OBJ + # TODO: need help with 64-bit types in Python + else: + print("Unknown type:", type(val)) + sys.exit() @staticmethod def get_type(val: Any) -> GGUFValueType: diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 5b6d4ba6bcce9..fa98b2aa7d34f 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -49,6 +49,34 @@ class ReaderField(NamedTuple): types: list[GGUFValueType] = [] + def get(self): + result = None + type = self.types[0] + itype = None + if type == GGUFValueType.ARRAY or type == GGUFValueType.OBJ: + itype = self.types[-1] + if itype == GGUFValueType.STRING: + result = [str(bytes(self.parts[idx]), encoding="utf-8") for idx in self.data] + elif itype == GGUFValueType.OBJ or itype == GGUFValueType.ARRAY: + count=self.parts[-1] + result = count + else: + result = [pv for idx in self.data for pv in self.parts[idx].tolist()] + elif type == GGUFValueType.STRING: + result = str(bytes(self.parts[-1]), encoding="utf-8") + else: + result = self.parts[-1].tolist()[0] + + return result + + def getType(self): + type = self.types[0] + if type == GGUFValueType.ARRAY: + itype = self.types[-1] + return type, itype + else: + return type + class ReaderTensor(NamedTuple): name: str @@ -165,13 +193,16 @@ def _get_field_parts( val = self._get(offs, nptype) return int(val.nbytes), [val], [0], types # Handle arrays. - if gtype == GGUFValueType.ARRAY: + if gtype == GGUFValueType.ARRAY or gtype == GGUFValueType.OBJ: raw_itype = self._get(offs, np.uint32) offs += int(raw_itype.nbytes) alen = self._get(offs, np.uint64) offs += int(alen.nbytes) aparts: list[npt.NDArray[Any]] = [raw_itype, alen] data_idxs: list[int] = [] + if raw_itype[0] == GGUFValueType.OBJ or raw_itype[0] == GGUFValueType.ARRAY: + types += [raw_itype[0]] + return offs - orig_offs, aparts, data_idxs, types for idx in range(alen[0]): curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0]) if idx == 0: @@ -212,11 +243,12 @@ def _build_fields(self, offs: int, count: int) -> int: offs += int(raw_kv_type.nbytes) parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type] idxs_offs = len(parts) + field_name = str(bytes(kv_kdata), encoding = 'utf-8') field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0]) parts += field_parts self._push_field(ReaderField( orig_offs, - str(bytes(kv_kdata), encoding = 'utf-8'), + field_name, parts, [idx + idxs_offs for idx in field_idxs], field_types, diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 16808196e769d..4e80db498dd6c 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -158,6 +158,75 @@ def add_array(self, key: str, val: Sequence[Any]) -> None: self.add_key(key) self.add_val(val, GGUFValueType.ARRAY) + def add_array_ex(self, key: str, val: Sequence[Any]) -> None: + if not isinstance(val, Sequence): + raise ValueError("Value must be a sequence for array type") + + self.add_key(key) + ltype = GGUFValueType.get_type_ex(val[0]) + if not all(GGUFValueType.get_type_ex(i) is ltype for i in val[1:]): + ltype = GGUFValueType.OBJ + if ltype == GGUFValueType.OBJ or ltype == GGUFValueType.ARRAY: + self.kv_data += self._pack("I", GGUFValueType.ARRAY) + self.kv_data_count += 1 + self.kv_data += self._pack("I", ltype) + self.kv_data += self._pack("Q", len(val)) + for i, item in enumerate(val): + if key[0] != '.': + key = "." + key + self.add_kv(key + "[" + str(i) + "]", item) + else: + self.add_val(val, GGUFValueType.ARRAY) + + def add_kv(self, key: str, val: Any) -> None: + vtype=GGUFValueType.get_type_ex(val) + if vtype == GGUFValueType.OBJ: + self.add_dict(key, val) + elif vtype == GGUFValueType.ARRAY: + self.add_array_ex(key, val) + elif vtype == GGUFValueType.STRING: + self.add_string(key, val) + elif vtype == GGUFValueType.BOOL: + self.add_bool(key, val) + elif vtype == GGUFValueType.INT8: + self.add_int8(key, val) + elif vtype == GGUFValueType.INT16: + self.add_int16(key, val) + elif vtype == GGUFValueType.INT32: + self.add_int32(key, val) + elif vtype == GGUFValueType.INT64: + self.add_int64(key, val) + elif vtype == GGUFValueType.UINT8: + self.add_uint8(key, val) + elif vtype == GGUFValueType.UINT16: + self.add_uint16(key, val) + elif vtype == GGUFValueType.UINT32: + self.add_uint32(key, val) + elif vtype == GGUFValueType.UINT64: + self.add_uint64(key, val) + elif vtype == GGUFValueType.FLOAT32: + self.add_float32(key, val) + elif vtype == GGUFValueType.FLOAT64: + self.add_float64(key, val) + else: + raise ValueError(f"Unsupported type: {type(val)}") + + def add_dict(self, key: str, val: dict, excludes: Sequence[str] = []) -> None: + if not isinstance(val, dict): + raise ValueError("Value must be a dict type") + + self.add_key(key) + self.add_val(val, GGUFValueType.OBJ) + for k, v in val.items(): + if k in excludes: + continue + real_key = key + "." + k + # "/" means referencing an existing key + if k[0] != "/": + if real_key[0] != '.': + real_key = "." + real_key + self.add_kv(real_key, v) + def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True) -> None: if vtype is None: vtype = GGUFValueType.get_type(val) @@ -181,6 +250,8 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool self.kv_data += self._pack("Q", len(val)) for item in val: self.add_val(item, add_vtype=False) + elif vtype == GGUFValueType.OBJ and isinstance(val, dict) and val: + self.add_val(list(val.keys()), GGUFValueType.ARRAY, False) else: raise ValueError("Invalid GGUF metadata value type or value") diff --git a/gguf-py/tests/test_constants.py b/gguf-py/tests/test_constants.py new file mode 100644 index 0000000000000..ac53f6d287576 --- /dev/null +++ b/gguf-py/tests/test_constants.py @@ -0,0 +1,34 @@ +import sys +from pathlib import Path +import numpy as np +import unittest + +# Necessary to load the local gguf package +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from gguf.constants import GGUFValueType # noqa: E402 + + +class TestGGUFValueType(unittest.TestCase): + + def test_get_type(self): + self.assertEqual(GGUFValueType.get_type("test"), GGUFValueType.STRING) + self.assertEqual(GGUFValueType.get_type([1, 2, 3]), GGUFValueType.ARRAY) + self.assertEqual(GGUFValueType.get_type(1.0), GGUFValueType.FLOAT32) + self.assertEqual(GGUFValueType.get_type(True), GGUFValueType.BOOL) + self.assertEqual(GGUFValueType.get_type(b"test"), GGUFValueType.STRING) + self.assertEqual(GGUFValueType.get_type(np.uint8(1)), GGUFValueType.UINT8) + self.assertEqual(GGUFValueType.get_type(np.uint16(1)), GGUFValueType.UINT16) + self.assertEqual(GGUFValueType.get_type(np.uint32(1)), GGUFValueType.UINT32) + self.assertEqual(GGUFValueType.get_type(np.uint64(1)), GGUFValueType.UINT64) + self.assertEqual(GGUFValueType.get_type(np.int8(-1)), GGUFValueType.INT8) + self.assertEqual(GGUFValueType.get_type(np.int16(-1)), GGUFValueType.INT16) + self.assertEqual(GGUFValueType.get_type(np.int32(-1)), GGUFValueType.INT32) + self.assertEqual(GGUFValueType.get_type(np.int64(-1)), GGUFValueType.INT64) + self.assertEqual(GGUFValueType.get_type(np.float32(1.0)), GGUFValueType.FLOAT32) + self.assertEqual(GGUFValueType.get_type(np.float64(1.0)), GGUFValueType.FLOAT64) + self.assertEqual(GGUFValueType.get_type({"k": 12}), GGUFValueType.OBJ) + + +if __name__ == '__main__': + unittest.main() diff --git a/gguf-py/tests/test_gguf.py b/gguf-py/tests/test_gguf.py index 0adeb7d55731a..35ce3794fffe3 100644 --- a/gguf-py/tests/test_gguf.py +++ b/gguf-py/tests/test_gguf.py @@ -1,7 +1,133 @@ -import gguf # noqa: F401 +import os +import sys +from pathlib import Path +import numpy as np +import unittest -# TODO: add tests +# Necessary to load the local gguf package +sys.path.insert(0, str(Path(__file__).parent.parent)) +from gguf import GGUFWriter, GGUFReader, GGUFValueType # noqa: E402 +model_file = os.path.join(Path(__file__).parent.parent.parent, "models", "test_writer.gguf") -def test_write_gguf() -> None: - pass + +class TestGGUFReaderWriter(unittest.TestCase): + + @classmethod + def setUpClass(cls): + gguf_writer = GGUFWriter(model_file, "llama") + + # gguf_writer.add_architecture() + gguf_writer.add_block_count(12) + gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer + gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float + gguf_writer.add_kv("uint8", 1) + gguf_writer.add_kv("nint8", np.int8(1)) + gguf_writer.add_dict("dict1", {"key1": 2, "key2": "hi", "obj": {"k": 1}}) + gguf_writer.add_array_ex("oArray", [{"k": 4, "o": {"o1": 6}}, {"k": 9}]) + gguf_writer.add_array_ex("cArray", [3, "hi", [1, 2]]) + gguf_writer.add_array_ex("arrayInArray", [[2, 3, 4], [5, 7, 8]]) + gguf_writer.add_kv("tokenizer.ggml.bos_token_id", "bos") + gguf_writer.add_kv("tokenizer.ggml.add_bos_token", True) + gguf_writer.add_dict("tokenizer_config", { + "/tokenizer.ggml.bos_token_id:bos_token": None, "/tokenizer.ggml.add_bos_token": None}) + gguf_writer.add_array("oldArray", [1, 2, 3]) + gguf_writer.add_custom_alignment(64) + + tensor1 = np.ones((32,), dtype=np.float32) * 100.0 + tensor2 = np.ones((64,), dtype=np.float32) * 101.0 + tensor3 = np.ones((96,), dtype=np.float32) * 102.0 + + gguf_writer.add_tensor("tensor1", tensor1) + gguf_writer.add_tensor("tensor2", tensor2) + gguf_writer.add_tensor("tensor3", tensor3) + + gguf_writer.write_header_to_file() + gguf_writer.write_kv_data_to_file() + gguf_writer.write_tensors_to_file() + + gguf_writer.close() + + def test_rw(self) -> None: + # test compatibility + gguf_reader = GGUFReader(model_file) + self.assertEqual(gguf_reader.alignment, 64) + v = gguf_reader.get_field("oldArray") + self.assertIsNotNone(v) + type, itype = v.getType() + self.assertEqual(type, GGUFValueType.ARRAY) + self.assertEqual(itype, GGUFValueType.INT32) + self.assertListEqual(v.get(), [1,2,3]) + + def test_rw_ex(self) -> None: + gguf_reader = GGUFReader(model_file) + self.assertEqual(gguf_reader.alignment, 64) + + v = gguf_reader.get_field("uint8") + self.assertEqual(v.get(), 1) + self.assertEqual(v.types[0], GGUFValueType.UINT8) + v = gguf_reader.get_field("nint8") + self.assertEqual(v.get(), 1) + self.assertEqual(v.types[0], GGUFValueType.INT8) + v = gguf_reader.get_field("dict1") + self.assertIsNotNone(v) + self.assertListEqual(v.get(), ['key1', 'key2', 'obj']) + v = gguf_reader.get_field(".dict1.key1") + self.assertEqual(v.get(), 2) + v = gguf_reader.get_field(".dict1.key2") + self.assertEqual(v.get(), "hi") + v = gguf_reader.get_field(".dict1.obj") + self.assertListEqual(v.get(), ['k']) + v = gguf_reader.get_field(".dict1.obj.k") + self.assertEqual(v.get(), 1) + + v = gguf_reader.get_field("oArray") + self.assertIsNotNone(v) + count = v.get() + self.assertEqual(count, 2) + type, itype = v.getType() + self.assertEqual(type, GGUFValueType.ARRAY) + self.assertEqual(itype, GGUFValueType.OBJ) + v = gguf_reader.get_field(".oArray[0].k") + self.assertIsNotNone(v) + self.assertEqual(v.get(), 4) + v = gguf_reader.get_field(".oArray[1].k") + self.assertEqual(v.get(), 9) + + v = gguf_reader.get_field("cArray") + self.assertIsNotNone(v) + count = v.get() + self.assertEqual(count, 3) + type, itype = v.getType() + self.assertEqual(type, GGUFValueType.ARRAY) + self.assertEqual(itype, GGUFValueType.OBJ) + v = gguf_reader.get_field(".cArray[0]") + self.assertEqual(v.get(), 3) + v = gguf_reader.get_field(".cArray[1]") + self.assertEqual(v.get(), "hi") + v = gguf_reader.get_field(".cArray[2]") + self.assertListEqual(v.get(), [1, 2]) + + v = gguf_reader.get_field("arrayInArray") + self.assertIsNotNone(v) + count = v.get() + self.assertEqual(count, 2) + type, itype = v.getType() + self.assertEqual(type, GGUFValueType.ARRAY) + self.assertEqual(itype, GGUFValueType.ARRAY) + v = gguf_reader.get_field(".arrayInArray[0]") + self.assertListEqual(v.get(), [2, 3, 4]) + v = gguf_reader.get_field(".arrayInArray[1]") + self.assertListEqual(v.get(), [5, 7, 8]) + + v = gguf_reader.get_field("tokenizer.ggml.bos_token_id") + self.assertEqual(v.get(), "bos") + v = gguf_reader.get_field("tokenizer.ggml.add_bos_token") + self.assertEqual(v.get(), True) + v = gguf_reader.get_field("tokenizer_config") + self.assertIsNotNone(v) + self.assertListEqual(v.get(), ["/tokenizer.ggml.bos_token_id:bos_token", "/tokenizer.ggml.add_bos_token"]) + + +if __name__ == '__main__': + unittest.main() diff --git a/llama.cpp b/llama.cpp index 65e399adca60e..03b5db20b0535 100644 --- a/llama.cpp +++ b/llama.cpp @@ -780,28 +780,87 @@ static std::string gguf_data_to_str(enum gguf_type type, const void * data, int } } -static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { +template< class CharT > +static std::string quoted_str(const CharT* s, const std::string delim="\"", const std::string escape="\\") { + std::string val = s; + replace_all(val, escape, escape + escape); + replace_all(val, delim, escape + delim); + val = delim + val + delim; + return val; +} + +static void gguf_kv_to_stream(const struct gguf_context * ctx_gguf, int i, std::ostringstream &ss, const std::string parent_name = "") { const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); switch (type) { case GGUF_TYPE_STRING: - return gguf_get_val_str(ctx_gguf, i); + { + ss << quoted_str(gguf_get_val_str(ctx_gguf, i)); + } break; + case GGUF_TYPE_OBJ: + { + ss << "{"; + int arr_n = gguf_get_arr_n(ctx_gguf, i); + for (int j = 0; j < arr_n; j++) { + std::string subkey_name = gguf_get_arr_str(ctx_gguf, i, j); + std::string key; + if (!subkey_name.empty() && subkey_name.at(0) == '/') { + std::size_t ix = subkey_name.find(':'); + if (ix != std::string::npos) { + key = subkey_name; + subkey_name = subkey_name.substr(ix+1); + key = key.substr(1, ix-1); + } else { + subkey_name = subkey_name.substr(1); + key = subkey_name; + ix = subkey_name.rfind('.'); + if (ix != std::string::npos) { + subkey_name = subkey_name.substr(ix+1); + } + } + } else { + if (parent_name.empty()) { + key = subkey_name; + } else { + key = parent_name; + key.append("."); + key.append(subkey_name); + } + if (key.at(0) != '.') {key = "." + key;} + } + ss << quoted_str(subkey_name.c_str()) << ":"; + int k_id = gguf_find_key(ctx_gguf, key.c_str()); + if (k_id != -1) { + gguf_kv_to_stream(ctx_gguf, k_id, ss, key); + } else { + ss << "undefined"; + } + if (j < arr_n - 1) { + ss << ", "; + } + } + ss << "}"; + } break; case GGUF_TYPE_ARRAY: { const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); int arr_n = gguf_get_arr_n(ctx_gguf, i); const void * data = gguf_get_arr_data(ctx_gguf, i); - std::stringstream ss; ss << "["; for (int j = 0; j < arr_n; j++) { if (arr_type == GGUF_TYPE_STRING) { - std::string val = gguf_get_arr_str(ctx_gguf, i, j); - // escape quotes - replace_all(val, "\\", "\\\\"); - replace_all(val, "\"", "\\\""); + std::string val = quoted_str(gguf_get_arr_str(ctx_gguf, i, j)); ss << '"' << val << '"'; - } else if (arr_type == GGUF_TYPE_ARRAY) { - ss << "???"; + } else if (arr_type == GGUF_TYPE_OBJ || arr_type == GGUF_TYPE_ARRAY) { + std::string s = "[" + std::to_string(j) + "]"; + std::string key = parent_name.empty() ? s : parent_name + s; + if (key.at(0) != '.') {key = "." + key;} + int k_id = gguf_find_key(ctx_gguf, key.c_str()); + if (k_id != -1) { + gguf_kv_to_stream(ctx_gguf, k_id, ss, key); + } else { + ss << "undefined"; + } } else { ss << gguf_data_to_str(arr_type, data, j); } @@ -810,13 +869,23 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { } } ss << "]"; - return ss.str(); - } + } break; default: - return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); + ss << gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); } } +static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name = nullptr) { + std::ostringstream ss; + gguf_kv_to_stream(ctx_gguf, i, ss, parent_name == NULL ? gguf_get_key(ctx_gguf, i) : parent_name); + return ss.str(); +} + +char * gguf_kv_to_c_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name = nullptr) { + std::string result = gguf_kv_to_str(ctx_gguf, i, parent_name); + return strdup(result.c_str()); +} + // // ggml helpers // @@ -2184,7 +2253,7 @@ namespace GGUFMeta { static T get_kv(const gguf_context * ctx, const int k) { const enum gguf_type kt = gguf_get_kv_type(ctx, k); - if (kt != GKV::gt) { + if (kt != GKV::gt && kt != GGUF_TYPE_OBJ && GKV::gt != GGUF_TYPE_ARRAY) { throw std::runtime_error(format("key %s has wrong type %s but expected type %s", gguf_get_key(ctx, k), gguf_type_name(kt), gguf_type_name(GKV::gt))); } @@ -2411,13 +2480,16 @@ struct llama_model_loader { LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__); for (int i = 0; i < n_kv; i++) { const char * name = gguf_get_key(ctx_gguf, i); + // skip the subkeys. + if (name[0] == '.') { continue; } + const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); const std::string type_name = - type == GGUF_TYPE_ARRAY + type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i)) : gguf_type_name(type); - std::string value = gguf_kv_to_str(ctx_gguf, i); + std::string value = gguf_kv_to_str(ctx_gguf, i, name); const size_t MAX_VALUE_LEN = 40; if (value.size() > MAX_VALUE_LEN) { value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); @@ -2426,6 +2498,7 @@ struct llama_model_loader { LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str()); } + LLAMA_LOG_INFO("%s: Dumping metadata keys/values Done.\n", __func__); // print type counts for (auto & kv : n_type) { @@ -2796,7 +2869,7 @@ static void llm_load_hparams( continue; } const char * name = gguf_get_key(ctx, i); - const std::string value = gguf_kv_to_str(ctx, i); + const std::string value = gguf_kv_to_str(ctx, i, name); model.gguf_kv.emplace(name, value); } diff --git a/llama.h b/llama.h index cec4158bc8e80..ac936404f52f5 100644 --- a/llama.h +++ b/llama.h @@ -898,6 +898,8 @@ extern "C" { LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx); + LLAMA_API char * gguf_kv_to_c_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name); + #ifdef __cplusplus } #endif diff --git a/models/test_writer.gguf b/models/test_writer.gguf new file mode 100644 index 0000000000000..30fef3d64994d Binary files /dev/null and b/models/test_writer.gguf differ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3e40a78cdeac9..17677d4f6ecca 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -50,6 +50,9 @@ llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cp llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf) # llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG +llama_build_executable(test-gguf-meta.cpp) +llama_test_executable (test-gguf-meta test-gguf-meta.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/test_writer.gguf) + llama_build_and_test_executable(test-grammar-parser.cpp) llama_build_and_test_executable(test-llama-grammar.cpp) llama_build_and_test_executable(test-grad0.cpp) diff --git a/tests/test-gguf-meta.cpp b/tests/test-gguf-meta.cpp new file mode 100644 index 0000000000000..8b6a9889b0f57 --- /dev/null +++ b/tests/test-gguf-meta.cpp @@ -0,0 +1,83 @@ +#ifdef NDEBUG +#undef NDEBUG +#endif + +#include +#include +#include +#include + +#include "ggml.h" +#include "llama.h" +#include "get-model.h" + +// run `python3 gguf-py/tests/test_gguf.py` to generate test_writer.gguf file. +int main(int argc, char ** argv) +{ + char* fname = get_model_or_exit(argc, argv); + + struct gguf_context * ctx_gguf = NULL; + struct ggml_context * ctx_meta = NULL; + struct gguf_init_params params = { + /*.no_alloc = */ true, + /*.ctx = */ &ctx_meta, + }; + ctx_gguf = gguf_init_from_file(fname, params); + if (!ctx_gguf) { + fprintf(stderr, "%s: failed to load model from %s\n", __func__, fname); + return 1; + } + int n_kv = gguf_get_n_kv(ctx_gguf); + + for (int i = 0; i < n_kv; i++) { + const char * name = gguf_get_key(ctx_gguf, i); + // skip the subkeys. + if (name[0] == '.') { continue; } + const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); + char * value = gguf_kv_to_c_str(ctx_gguf, i, name); + printf("key: %s, type: %s, value: %s\n", name, gguf_type_name(type), value); + free(value); + } + + int k_id = gguf_find_key(ctx_gguf, "no_such_key"); + assert(k_id == -1); + k_id = gguf_find_key(ctx_gguf, "tokenizer_config"); + assert(k_id != -1); + + const char * name = gguf_get_key(ctx_gguf, k_id); + assert(strcmp(name, "tokenizer_config") == 0); + + enum gguf_type type = gguf_get_kv_type(ctx_gguf, k_id); + assert(type == GGUF_TYPE_OBJ); + char * value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL); + assert(strcmp(value, "{\"bos_token\":\"bos\", \"add_bos_token\":true}") == 0); + free(value); + + k_id = gguf_find_key(ctx_gguf, "dict1"); + assert(k_id != -1); + value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL); + assert(strcmp(value, "{\"key1\":2, \"key2\":\"hi\", \"obj\":{\"k\":1}}") == 0); + free(value); + + k_id = gguf_find_key(ctx_gguf, "oArray"); + assert(k_id != -1); + value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL); + assert(strcmp(value, "[{\"k\":4, \"o\":{\"o1\":6}}, {\"k\":9}]") == 0); + free(value); + + k_id = gguf_find_key(ctx_gguf, "cArray"); + assert(k_id != -1); + value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL); + assert(strcmp(value, "[3, \"hi\", [1, 2]]") == 0); + free(value); + + k_id = gguf_find_key(ctx_gguf, "arrayInArray"); + assert(k_id != -1); + type = gguf_get_kv_type(ctx_gguf, k_id); + assert(type == GGUF_TYPE_ARRAY); + value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL); + assert(strcmp(value, "[[2, 3, 4], [5, 7, 8]]") == 0); + free(value); + + printf("Done!\n"); +}