Skip to content

Commit d0cee0d

Browse files
ggerganov and klosax authored
gguf : add 64-bit support (GGUF v2) (#2821)
* gguf : bump version to 2
* gguf : add support for 64-bit (no backwards comp yet)
* gguf : v1 backwards comp
* gguf.py : bump GGUF version
* gguf.py : uint64_t on all lengths, sizes and counts, enums still uint32_t
* gguf.py : string lengths uint32_t
* gguf : update all counts to 64-bit
* gguf.py : string len uint64_t and n_dims uint32_t
* gguf : fix typo
* llama.cpp : print gguf version

---------

Co-authored-by: klosax <[email protected]>
1 parent edd4c14 commit d0cee0d

File tree

5 files changed

+164
-27
lines changed

5 files changed

+164
-27
lines changed

examples/gguf/gguf.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ bool gguf_ex_write(const std::string & fname) {
3030
gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
3131
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
3232
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
33+
gguf_set_val_u64 (ctx, "some.parameter.uint64", 0x123456789abcdef0ull);
34+
gguf_set_val_i64 (ctx, "some.parameter.int64", -0x123456789abcdef1ll);
35+
gguf_set_val_f64 (ctx, "some.parameter.float64", 0.1234567890123456789);
3336
gguf_set_val_bool(ctx, "some.parameter.bool", true);
3437
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
3538

ggml.c

Lines changed: 119 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19394,7 +19394,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
1939419394
////////////////////////////////////////////////////////////////////////////////
1939519395

1939619396
// Length-prefixed string used for keys, string values and tensor names.
// The hunk below records the GGUFv2 change: the length field is widened from
// 32 to 64 bits (the '-' line is the removed field, the '+' line the new one).
struct gguf_str {
19397-
uint32_t n;
19397+
uint64_t n; // GGUFv2: string length in bytes, now 64-bit
1939819398
char * data; // string bytes; the file readers allocate n + 1 zeroed bytes, so the buffer ends in a NUL
1939919399
};
1940019400

@@ -19408,9 +19408,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
1940819408
[GGUF_TYPE_FLOAT32] = sizeof(float),
1940919409
[GGUF_TYPE_BOOL] = sizeof(bool),
1941019410
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
19411+
[GGUF_TYPE_UINT64] = sizeof(uint64_t),
19412+
[GGUF_TYPE_INT64] = sizeof(int64_t),
19413+
[GGUF_TYPE_FLOAT64] = sizeof(double),
1941119414
[GGUF_TYPE_ARRAY] = 0, // undefined
1941219415
};
19413-
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
19416+
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
1941419417

1941519418
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
1941619419
[GGUF_TYPE_UINT8] = "u8",
@@ -19423,8 +19426,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
1942319426
[GGUF_TYPE_BOOL] = "bool",
1942419427
[GGUF_TYPE_STRING] = "str",
1942519428
[GGUF_TYPE_ARRAY] = "arr",
19429+
[GGUF_TYPE_UINT64] = "u64",
19430+
[GGUF_TYPE_INT64] = "i64",
19431+
[GGUF_TYPE_FLOAT64] = "f64",
1942619432
};
19427-
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
19433+
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
1942819434

1942919435
union gguf_value {
1943019436
uint8_t uint8;
@@ -19434,39 +19440,40 @@ union gguf_value {
1943419440
uint32_t uint32;
1943519441
int32_t int32;
1943619442
float float32;
19443+
uint64_t uint64;
19444+
int64_t int64;
19445+
double float64;
1943719446
bool bool_;
1943819447

1943919448
struct gguf_str str;
1944019449

1944119450
struct {
1944219451
enum gguf_type type;
1944319452

19444-
uint32_t n;
19453+
uint64_t n; // GGUFv2
1944519454
void * data;
1944619455
} arr;
1944719456
};
1944819457

1944919458
// One key-value metadata entry: a string key, the type tag of the value, and
// the value itself. This hunk removes the unused n_bytes field (the '-' lines).
struct gguf_kv {
1945019459
struct gguf_str key; // name of the entry

19452-
uint32_t n_bytes; // TODO: is this actually needed?
19453-
1945419461
enum gguf_type type; // selects which member of `value` is valid
1945519462
union gguf_value value;
1945619463
};
1945719464

1945819465
// Fixed-size file header. This hunk records the GGUFv2 change: both counts
// are widened from 32 to 64 bits ('-' lines are removed, '+' lines are added).
// Version 1 files still store 32-bit counts; the loader reads them into
// temporaries and widens them into these fields.
struct gguf_header {
1945919466
uint32_t magic; // file signature -- presumably checked against GGUF_MAGIC; confirm in the loader
1946019467
uint32_t version; // format version; the loader special-cases version == 1
19461-
uint32_t n_tensors;
19462-
uint32_t n_kv;
19468+
uint64_t n_tensors; // GGUFv2: number of tensor info records
19469+
uint64_t n_kv; // GGUFv2: number of key-value pairs (sizes the kv array)
1946319470
};
1946419471

1946519472
struct gguf_tensor_info {
1946619473
struct gguf_str name;
1946719474

1946819475
uint32_t n_dims;
19469-
uint32_t ne[GGML_MAX_DIMS];
19476+
uint64_t ne[GGML_MAX_DIMS];
1947019477

1947119478
enum ggml_type type;
1947219479

@@ -19497,19 +19504,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
1949719504
return n == size;
1949819505
}
1949919506

19500-
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
19507+
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19508+
// Reads one string in the current (GGUFv2) layout: a 64-bit length followed by
// that many bytes.
//
// file   - stream positioned at the string's length field
// p      - receives the result; reset to { 0, NULL } before anything is read
// offset - running byte offset, forwarded to gguf_fread_el
//
// On success p->data is a freshly calloc'ed buffer of p->n + 1 bytes, so the
// string is NUL-terminated. Returns false on a short read, on a length that
// cannot be allocated, or when the allocation fails. p->data is non-NULL only
// if the buffer was actually allocated (it stays set when only the payload
// read fails, as before).
static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
    p->n    = 0;
    p->data = NULL;

    uint64_t len = 0;
    if (!gguf_fread_el(file, &len, sizeof(len), offset)) {
        return false;
    }

    // the length comes straight from the file: len + 1 (room for the
    // terminator) must neither wrap around nor exceed what size_t can hold,
    // otherwise a tiny buffer would be allocated and then overrun by the read
    if (len >= SIZE_MAX) {
        return false;
    }

    char * buf = calloc((size_t) len + 1, 1);
    if (buf == NULL) {
        return false;
    }

    p->n    = len;
    p->data = buf;

    return gguf_fread_el(file, p->data, p->n, offset);
}
1951219519

19520+
// Reads one string in the legacy GGUFv1 layout: a 32-bit length followed by
// that many bytes. The length is widened into the 64-bit p->n.
//
// file   - stream positioned at the string's length field
// p      - receives the result; reset to { 0, NULL } before anything is read
// offset - running byte offset, forwarded to gguf_fread_el
//
// On success p->data is a freshly calloc'ed buffer of p->n + 1 bytes, so the
// string is NUL-terminated. Returns false on a short read, on a length that
// cannot be allocated, or when the allocation fails. p->data is non-NULL only
// if the buffer was actually allocated (it stays set when only the payload
// read fails, as before).
static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
    p->n    = 0;
    p->data = NULL;

    uint32_t len = 0;
    if (!gguf_fread_el(file, &len, sizeof(len), offset)) {
        return false;
    }

    // do the + 1 in size_t, not in 32-bit arithmetic: with len == UINT32_MAX
    // a 32-bit sum wraps to 0, which would allocate nothing and then read
    // ~4 GiB into it. The guard only triggers where size_t itself is 32 bits.
    if (len >= SIZE_MAX) {
        return false;
    }

    char * buf = calloc((size_t) len + 1, 1);
    if (buf == NULL) {
        return false;
    }

    p->n    = len;
    p->data = buf;

    return gguf_fread_el(file, p->data, p->n, offset);
}
19532+
1951319533
struct gguf_context * gguf_init_empty(void) {
1951419534
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
1951519535

@@ -19565,8 +19585,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1956519585
ctx->data = NULL;
1956619586

1956719587
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
19568-
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
19569-
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
19588+
19589+
if (ctx->header.version == 1) {
19590+
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19591+
uint32_t n_tensors = 0;
19592+
uint32_t n_kv = 0;
19593+
19594+
ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
19595+
ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
19596+
19597+
ctx->header.n_tensors = n_tensors;
19598+
ctx->header.n_kv = n_kv;
19599+
} else {
19600+
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
19601+
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
19602+
}
1957019603

1957119604
if (!ok) {
1957219605
fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -19576,6 +19609,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1957619609
}
1957719610
}
1957819611

19612+
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19613+
bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
19614+
if (ctx->header.version == 1) {
19615+
gguf_fread_str = gguf_fread_str_v1;
19616+
}
19617+
1957919618
// read the kv pairs
1958019619
{
1958119620
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
@@ -19585,9 +19624,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1958519624

1958619625
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
1958719626

19588-
ok = ok && gguf_fread_str(file, &kv->key, &offset);
19589-
//ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
19590-
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
19627+
ok = ok && gguf_fread_str(file, &kv->key, &offset);
19628+
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
1959119629

1959219630
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
1959319631

@@ -19599,12 +19637,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1959919637
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
1960019638
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
1960119639
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
19640+
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
19641+
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
19642+
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
1960219643
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
1960319644
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
1960419645
case GGUF_TYPE_ARRAY:
1960519646
{
1960619647
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
19607-
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
19648+
19649+
if (ctx->header.version == 1) {
19650+
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19651+
uint32_t n = 0;
19652+
ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
19653+
kv->value.arr.n = n;
19654+
} else {
19655+
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
19656+
}
1960819657

1960919658
switch (kv->value.arr.type) {
1961019659
case GGUF_TYPE_UINT8:
@@ -19614,6 +19663,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1961419663
case GGUF_TYPE_UINT32:
1961519664
case GGUF_TYPE_INT32:
1961619665
case GGUF_TYPE_FLOAT32:
19666+
case GGUF_TYPE_UINT64:
19667+
case GGUF_TYPE_INT64:
19668+
case GGUF_TYPE_FLOAT64:
1961719669
case GGUF_TYPE_BOOL:
1961819670
{
1961919671
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -19660,7 +19712,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1966019712
ok = ok && gguf_fread_str(file, &info->name, &offset);
1966119713
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
1966219714
for (uint32_t j = 0; j < info->n_dims; ++j) {
19663-
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
19715+
if (ctx->header.version == 1) {
19716+
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19717+
uint32_t t = 0;
19718+
ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
19719+
info->ne[j] = t;
19720+
} else {
19721+
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
19722+
}
1966419723
}
1966519724
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
1966619725
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
@@ -19954,6 +20013,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
1995420013
return ctx->kv[i].value.float32;
1995520014
}
1995620015

20016+
uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
20017+
return ctx->kv[i].value.uint64;
20018+
}
20019+
20020+
int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
20021+
return ctx->kv[i].value.int64;
20022+
}
20023+
20024+
double gguf_get_val_f64(struct gguf_context * ctx, int i) {
20025+
return ctx->kv[i].value.float64;
20026+
}
20027+
1995720028
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
1995820029
return ctx->kv[i].value.bool_;
1995920030
}
@@ -20056,6 +20127,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
2005620127
ctx->kv[idx].value.float32 = val;
2005720128
}
2005820129

20130+
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
20131+
const int idx = gguf_get_or_add_key(ctx, key);
20132+
20133+
ctx->kv[idx].type = GGUF_TYPE_UINT64;
20134+
ctx->kv[idx].value.uint64 = val;
20135+
}
20136+
20137+
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
20138+
const int idx = gguf_get_or_add_key(ctx, key);
20139+
20140+
ctx->kv[idx].type = GGUF_TYPE_INT64;
20141+
ctx->kv[idx].value.int64 = val;
20142+
}
20143+
20144+
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
20145+
const int idx = gguf_get_or_add_key(ctx, key);
20146+
20147+
ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
20148+
ctx->kv[idx].value.float64 = val;
20149+
}
20150+
2005920151
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
2006020152
const int idx = gguf_get_or_add_key(ctx, key);
2006120153

@@ -20106,6 +20198,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
2010620198
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
2010720199
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
2010820200
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
20201+
case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
20202+
case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
20203+
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
2010920204
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
2011020205
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
2011120206
case GGUF_TYPE_ARRAY:
@@ -20267,6 +20362,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
2026720362
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
2026820363
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
2026920364
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
20365+
case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
20366+
case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
20367+
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
2027020368
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
2027120369
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
2027220370
case GGUF_TYPE_ARRAY:
@@ -20282,6 +20380,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
2028220380
case GGUF_TYPE_UINT32:
2028320381
case GGUF_TYPE_INT32:
2028420382
case GGUF_TYPE_FLOAT32:
20383+
case GGUF_TYPE_UINT64:
20384+
case GGUF_TYPE_INT64:
20385+
case GGUF_TYPE_FLOAT64:
2028520386
case GGUF_TYPE_BOOL:
2028620387
{
2028720388
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);

ggml.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@
216216
#define GGML_EXIT_ABORTED 1
217217

218218
#define GGUF_MAGIC 0x46554747 // "GGUF"
219-
#define GGUF_VERSION 1
219+
#define GGUF_VERSION 2
220220

221221
#define GGUF_DEFAULT_ALIGNMENT 32
222222

@@ -1827,6 +1827,9 @@ extern "C" {
18271827
GGUF_TYPE_BOOL = 7,
18281828
GGUF_TYPE_STRING = 8,
18291829
GGUF_TYPE_ARRAY = 9,
1830+
GGUF_TYPE_UINT64 = 10,
1831+
GGUF_TYPE_INT64 = 11,
1832+
GGUF_TYPE_FLOAT64 = 12,
18301833
GGUF_TYPE_COUNT, // marks the end of the enum
18311834
};
18321835

@@ -1867,6 +1870,9 @@ extern "C" {
18671870
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
18681871
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
18691872
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
1873+
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
1874+
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
1875+
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
18701876
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
18711877
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
18721878
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
@@ -1886,6 +1892,9 @@ extern "C" {
18861892
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
18871893
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
18881894
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
1895+
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
1896+
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
1897+
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
18891898
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
18901899
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
18911900
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);

0 commit comments

Comments
 (0)