@@ -19394,7 +19394,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
19394
19394
////////////////////////////////////////////////////////////////////////////////
19395
19395
19396
19396
// Length-prefixed string as used by the GGUF reader/writer.
// The string readers in this file allocate n + 1 zeroed bytes for data and
// then read n bytes into it, so data is NUL-terminated after a successful read.
struct gguf_str {
    uint64_t n;  // GGUFv2: byte count (GGUFv1 files store it as 32 bits and it is widened on load)
    char * data; // heap buffer holding the n bytes
};
19400
19400
@@ -19408,9 +19408,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
19408
19408
[GGUF_TYPE_FLOAT32] = sizeof(float),
19409
19409
[GGUF_TYPE_BOOL] = sizeof(bool),
19410
19410
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
19411
+ [GGUF_TYPE_UINT64] = sizeof(uint64_t),
19412
+ [GGUF_TYPE_INT64] = sizeof(int64_t),
19413
+ [GGUF_TYPE_FLOAT64] = sizeof(double),
19411
19414
[GGUF_TYPE_ARRAY] = 0, // undefined
19412
19415
};
19413
- static_assert(GGUF_TYPE_COUNT == 10 , "GGUF_TYPE_COUNT != 10 ");
19416
+ static_assert(GGUF_TYPE_COUNT == 13 , "GGUF_TYPE_COUNT != 13 ");
19414
19417
19415
19418
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
19416
19419
[GGUF_TYPE_UINT8] = "u8",
@@ -19423,8 +19426,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
19423
19426
[GGUF_TYPE_BOOL] = "bool",
19424
19427
[GGUF_TYPE_STRING] = "str",
19425
19428
[GGUF_TYPE_ARRAY] = "arr",
19429
+ [GGUF_TYPE_UINT64] = "u64",
19430
+ [GGUF_TYPE_INT64] = "i64",
19431
+ [GGUF_TYPE_FLOAT64] = "f64",
19426
19432
};
19427
- static_assert(GGUF_TYPE_COUNT == 10 , "GGUF_TYPE_COUNT != 10 ");
19433
+ static_assert(GGUF_TYPE_COUNT == 13 , "GGUF_TYPE_COUNT != 13 ");
19428
19434
19429
19435
union gguf_value {
19430
19436
uint8_t uint8;
@@ -19434,39 +19440,40 @@ union gguf_value {
19434
19440
uint32_t uint32;
19435
19441
int32_t int32;
19436
19442
float float32;
19443
+ uint64_t uint64;
19444
+ int64_t int64;
19445
+ double float64;
19437
19446
bool bool_;
19438
19447
19439
19448
struct gguf_str str;
19440
19449
19441
19450
struct {
19442
19451
enum gguf_type type;
19443
19452
19444
- uint32_t n;
19453
+ uint64_t n; // GGUFv2
19445
19454
void * data;
19446
19455
} arr;
19447
19456
};
19448
19457
19449
19458
struct gguf_kv {
19450
19459
struct gguf_str key;
19451
19460
19452
- uint32_t n_bytes; // TODO: is this actually needed?
19453
-
19454
19461
enum gguf_type type;
19455
19462
union gguf_value value;
19456
19463
};
19457
19464
19458
19465
// In-memory copy of the GGUF file header.
// For version 1 files the loader reads n_tensors and n_kv as 32-bit values
// and widens them into these 64-bit fields.
struct gguf_header {
    uint32_t magic;
    uint32_t version;
    uint64_t n_tensors; // GGUFv2
    uint64_t n_kv;      // GGUFv2
};
19464
19471
19465
19472
struct gguf_tensor_info {
19466
19473
struct gguf_str name;
19467
19474
19468
19475
uint32_t n_dims;
19469
- uint32_t ne[GGML_MAX_DIMS];
19476
+ uint64_t ne[GGML_MAX_DIMS];
19470
19477
19471
19478
enum ggml_type type;
19472
19479
@@ -19497,19 +19504,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
19497
19504
return n == size;
19498
19505
}
19499
19506
19500
- static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
19507
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19508
+ static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
19501
19509
p->n = 0;
19502
19510
p->data = NULL;
19503
19511
19504
19512
bool ok = true;
19505
19513
19506
- // TODO: how to avoid mallocs for strings?
19507
19514
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
19508
19515
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
19509
19516
19510
19517
return ok;
19511
19518
}
19512
19519
19520
+ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
19521
+ p->n = 0;
19522
+ p->data = NULL;
19523
+
19524
+ bool ok = true;
19525
+
19526
+ uint32_t n = 0;
19527
+ ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
19528
+ ok = ok && gguf_fread_el(file, p->data, p->n, offset);
19529
+
19530
+ return ok;
19531
+ }
19532
+
19513
19533
struct gguf_context * gguf_init_empty(void) {
19514
19534
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
19515
19535
@@ -19565,8 +19585,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
19565
19585
ctx->data = NULL;
19566
19586
19567
19587
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
19568
- ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
19569
- ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
19588
+
19589
+ if (ctx->header.version == 1) {
19590
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19591
+ uint32_t n_tensors = 0;
19592
+ uint32_t n_kv = 0;
19593
+
19594
+ ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
19595
+ ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
19596
+
19597
+ ctx->header.n_tensors = n_tensors;
19598
+ ctx->header.n_kv = n_kv;
19599
+ } else {
19600
+ ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
19601
+ ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
19602
+ }
19570
19603
19571
19604
if (!ok) {
19572
19605
fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -19576,6 +19609,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
19576
19609
}
19577
19610
}
19578
19611
19612
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19613
+ bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
19614
+ if (ctx->header.version == 1) {
19615
+ gguf_fread_str = gguf_fread_str_v1;
19616
+ }
19617
+
19579
19618
// read the kv pairs
19580
19619
{
19581
19620
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
@@ -19585,9 +19624,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
19585
19624
19586
19625
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
19587
19626
19588
- ok = ok && gguf_fread_str(file, &kv->key, &offset);
19589
- //ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
19590
- ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
19627
+ ok = ok && gguf_fread_str(file, &kv->key, &offset);
19628
+ ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
19591
19629
19592
19630
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
19593
19631
@@ -19599,12 +19637,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
19599
19637
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
19600
19638
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
19601
19639
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
19640
+ case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
19641
+ case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
19642
+ case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
19602
19643
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
19603
19644
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
19604
19645
case GGUF_TYPE_ARRAY:
19605
19646
{
19606
19647
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
19607
- ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
19648
+
19649
+ if (ctx->header.version == 1) {
19650
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19651
+ uint32_t n = 0;
19652
+ ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
19653
+ kv->value.arr.n = n;
19654
+ } else {
19655
+ ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
19656
+ }
19608
19657
19609
19658
switch (kv->value.arr.type) {
19610
19659
case GGUF_TYPE_UINT8:
@@ -19614,6 +19663,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
19614
19663
case GGUF_TYPE_UINT32:
19615
19664
case GGUF_TYPE_INT32:
19616
19665
case GGUF_TYPE_FLOAT32:
19666
+ case GGUF_TYPE_UINT64:
19667
+ case GGUF_TYPE_INT64:
19668
+ case GGUF_TYPE_FLOAT64:
19617
19669
case GGUF_TYPE_BOOL:
19618
19670
{
19619
19671
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -19660,7 +19712,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
19660
19712
ok = ok && gguf_fread_str(file, &info->name, &offset);
19661
19713
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
19662
19714
for (uint32_t j = 0; j < info->n_dims; ++j) {
19663
- ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
19715
+ if (ctx->header.version == 1) {
19716
+ // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
19717
+ uint32_t t = 0;
19718
+ ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
19719
+ info->ne[j] = t;
19720
+ } else {
19721
+ ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
19722
+ }
19664
19723
}
19665
19724
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
19666
19725
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
@@ -19954,6 +20013,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
19954
20013
return ctx->kv[i].value.float32;
19955
20014
}
19956
20015
20016
+ uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
20017
+ return ctx->kv[i].value.uint64;
20018
+ }
20019
+
20020
+ int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
20021
+ return ctx->kv[i].value.int64;
20022
+ }
20023
+
20024
+ double gguf_get_val_f64(struct gguf_context * ctx, int i) {
20025
+ return ctx->kv[i].value.float64;
20026
+ }
20027
+
19957
20028
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
19958
20029
return ctx->kv[i].value.bool_;
19959
20030
}
@@ -20056,6 +20127,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
20056
20127
ctx->kv[idx].value.float32 = val;
20057
20128
}
20058
20129
20130
+ void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
20131
+ const int idx = gguf_get_or_add_key(ctx, key);
20132
+
20133
+ ctx->kv[idx].type = GGUF_TYPE_UINT64;
20134
+ ctx->kv[idx].value.uint64 = val;
20135
+ }
20136
+
20137
+ void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
20138
+ const int idx = gguf_get_or_add_key(ctx, key);
20139
+
20140
+ ctx->kv[idx].type = GGUF_TYPE_INT64;
20141
+ ctx->kv[idx].value.int64 = val;
20142
+ }
20143
+
20144
+ void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
20145
+ const int idx = gguf_get_or_add_key(ctx, key);
20146
+
20147
+ ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
20148
+ ctx->kv[idx].value.float64 = val;
20149
+ }
20150
+
20059
20151
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
20060
20152
const int idx = gguf_get_or_add_key(ctx, key);
20061
20153
@@ -20106,6 +20198,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
20106
20198
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
20107
20199
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
20108
20200
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
20201
+ case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
20202
+ case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
20203
+ case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
20109
20204
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
20110
20205
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
20111
20206
case GGUF_TYPE_ARRAY:
@@ -20267,6 +20362,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
20267
20362
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
20268
20363
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
20269
20364
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
20365
+ case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
20366
+ case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
20367
+ case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
20270
20368
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
20271
20369
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
20272
20370
case GGUF_TYPE_ARRAY:
@@ -20282,6 +20380,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
20282
20380
case GGUF_TYPE_UINT32:
20283
20381
case GGUF_TYPE_INT32:
20284
20382
case GGUF_TYPE_FLOAT32:
20383
+ case GGUF_TYPE_UINT64:
20384
+ case GGUF_TYPE_INT64:
20385
+ case GGUF_TYPE_FLOAT64:
20285
20386
case GGUF_TYPE_BOOL:
20286
20387
{
20287
20388
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
0 commit comments