
Commit 7ce2c77

gguf : add support for I64 and F64 arrays (#6062)
* gguf : add support for I64 and F64 arrays

GGML currently does not support I64 or F64 arrays, and they are not often used in machine learning. However, if the need arises in the future, it would be nice to add them now, so that the types sit next to the other types I8, I16, I32 in the enums, and so that their type numbers are reserved.

Furthermore, with this addition the GGUF format becomes very usable for most computational applications of NumPy (it is compatible with the most common NumPy dtypes: i8, i16, i32, i64, f32, f64), providing a faster and more versatile alternative to the `npz` format, and a simpler alternative to the `hdf5` format.

The change in this PR seems small and should not significantly increase the maintenance burden. I tested this from Python using GGUFWriter/Reader and `gguf-dump`, as well as from C; everything seems to work.

* Fix compiler warnings
1 parent aab606a commit 7ce2c77
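
As a quick illustration of the Python round trip the commit message describes, here is a minimal sketch using the gguf-py API as of this commit (the file name, architecture string, and tensor names are made up for illustration):

```python
# Minimal round-trip sketch, assuming the gguf-py API as of this commit;
# the file name, arch string, and tensor names are hypothetical.
import numpy as np
from gguf import GGUFReader, GGUFWriter

writer = GGUFWriter("example.gguf", "example-arch")
# int64 and float64 arrays become representable with this change
writer.add_tensor("ids",    np.arange(8, dtype=np.int64))
writer.add_tensor("values", np.linspace(0.0, 1.0, 8, dtype=np.float64))
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()

reader = GGUFReader("example.gguf")
for tensor in reader.tensors:
    # the reader now maps I64/F64 data to np.int64/np.float64 views
    print(tensor.name, tensor.data.dtype, tensor.data.shape)
```

Dumping `example.gguf` with `gguf-dump` should then list the two tensors with types I64 and F64.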

File tree: 5 files changed, +40 −7 lines

- ggml.c
- ggml.h
- gguf-py/gguf/constants.py
- gguf-py/gguf/gguf_reader.py
- gguf-py/gguf/gguf_writer.py

ggml.c

Lines changed: 17 additions & 0 deletions
```diff
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .type_size    = sizeof(int32_t),
         .is_quantized = false,
     },
+    [GGML_TYPE_I64] = {
+        .type_name    = "i64",
+        .blck_size    = 1,
+        .type_size    = sizeof(int64_t),
+        .is_quantized = false,
+    },
+    [GGML_TYPE_F64] = {
+        .type_name    = "f64",
+        .blck_size    = 1,
+        .type_size    = sizeof(double),
+        .is_quantized = false,
+        .nrows        = 1,
+    },
     [GGML_TYPE_F32] = {
         .type_name    = "f32",
         .blck_size    = 1,
@@ -12418,6 +12431,8 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
                 GGML_ASSERT(false);
@@ -12504,6 +12519,8 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
                 GGML_ASSERT(false);
```

ggml.h

Lines changed: 2 additions & 0 deletions
```diff
@@ -366,6 +366,8 @@ extern "C" {
         GGML_TYPE_I8  = 24,
         GGML_TYPE_I16 = 25,
         GGML_TYPE_I32 = 26,
+        GGML_TYPE_I64 = 27,
+        GGML_TYPE_F64 = 28,
         GGML_TYPE_COUNT,
     };
```

gguf-py/gguf/constants.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -665,6 +665,8 @@ class GGMLQuantizationType(IntEnum):
     I8  = 24
     I16 = 25
     I32 = 26
+    I64 = 27
+    F64 = 28


 class GGUFEndian(IntEnum):
@@ -734,6 +736,8 @@ def get_type(val: Any) -> GGUFValueType:
     GGMLQuantizationType.I8:  (1, 1),
     GGMLQuantizationType.I16: (1, 2),
     GGMLQuantizationType.I32: (1, 4),
+    GGMLQuantizationType.I64: (1, 8),
+    GGMLQuantizationType.F64: (1, 8),
 }
```
gguf-py/gguf/gguf_reader.py

Lines changed: 9 additions & 3 deletions
```diff
@@ -242,12 +242,15 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
             n_bytes = n_elems * type_size // block_size
             data_offs = int(start_offs + offset_tensor[0])
             item_type: npt.DTypeLike
-            if ggml_type == GGMLQuantizationType.F32:
+            if ggml_type == GGMLQuantizationType.F16:
+                item_count = n_elems
+                item_type = np.float16
+            elif ggml_type == GGMLQuantizationType.F32:
                 item_count = n_elems
                 item_type = np.float32
-            elif ggml_type == GGMLQuantizationType.F16:
+            elif ggml_type == GGMLQuantizationType.F64:
                 item_count = n_elems
-                item_type = np.float16
+                item_type = np.float64
             elif ggml_type == GGMLQuantizationType.I8:
                 item_count = n_elems
                 item_type = np.int8
@@ -257,6 +260,9 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
             elif ggml_type == GGMLQuantizationType.I32:
                 item_count = n_elems
                 item_type = np.int32
+            elif ggml_type == GGMLQuantizationType.I64:
+                item_count = n_elems
+                item_type = np.int64
             else:
                 item_count = n_bytes
                 item_type = np.uint8
```
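
The if/elif ladder above amounts to a lookup table from GGML type to NumPy dtype. A condensed sketch of the same dispatch (not the actual gguf-py code; `item_view` is a hypothetical helper):

```python
import numpy as np
from gguf.constants import GGMLQuantizationType

# Condensed equivalent of the reader's per-type dispatch after this commit;
# any type missing from the table falls back to raw uint8 bytes.
_ITEM_DTYPES = {
    GGMLQuantizationType.F16: np.float16,
    GGMLQuantizationType.F32: np.float32,
    GGMLQuantizationType.F64: np.float64,  # new
    GGMLQuantizationType.I8:  np.int8,
    GGMLQuantizationType.I16: np.int16,
    GGMLQuantizationType.I32: np.int32,
    GGMLQuantizationType.I64: np.int64,   # new
}

def item_view(ggml_type, n_elems: int, n_bytes: int):
    """Return (item_count, item_type) the way the reader computes them."""
    if ggml_type in _ITEM_DTYPES:
        return n_elems, _ITEM_DTYPES[ggml_type]
    return n_bytes, np.uint8  # quantized types: expose raw bytes
```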

gguf-py/gguf/gguf_writer.py

Lines changed: 8 additions & 4 deletions
```diff
@@ -204,18 +204,22 @@ def add_tensor_info(
         for i in range(n_dims):
             self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
-            if tensor_dtype == np.float32:
-                dtype = GGMLQuantizationType.F32
-            elif tensor_dtype == np.float16:
+            if tensor_dtype == np.float16:
                 dtype = GGMLQuantizationType.F16
+            elif tensor_dtype == np.float32:
+                dtype = GGMLQuantizationType.F32
+            elif tensor_dtype == np.float64:
+                dtype = GGMLQuantizationType.F64
             elif tensor_dtype == np.int8:
                 dtype = GGMLQuantizationType.I8
             elif tensor_dtype == np.int16:
                 dtype = GGMLQuantizationType.I16
             elif tensor_dtype == np.int32:
                 dtype = GGMLQuantizationType.I32
+            elif tensor_dtype == np.int64:
+                dtype = GGMLQuantizationType.I64
             else:
-                raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
+                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
         else:
             dtype = raw_dtype
         self.ti_data += self._pack("I", dtype)
```
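
The writer-side resolution is the mirror image of the reader's dispatch. A condensed sketch of the same mapping (again not the actual gguf-py code; `resolve_dtype` is a hypothetical helper):

```python
import numpy as np
from gguf.constants import GGMLQuantizationType

# Condensed equivalent of add_tensor_info's dtype resolution after this
# commit; unsupported dtypes raise, exactly as in the original chain.
_NUMPY_TO_GGML = {
    np.dtype(np.float16): GGMLQuantizationType.F16,
    np.dtype(np.float32): GGMLQuantizationType.F32,
    np.dtype(np.float64): GGMLQuantizationType.F64,  # new
    np.dtype(np.int8):    GGMLQuantizationType.I8,
    np.dtype(np.int16):   GGMLQuantizationType.I16,
    np.dtype(np.int32):   GGMLQuantizationType.I32,
    np.dtype(np.int64):   GGMLQuantizationType.I64,  # new
}

def resolve_dtype(tensor_dtype) -> GGMLQuantizationType:
    try:
        return _NUMPY_TO_GGML[np.dtype(tensor_dtype)]
    except KeyError:
        raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
```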
