Skip to content

Commit b83bab1

Browse files
authored
gguf-py : fix and simplify quantized shape round-trip (#7483)
* gguf-py : fix and simplify quantized shape round-trip
* gguf-py : remove unused import
1 parent: d041d2c · commit: b83bab1

File tree

5 files changed

+27
-14
lines changed

5 files changed

+27
-14
lines changed

convert-hf-to-gguf.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -313,11 +313,10 @@ def write_tensors(self):
313313
data = data.astype(np.float32)
314314
data_qtype = gguf.GGMLQuantizationType.F32
315315

316-
block_size, type_size = gguf.GGML_QUANT_SIZES[data_qtype]
316+
shape = gguf.quant_shape_from_byte_shape(data.shape, data_qtype) if data.dtype == np.uint8 else data.shape
317+
317318
# reverse shape to make it similar to the internal ggml dimension order
318-
shape_str = f"""{{{', '.join(str(n) for n in reversed(
319-
(*data.shape[:-1], data.shape[-1] * data.dtype.itemsize // type_size * block_size))
320-
)}}}"""
319+
shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"
321320

322321
# n_dims is implicit in the shape
323322
logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

gguf-py/gguf/gguf_reader.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
import numpy as np
1313
import numpy.typing as npt
1414

15+
from .quants import quant_shape_to_byte_shape
16+
1517
if __name__ == "__main__":
1618
import sys
1719
from pathlib import Path
@@ -251,6 +253,7 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
251253
tensor_names.add(tensor_name)
252254
ggml_type = GGMLQuantizationType(raw_dtype[0])
253255
n_elems = int(np.prod(dims))
256+
np_dims = tuple(reversed(dims.tolist()))
254257
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
255258
n_bytes = n_elems * type_size // block_size
256259
data_offs = int(start_offs + offset_tensor[0])
@@ -279,14 +282,15 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
279282
else:
280283
item_count = n_bytes
281284
item_type = np.uint8
285+
np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
282286
tensors.append(ReaderTensor(
283287
name = tensor_name,
284288
tensor_type = ggml_type,
285289
shape = dims,
286290
n_elements = n_elems,
287291
n_bytes = n_bytes,
288292
data_offset = data_offs,
289-
data = self._get(data_offs, item_type, item_count),
293+
data = self._get(data_offs, item_type, item_count).reshape(np_dims),
290294
field = field,
291295
))
292296
self.tensors = tensors

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
import numpy as np
1414

1515
from .constants import (
16-
GGML_QUANT_SIZES,
1716
GGUF_DEFAULT_ALIGNMENT,
1817
GGUF_MAGIC,
1918
GGUF_VERSION,
@@ -26,6 +25,8 @@
2625
TokenType,
2726
)
2827

28+
from .quants import quant_shape_from_byte_shape
29+
2930
logger = logging.getLogger(__name__)
3031

3132

@@ -229,10 +230,7 @@ def add_tensor_info(
229230
else:
230231
dtype = raw_dtype
231232
if tensor_dtype == np.uint8:
232-
block_size, type_size = GGML_QUANT_SIZES[raw_dtype]
233-
if tensor_shape[-1] % type_size != 0:
234-
raise ValueError(f"Quantized tensor row size ({tensor_shape[-1]}) is not a multiple of {dtype.name} type size ({type_size})")
235-
tensor_shape = tuple(tensor_shape[:-1]) + (tensor_shape[-1] // type_size * block_size,)
233+
tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)
236234
n_dims = len(tensor_shape)
237235
self.ti_data += self._pack("I", n_dims)
238236
for i in range(n_dims):

gguf-py/gguf/quants.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from __future__ import annotations
2-
from typing import Callable
2+
from typing import Callable, Sequence
33

44
from numpy.typing import DTypeLike
55

@@ -9,6 +9,20 @@
99
import numpy as np
1010

1111

12+
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
13+
block_size, type_size = GGML_QUANT_SIZES[quant_type]
14+
if shape[-1] % block_size != 0:
15+
raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})")
16+
return (*shape[:-1], shape[-1] // block_size * type_size)
17+
18+
19+
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType):
20+
block_size, type_size = GGML_QUANT_SIZES[quant_type]
21+
if shape[-1] % type_size != 0:
22+
raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})")
23+
return (*shape[:-1], shape[-1] // type_size * block_size)
24+
25+
1226
# same as ggml_compute_fp32_to_bf16 in ggml-impl.h
1327
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
1428
n = n.astype(np.float32, copy=False).view(np.int32)

gguf-py/scripts/gguf-new-metadata.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -118,9 +118,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
118118

119119
for tensor in reader.tensors:
120120
total_bytes += tensor.n_bytes
121-
# Dimensions are written in reverse order, so flip them first
122-
shape = np.flipud(tensor.shape).tolist()
123-
writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
121+
writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
124122

125123
bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
126124

0 commit comments

Comments
 (0)