
Commit 0996149

convert-hf : allow converting the weird BitNet 1.3B
Its FFN size is 5460, which is not convenient: it is not a multiple of the Q1_3 block size, so those tensors cannot be packed into whole blocks. The offending tensors are kept in F16, which makes the final model 5.01 bpw.
Parent: 961e293
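
As a rough sanity check on the 5.01 bpw figure, here is a back-of-envelope sketch in Python. It assumes Q1_3 costs 1.625 bpw (13-byte blocks of 64 ternary weights; that number comes from the quantization format, not from this commit) and F16 costs 16 bpw, ignoring everything else:

    # Back-of-envelope only: solve f * 16.0 + (1 - f) * 1.625 = 5.01
    # for f, the fraction of weights kept in F16.
    BPW_Q1_3 = 1.625   # assumed cost per Q1_3 weight (13 bytes per 64 weights)
    BPW_F16 = 16.0     # cost per F16 weight
    BPW_FINAL = 5.01   # figure stated in the commit message

    f16_fraction = (BPW_FINAL - BPW_Q1_3) / (BPW_F16 - BPW_Q1_3)
    print(f"~{f16_fraction:.0%} of weights kept in F16")  # prints ~24%

Under those assumptions, roughly a quarter of the weights end up staying in F16.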

2 files changed: +14 −6

convert-hf-to-gguf.py

Lines changed: 10 additions & 6 deletions
@@ -301,12 +301,16 @@ def write_tensors(self):
             if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                 # TODO: cleaner model-specific per-tensor types
                 # NOTE: Q1_3 is only relevant for BitNet 1.58b
-                if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
-                    self.match_model_tensor_name(new_name, key, None)
-                    for key in [
-                        gguf.MODEL_TENSOR.TOKEN_EMBD,
-                        gguf.MODEL_TENSOR.OUTPUT,
-                    ]
+                if (
+                    self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
+                    and gguf.can_quantize_to_q1_3(data)
+                    and not any(
+                        self.match_model_tensor_name(new_name, key, None)
+                        for key in [
+                            gguf.MODEL_TENSOR.TOKEN_EMBD,
+                            gguf.MODEL_TENSOR.OUTPUT,
+                        ]
+                    )
                 ):
                     data = gguf.quantize_q1_3(data)
                     assert data.dtype == np.uint8

gguf-py/gguf/quants.py

Lines changed: 4 additions & 0 deletions
@@ -126,6 +126,10 @@ def quantize_q8_0(data: np.ndarray):
 __q1_3_block_size, __q1_3_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]
 
 
+def can_quantize_to_q1_3(n: np.ndarray) -> bool:
+    return n.shape[-1] % __q1_3_block_size == 0
+
+
 def __quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
     return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)
 
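For illustration, a standalone sketch of how the new helper behaves on this model's awkward FFN size. The block size of 64 is an assumption here; the real helper takes it from GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]:

    import numpy as np

    # Standalone re-creation of can_quantize_to_q1_3, for illustration only.
    # The block size of 64 is assumed; gguf-py reads it from GGML_QUANT_SIZES.
    __q1_3_block_size = 64

    def can_quantize_to_q1_3(n: np.ndarray) -> bool:
        # Q1_3 packs whole blocks along the last dimension, so the row
        # length must be an exact multiple of the block size.
        return n.shape[-1] % __q1_3_block_size == 0

    print(can_quantize_to_q1_3(np.zeros((2048, 5460), dtype=np.float32)))  # False: 5460 % 64 == 20
    print(can_quantize_to_q1_3(np.zeros((2048, 5504), dtype=np.float32)))  # True: 5504 == 86 * 64

Tensors that fail this check fall through to the F16 path in convert-hf-to-gguf.py above, which is what keeps the tensors whose final dimension is 5460 unquantized.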