diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index dde4fa9c80ca3..23c260522fe19 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -384,6 +384,12 @@ def prepare_metadata(self, vocab_only: bool):
 
         self.set_type()
 
+        # Generate sha256 based on tensor content if required
+        if not vocab_only:
+            hash_sha256 = self.gguf_writer.calculate_tensor_hash_sha256()
+            self.gguf_writer.add_hash_sha256(hash_sha256)
+            logger.info(f"tensor hash (sha256): {hash_sha256}")
+
         logger.info("Set meta model")
         self.metadata.set_gguf_meta_model(self.gguf_writer)
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index e343c2ef1659a..8fca4e3eb3982 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -25,6 +25,9 @@ class General:
         ALIGNMENT = "general.alignment"
         FILE_TYPE = "general.file_type"
 
+        # Tensor Hash
+        HASH_SHA256 = "general.hash.sha256"
+
         # Authorship Metadata
         NAME = "general.name"
         AUTHOR = "general.author"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index ba6f53cda25a1..8e615ffc193c3 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -2,6 +2,7 @@
 
+import hashlib
 import logging
 import os
 import shutil
 import struct
 import tempfile
@@ -417,6 +418,29 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
 
         self.state = WriterState.WEIGHTS
 
+    def calculate_tensor_hash_sha256(self) -> str:
+        """Return the hex SHA-256 digest of the data of every queued tensor.
+
+        Each dict in ``self.tensors`` is visited in turn and its entries are
+        hashed in insertion order (guaranteed for dicts since Python 3.7),
+        feeding the C-order bytes of each tensor into a single digest.
+
+        Raises:
+            ValueError: if an entry has no tensor data attached, or the data's
+                byte size differs from the size recorded for that entry.
+        """
+        sha256 = hashlib.sha256()
+
+        for tensors in self.tensors:
+            for name, ti in tensors.items():
+                # explicit checks rather than assert, which is stripped under `python -O`
+                if ti.tensor is None:
+                    raise ValueError(f"tensor entry {name!r} has no data to hash")
+                if ti.tensor.nbytes != ti.nbytes:
+                    raise ValueError(f"tensor entry {name!r} size mismatch: {ti.tensor.nbytes} != {ti.nbytes}")
+                sha256.update(ti.tensor.tobytes('C'))
+
+        return sha256.hexdigest()
+
     def write_tensors_to_file(self, *, progress: bool = False) -> None:
         self.write_ti_data_to_file()
 
@@ -491,5 +515,9 @@ def add_custom_alignment(self, alignment: int) -> None:
     def add_file_type(self, ftype: int) -> None:
         self.add_uint32(Keys.General.FILE_TYPE, ftype)
 
+    def add_hash_sha256(self, hash: str) -> None:
+        """Store *hash* as a string under the ``general.hash.sha256`` key."""
+        self.add_string(Keys.General.HASH_SHA256, hash)
+
     def add_name(self, name: str) -> None:
         self.add_string(Keys.General.NAME, name)