diff --git a/convert.py b/convert.py
index e720889fd515a..ac0f3bb9e627e 100644
--- a/convert.py
+++ b/convert.py
@@ -16,12 +16,14 @@
 import re
 import signal
 import struct
+import subprocess
 import sys
 import zipfile
 import numpy as np
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List,
                     Literal, Optional, Sequence, Tuple, TypeVar, Union)
 from sentencepiece import SentencePieceProcessor  # type: ignore
@@ -734,6 +736,8 @@ def __init__(self, fname_out: Path) -> None:
 
     def add_meta_arch(self, params: Params) -> None:
         self.gguf.add_name ("LLaMA")
+        self.gguf.add_date (datetime.now(timezone.utc).isoformat())
+        self.gguf.add_commit_hash (get_git_revision_short_hash())
         self.gguf.add_context_length (params.n_ctx)
         self.gguf.add_embedding_length (params.n_embd)
         self.gguf.add_block_count (params.n_layer)
@@ -1000,6 +1004,17 @@ def do_dump_model(model_plus: ModelPlus) -> None:
         print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
 
 
+def get_git_revision_short_hash() -> str:
+    # Best effort: the converter is frequently run from a release tarball (no
+    # .git directory) or on a machine without git installed; record "unknown"
+    # in the model metadata rather than aborting the whole conversion.
+    try:
+        out = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'],
+                                      stderr=subprocess.DEVNULL)
+    except (OSError, subprocess.CalledProcessError):
+        return "unknown"
+    return out.decode('ascii').strip()
+
 def main(args_in: Optional[List[str]] = None) -> None:
     parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
     parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
diff --git a/gguf.py b/gguf.py
index 4657467182328..89cbbf93d43bb 100644
--- a/gguf.py
+++ b/gguf.py
@@ -27,6 +27,8 @@
 KEY_GENERAL_SOURCE_URL = "general.source.url"
 KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"
 KEY_GENERAL_FILE_TYPE = "general.file_type"
+KEY_GENERAL_DATE = "general.date"
+KEY_GENERAL_COMMIT_HASH = "general.commit_hash"
 
 # LLM
 KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length"
@@ -599,6 +601,12 @@ def add_source_hf_repo(self, repo: str):
     def add_file_type(self, ftype: int):
         self.add_uint32(KEY_GENERAL_FILE_TYPE, ftype)
 
+    def add_date(self, date: str):
+        self.add_string(KEY_GENERAL_DATE, date)
+
+    def add_commit_hash(self, commit_hash: str):
+        self.add_string(KEY_GENERAL_COMMIT_HASH, commit_hash)
+
     def add_name(self, name: str):
         self.add_string(KEY_GENERAL_NAME, name)
 
diff --git a/llama.cpp b/llama.cpp
index 6c5da130926fc..a18795f859672 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1279,6 +1279,8 @@ static void llama_model_load_internal(
 
     std::string general_name = "n/a";
     std::string general_arch = "n/a";
+    std::string general_date = "n/a";
+    std::string general_commit_hash = "n/a";
 
     // read hparams
     {
@@ -1336,6 +1338,8 @@
         // get general kv
         GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name");
         GGUF_GET(general_arch, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.architecture");
+        GGUF_GET(general_date, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.date");
+        GGUF_GET(general_commit_hash, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.commit_hash");
 
         // special tokens
         GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id");
@@ -1445,6 +1449,8 @@
 
     // general kv
     LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, general_name.c_str());
+    LLAMA_LOG_INFO("%s: general.date = %s\n", __func__, general_date.c_str());
+    LLAMA_LOG_INFO("%s: general.commit_hash = %s\n", __func__, general_commit_hash.c_str());
 
     // special tokens
     if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }