
Commit 8cf19d6

chenqiny and ggerganov authored
gguf : support big endian platform (#3552)
* check whether the platform is s390x; if yes, do not import immintrin.h
* support s390x big endian
* support --bigendian option for s390x
  1. verified with baichuan7b-chat with float 16 on s390x
  2. verified with baichuan7b-chat
  3. verified with chinese-alpaca-2-13b-f16
* update format based on editor-config checker result
* Update convert-baichuan-hf-to-gguf.py
* 1. check in ggml.c if the endianness does not match
  2. update GGUF version
  3. change get_pack_prefix to a property
  4. update information log
* always use "GGUF" as the beginning of a GGUF file
* Compare "GGUF" with the file header char by char
  1. Set GGUF_MAGIC to the "GGUF" string instead of an int value
  2. Compare "GGUF" char by char to ensure its byte order
  3. Move the byte-swap code from convert.py to gguf.py write_tensor_data

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent a0edf73 commit 8cf19d6

9 files changed: +84 −49
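
Why the format change was needed: GGUF's magic was written and checked as the 32-bit integer 0x46554747, whose in-memory byte order differs between hosts. A minimal sketch (not part of the commit) of the failure mode, using only Python's standard library:

```python
import struct

MAGIC_INT = 0x46554747  # spells "GGUF" only when packed little-endian

print(struct.pack("<I", MAGIC_INT))  # b'GGUF' -- what little-endian hosts write
print(struct.pack(">I", MAGIC_INT))  # b'FUGG' -- what a native write would
                                     # produce on a big-endian host like s390x

# Treating the magic as four characters instead of one integer makes the
# check independent of byte order: the file starts with b"GGUF" either way.
assert struct.pack("<I", MAGIC_INT) == b"GGUF"
```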

convert-baichuan-hf-to-gguf.py

Lines changed: 7 additions & 1 deletion
```diff
@@ -76,6 +76,7 @@ def parse_args() -> argparse.Namespace:
         "ftype", type=int, choices=[0, 1], default=1, nargs='?',
         help="output format - use 0 for float32, 1 for float16",
     )
+    parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
     return parser.parse_args()

 args = parse_args()
@@ -86,6 +87,11 @@ def parse_args() -> argparse.Namespace:
     print(f'Error: {args.model} is not a directory', file = sys.stderr)
     sys.exit(1)

+endianess = gguf.GGUFEndian.LITTLE
+if args.bigendian:
+    endianess = gguf.GGUFEndian.BIG
+endianess_str = "Big Endian" if args.bigendian else "Little Endian"
+print(f"gguf: Conversion Endianess {endianess}")
 # possible tensor data types
 # ftype == 0 -> float32
 # ftype == 1 -> float16
@@ -113,7 +119,7 @@ def parse_args() -> argparse.Namespace:
 num_parts = count_model_parts(dir_model)
 print(f"num_parts:{num_parts}\n")
 ARCH=gguf.MODEL_ARCH.BAICHUAN
-gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
+gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

 print("gguf: get model metadata")
```

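The new flag feeds straight into the writer. A condensed, runnable sketch of the selection logic above (assuming the repo's gguf-py package is installed; the `--bigendian` value is simulated here):

```python
import argparse

import gguf  # assumes the gguf-py package from this repo is installed

parser = argparse.ArgumentParser()
parser.add_argument("--bigendian", action="store_true",
                    help="model is executed on big endian machine")
args = parser.parse_args(["--bigendian"])  # simulate converting for s390x

# Equivalent to the if/else in the diff above, as a conditional expression.
endianess = gguf.GGUFEndian.BIG if args.bigendian else gguf.GGUFEndian.LITTLE

# Every multi-byte field the writer emits now follows the chosen byte order.
gguf_writer = gguf.GGUFWriter("out.gguf",
                              gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.BAICHUAN],
                              endianess=endianess)
```
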
convert.py

Lines changed: 12 additions & 8 deletions
```diff
@@ -803,8 +803,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:


 class OutputFile:
-    def __init__(self, fname_out: Path) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
+    def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"
@@ -875,10 +875,10 @@ def close(self) -> None:
         self.gguf.close()

     @staticmethod
-    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab) -> None:
+    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out)
+        of = OutputFile(fname_out, endianess=endianess)

         # meta data
         of.add_meta_arch(params)
@@ -903,10 +903,10 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
             return dt.quantize(arr)

     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out)
+        of = OutputFile(fname_out, endianess=endianess)

         # meta data
         of.add_meta_arch(params)
@@ -1123,8 +1123,9 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
     parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
-    args = parser.parse_args(args_in)
+    parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")

+    args = parser.parse_args(args_in)
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
@@ -1138,6 +1139,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+    endianess = gguf.GGUFEndian.LITTLE
+    if args.bigendian:
+        endianess = gguf.GGUFEndian.BIG

     params = Params.load(model_plus)
     if params.n_ctx == -1:
@@ -1185,7 +1189,7 @@ def main(args_in: list[str] | None = None) -> None:
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")

-    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency)
+    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
     print(f"Wrote {outfile}")
```

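Note the design: `endianess` defaults to `GGUFEndian.LITTLE` at every layer (`OutputFile.__init__`, `write_vocab_only`, `write_all`), so existing callers keep producing little-endian files and only the new `--bigendian` path changes behavior. A toy illustration of that pattern, with hypothetical stand-in classes:

```python
from enum import IntEnum

class GGUFEndian(IntEnum):
    LITTLE = 0
    BIG = 1

class Writer:  # stand-in for gguf.GGUFWriter
    def __init__(self, endianess: GGUFEndian = GGUFEndian.LITTLE) -> None:
        self.endianess = endianess

class OutputFile:  # mirrors the wrapper in convert.py
    def __init__(self, endianess: GGUFEndian = GGUFEndian.LITTLE) -> None:
        self.gguf = Writer(endianess=endianess)

print(OutputFile().gguf.endianess)                          # GGUFEndian.LITTLE
print(OutputFile(endianess=GGUFEndian.BIG).gguf.endianess)  # GGUFEndian.BIG
```
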
examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -536,7 +536,7 @@ static bool is_ggml_file(const char * filename) {
     if (file.size < 4) {
         return false;
     }
-    uint32_t magic = file.read_u32();
+    std::string magic = file.read_string(4);
     return magic == GGUF_MAGIC;
 }
```

ggml.c

Lines changed: 11 additions & 8 deletions
```diff
@@ -20845,7 +20845,7 @@ struct gguf_kv {
 };

 struct gguf_header {
-    uint32_t magic;
+    char magic[4];
     uint32_t version;
     uint64_t n_tensors; // GGUFv2
     uint64_t n_kv;      // GGUFv2
@@ -20915,7 +20915,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));

-    ctx->header.magic = GGUF_MAGIC;
+    memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
     ctx->header.version = GGUF_VERSION;
     ctx->header.n_tensors = 0;
     ctx->header.n_kv = 0;
@@ -20941,16 +20941,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // offset from start of file
     size_t offset = 0;

-    uint32_t magic = 0;
+    char magic[4];

     // check the magic before making allocations
     {
         gguf_fread_el(file, &magic, sizeof(magic), &offset);

-        if (magic != GGUF_MAGIC) {
-            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
-            fclose(file);
-            return NULL;
+        for (uint32_t i = 0; i < sizeof(magic); i++) {
+            if (magic[i] != GGUF_MAGIC[i]) {
+                fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
+                fclose(file);
+                return NULL;
+            }
         }
     }

@@ -20960,7 +20962,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the header
     {
-        ctx->header.magic = magic;
+        strncpy(ctx->header.magic, magic, 4);
+

         ctx->kv = NULL;
         ctx->infos = NULL;
```

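With the magic stored as four raw characters, the check above passes on any host; only the fields after it are byte-order sensitive. A hypothetical Python helper (not part of the commit) that mirrors the char-by-char check and then guesses a file's byte order from the version field:

```python
import struct

def gguf_byte_order(path: str) -> str:
    """Validate the magic and guess the byte order of a GGUF file."""
    with open(path, "rb") as f:
        magic = f.read(4)
        if magic != b"GGUF":  # compared as characters, like the C loop above
            raise ValueError(f"invalid magic characters {magic!r}")
        raw_version = f.read(4)
    le = struct.unpack("<I", raw_version)[0]
    be = struct.unpack(">I", raw_version)[0]
    # GGUF versions are small integers, so the correct decoding is the
    # small one and the byte-swapped decoding is implausibly large.
    return "little-endian" if le <= be else "big-endian"
```
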
ggml.h

Lines changed: 3 additions & 2 deletions
```diff
@@ -231,8 +231,9 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

-#define GGUF_MAGIC 0x46554747 // "GGUF"
-#define GGUF_VERSION 2
+#define GGUF_MAGIC "GGUF"
+
+#define GGUF_VERSION 3

 #define GGUF_DEFAULT_ALIGNMENT 32
```

gguf-py/gguf/gguf.py

Lines changed: 46 additions & 27 deletions
```diff
@@ -19,9 +19,10 @@
 #

 GGUF_MAGIC = 0x46554747
-GGUF_VERSION = 2
+GGUF_VERSION = 3
 GGUF_DEFAULT_ALIGNMENT = 32

+
 # general
 KEY_GENERAL_ARCHITECTURE = "general.architecture"
 KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
@@ -597,6 +598,10 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15

+class GGUFEndian(IntEnum):
+    LITTLE = 0
+    BIG = 1
+

 class GGUFValueType(IntEnum):
     UINT8 = 0
@@ -644,18 +649,41 @@ class GGUFWriter:
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
     tensors: list[tuple[np.ndarray[Any, Any], int]]

-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
+    @property
+    def pack_prefix(self):
+        if self.endianess==GGUFEndian.LITTLE:
+            return "<"
+        else:
+            return ">"
+
+    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True, endianess=GGUFEndian.LITTLE):
         self.fout = open(path, "wb")
         self.arch = arch
+        self.endianess = endianess
+        self._simple_value_packing = {
+            GGUFValueType.UINT8: f"{self.pack_prefix}B",
+            GGUFValueType.INT8: f"{self.pack_prefix}b",
+            GGUFValueType.UINT16: f"{self.pack_prefix}H",
+            GGUFValueType.INT16: f"{self.pack_prefix}h",
+            GGUFValueType.UINT32: f"{self.pack_prefix}I",
+            GGUFValueType.INT32: f"{self.pack_prefix}i",
+            GGUFValueType.FLOAT32: f"{self.pack_prefix}f",
+            GGUFValueType.UINT64: f"{self.pack_prefix}Q",
+            GGUFValueType.INT64: f"{self.pack_prefix}q",
+            GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
+            GGUFValueType.BOOL: "?" ,
+        }
         self.add_architecture()
         self.use_temp_file = use_temp_file
         self.tensors = []
+        endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
+        print(f"This gguf file is for {endianess_str} only")

     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
-        self.fout.write(struct.pack("<I", GGUF_VERSION))
-        self.fout.write(struct.pack("<Q", self.ti_data_count))
-        self.fout.write(struct.pack("<Q", self.kv_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
         self.flush()
         # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))

@@ -727,40 +755,27 @@ def add_array(self, key: str, val: Sequence[Any]):
         self.add_key(key)
         self.add_val(val, GGUFValueType.ARRAY)

-    _simple_value_packing = {
-        GGUFValueType.UINT8: "<B",
-        GGUFValueType.INT8: "<b",
-        GGUFValueType.UINT16: "<H",
-        GGUFValueType.INT16: "<h",
-        GGUFValueType.UINT32: "<I",
-        GGUFValueType.INT32: "<i",
-        GGUFValueType.FLOAT32: "<f",
-        GGUFValueType.UINT64: "<Q",
-        GGUFValueType.INT64: "<q",
-        GGUFValueType.FLOAT64: "<d",
-        GGUFValueType.BOOL: "?" ,
-    }
     def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
         if vtype is None:
             vtype = GGUFValueType.get_type(val)

         if add_vtype:
-            self.kv_data += struct.pack("<I", vtype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", vtype)
             self.kv_data_count += 1

         pack_fmt = self._simple_value_packing.get(vtype)
         if pack_fmt is not None:
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack("<Q", len(encoded_val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack("<I", ltype)
-            self.kv_data += struct.pack("<Q", len(val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", ltype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
@@ -774,22 +789,24 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"

         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack("<Q", len(encoded_name))
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack("<I", n_dims)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(f"{self.pack_prefix}Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack("<I", dtype)
-        self.ti_data += struct.pack("<Q", self.offset_tensor)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", dtype)
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1

     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)
@@ -815,6 +832,8 @@ def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
             fp.write(bytes([0] * pad))

     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.endianess==GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
         self.write_padding(self.fout, tensor.nbytes)
```

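Two mechanisms carry the byte order through the writer: every `struct.pack` format string now begins with the `pack_prefix` property (`"<"` or `">"`; `BOOL` keeps a bare `"?"` because a single byte has no byte order), and tensor payloads are byte-swapped in place before being written, with the swap moved from convert.py into the writer itself. A standalone sketch of both mechanisms, assuming only numpy and the standard library:

```python
import struct

import numpy as np

pack_prefix = ">"  # ">" for GGUFEndian.BIG, "<" for GGUFEndian.LITTLE

# 1. Metadata scalars: the prefix controls the byte order of each field.
assert struct.pack(f"{pack_prefix}I", 3) == b"\x00\x00\x00\x03"

# 2. Tensor payloads: byteswap(inplace=True) reorders the raw bytes so the
#    bytes on disk match the target machine's native layout.
t = np.array([1.0], dtype="<f4")  # little-endian float32
t.byteswap(inplace=True)
assert t.tobytes() == np.array([1.0], dtype=">f4").tobytes()
```
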
gguf-py/pyproject.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.4.4"
+version = "0.4.5"
 description = "Write ML models in GGUF for GGML"
 authors = ["GGML <[email protected]>"]
 packages = [
```

k_quants.c

Lines changed: 1 addition & 1 deletion
```diff
@@ -46,7 +46,7 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <intrin.h>
 #else
-#if !defined(__riscv)
+#if !defined(__riscv) && !defined(__s390__)
 #include <immintrin.h>
 #endif
 #endif
```

tests/test-double-float.cpp

Lines changed: 2 additions & 0 deletions
```diff
@@ -4,7 +4,9 @@

 #undef NDEBUG
 #include <cassert>
+#if !defined(__riscv) && !defined(__s390__)
 #include <immintrin.h>
+#endif
 #include <cmath>
 #include <cstdint>
 #include <cstring>
```
