
Commit e7f6997

Don't crash on ftype (formerly f16) == 4 (#917)
1 parent f76cb3a commit e7f6997

2 files changed: 4 additions, 1 deletion

llama.cpp

Lines changed: 3 additions & 1 deletion
@@ -827,7 +827,9 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_F16:  return "mostly F16";
         case LLAMA_FTYPE_MOSTLY_Q4_0: return "mostly Q4_0";
         case LLAMA_FTYPE_MOSTLY_Q4_1: return "mostly Q4_1";
-        default: LLAMA_ASSERT(false);
+        case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
+                                      return "mostly Q4_1, some F16";
+        default:                      return "unknown, may not work";
     }
 }

llama.h

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_F16  = 1, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
     };

     LLAMA_API struct llama_context_params llama_context_default_params();
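
For context, a minimal self-contained sketch of the behavior after this commit: the new enum value 4 gets a proper name, and unrecognized values return a warning string instead of asserting. This is not the actual llama.cpp source; the `LLAMA_FTYPE_ALL_F32` entry (outside the shown hunks) and the standalone `ftype_name` helper and `main` are illustrative assumptions.

/* sketch.c -- illustrates the graceful-fallback pattern of this commit */
#include <stdio.h>

enum llama_ftype {
    LLAMA_FTYPE_ALL_F32              = 0, /* assumed; not part of the shown hunks */
    LLAMA_FTYPE_MOSTLY_F16           = 1, /* except 1d tensors */
    LLAMA_FTYPE_MOSTLY_Q4_0          = 2, /* except 1d tensors */
    LLAMA_FTYPE_MOSTLY_Q4_1          = 3, /* except 1d tensors */
    LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, /* tok_embeddings.weight and output.weight are F16 */
};

static const char *ftype_name(enum llama_ftype ftype) {
    switch (ftype) {
        case LLAMA_FTYPE_ALL_F32:              return "all F32";
        case LLAMA_FTYPE_MOSTLY_F16:           return "mostly F16";
        case LLAMA_FTYPE_MOSTLY_Q4_0:          return "mostly Q4_0";
        case LLAMA_FTYPE_MOSTLY_Q4_1:          return "mostly Q4_1";
        case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: return "mostly Q4_1, some F16";
        /* Unknown values (e.g. from a newer model file) no longer abort. */
        default:                               return "unknown, may not work";
    }
}

int main(void) {
    printf("%s\n", ftype_name(LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16)); /* mostly Q4_1, some F16 */
    printf("%s\n", ftype_name((enum llama_ftype) 99));            /* unknown, may not work */
    return 0;
}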
