Skip to content

Commit bfd2f21

Browse files
committed
bitnet : replace 1.58b with b1.58, as in the paper
1 parent 0996149 commit bfd2f21

File tree

5 files changed

+7
-7
lines changed

5 files changed

+7
-7
lines changed

convert-hf-to-gguf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ def write_tensors(self):
300300

301301
if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
302302
# TODO: cleaner model-specific per-tensor types
303-
# NOTE: Q1_3 is only relevant for BitNet 1.58b
303+
# NOTE: Q1_3 is only relevant for BitNet b1.58
304304
if (
305305
self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
306306
and gguf.can_quantize_to_q1_3(data)

examples/quantize/quantize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
2626
{ "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
2727
{ "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
2828
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
29-
{ "Q1_3", LLAMA_FTYPE_MOSTLY_Q1_3, " 1.63 bpw for BitNet 1.58b", },
30-
{ "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2.00 bpw for BitNet 1.58b", },
29+
{ "Q1_3", LLAMA_FTYPE_MOSTLY_Q1_3, " 1.63 bpw for BitNet b1.58", },
30+
{ "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2.00 bpw for BitNet b1.58", },
3131
{ "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B", },
3232
{ "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B", },
3333
{ "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },

ggml/src/ggml-common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ typedef sycl::half2 ggml_half2;
137137

138138
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
139139

140-
// 1.625 bpw for BitNet 1.58b models
140+
// 1.625 bpw for BitNet b1.58 models
141141
#define QK1_3 64
142142
typedef struct {
143143
uint8_t q[(QK1_3 - 4*QK1_3/64)/5]; // 5 elements per byte (3^5 = 243 < 256)

ggml/src/ggml-quants.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3366,7 +3366,7 @@ size_t quantize_q2_2(const float * restrict src, void * restrict dst, int64_t nr
33663366
return nrow * row_size;
33673367
}
33683368

3369-
// ====================== 1.625 bpw (de)-quantization (BitNet 1.58b)
3369+
// ====================== 1.625 bpw (de)-quantization (BitNet b1.58)
33703370

33713371
void quantize_row_q1_3_reference(const float * restrict x, block_q1_3 * restrict y, int64_t k) {
33723372
assert(k % QK1_3 == 0);

src/llama.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4186,8 +4186,8 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
41864186
case LLAMA_FTYPE_ALL_F32: return "all F32";
41874187
case LLAMA_FTYPE_MOSTLY_F16: return "F16";
41884188
case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
4189-
case LLAMA_FTYPE_MOSTLY_Q1_3: return "Q1_3 - 1.625 bpw for BitNet 1.58b";
4190-
case LLAMA_FTYPE_MOSTLY_Q2_2: return "Q2_2 - 2.000 bpw for BitNet 1.58b";
4189+
case LLAMA_FTYPE_MOSTLY_Q1_3: return "Q1_3 - 1.625 bpw for BitNet b1.58";
4190+
case LLAMA_FTYPE_MOSTLY_Q2_2: return "Q2_2 - 2.000 bpw for BitNet b1.58";
41914191
case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
41924192
case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
41934193
case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:

0 commit comments

Comments (0)