
Commit e112b61

Authored by Eddie-Wang1120 and root
llama : add support for BitnetForCausalLM (#7931)
* hf bitnet v1
* hf bitnet e2e v2
* finish bitnet e2e
* finish f16 hf bitnet e2e
* remove unused
* finish bitnet i2 e2e
* move i2s to quantize v1
* move i2 to quantize
* clean code
* clean code 2
* fix codestyle
* fix code
* fix
* fix code
* fix merge
* remove unused
* change table name
* fix whitespace
* delete redundant
* i2_s to absmax
* finish i2_s/i8_s vec_dot x86 simd
* i2s->q22
* fix code
* remove block scale
* add dequantize
* fix seq
* update avx2
* remove q2_2
* remove q22_grid
* fix whitespace
* reuse llm_build_kv
* fix bo

Co-authored-by: root <root@wangjinheng>
1 parent: 6a2f298

4 files changed, +307 −1 lines changed

convert-hf-to-gguf.py

Lines changed: 42 additions & 0 deletions
@@ -1404,6 +1404,48 @@ def write_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@Model.register("BitnetForCausalLM")
+class BitnetModel(Model):
+    model_arch = gguf.MODEL_ARCH.BITNET
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+        self.gguf_writer.add_rope_scaling_factor(1.0)
+
+    def weight_quant(self, weight):
+        dtype = weight.dtype
+        weight = weight.float()
+        s = 1 / weight.abs().mean().clamp(min=1e-5)
+        weight = (weight * s).round().clamp(-1, 1) / s
+        scale = weight.abs().max().unsqueeze(0)
+        weight = torch.where(weight.abs().less(1e-6), 0, weight).type(dtype)
+        weight = torch.sign(weight).type(dtype)
+        return weight.type(dtype), scale.type(torch.float32)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        new_name = self.map_tensor_name(name)
+
+        if any(self.match_model_tensor_name(new_name, key, bid) for key in [
+            gguf.MODEL_TENSOR.ATTN_Q,
+            gguf.MODEL_TENSOR.ATTN_K,
+            gguf.MODEL_TENSOR.ATTN_V,
+            gguf.MODEL_TENSOR.ATTN_OUT,
+            gguf.MODEL_TENSOR.FFN_UP,
+            gguf.MODEL_TENSOR.FFN_DOWN,
+            gguf.MODEL_TENSOR.FFN_GATE,
+        ]):
+            # transform weight into 1/0/-1 (in fp32)
+            weight_torch, scale_torch = self.weight_quant(data_torch)
+            yield (new_name, weight_torch)
+            yield (new_name.removesuffix(".weight") + ".scale", scale_torch)
+        else:
+            yield (new_name, data_torch)
+
+
 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
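For intuition, here is a minimal standalone sketch of the same absmean ternarization that weight_quant performs. It is not part of the commit (the function name and sample values are illustrative, and PyTorch is assumed); it shows that each projection weight is reduced to a sign tensor in {-1, 0, 1} plus a single fp32 scale, with weight * scale giving the dequantized approximation:

import torch

def absmean_ternarize(w: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    # Same math as BitnetModel.weight_quant above: scale by the inverse of
    # the mean absolute value, round into {-1, 0, 1}, keep one fp32 scale.
    s = 1 / w.abs().mean().clamp(min=1e-5)
    q = (w * s).round().clamp(-1, 1)           # ternary sign tensor
    scale = (q / s).abs().max().unsqueeze(0)   # single per-tensor scale
    return q, scale.float()

w = torch.tensor([[0.4, -0.7, 0.05], [1.2, -0.02, -0.3]])
q, scale = absmean_ternarize(w)
print(q)          # tensor([[ 1., -1.,  0.], [ 1.,  0., -1.]])
print(scale)      # tensor([0.4450]), i.e. mean(|w|)
print(q * scale)  # dequantized approximation of w

This is why modify_tensors yields two tensors per quantized weight: the ternarized ".weight" and a companion ".scale".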

gguf-py/gguf/constants.py

Lines changed: 21 additions & 0 deletions
@@ -149,6 +149,7 @@ class MODEL_ARCH(IntEnum):
     OLMO = auto()
     ARCTIC = auto()
     DEEPSEEK2 = auto()
+    BITNET = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -200,6 +201,8 @@ class MODEL_TENSOR(IntEnum):
     ATTN_KV_B = auto()
     ATTN_Q_A_NORM = auto()
     ATTN_KV_A_NORM = auto()
+    FFN_SUB_NORM = auto()
+    ATTN_SUB_NORM = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -237,6 +240,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.OLMO: "olmo",
     MODEL_ARCH.ARCTIC: "arctic",
     MODEL_ARCH.DEEPSEEK2: "deepseek2",
+    MODEL_ARCH.BITNET: "bitnet",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -288,6 +292,8 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
     MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
     MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
+    MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
+    MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -808,6 +814,21 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
+    MODEL_ARCH.BITNET: [
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_SUB_NORM,
+        MODEL_TENSOR.FFN_SUB_NORM,
+    ],
     # TODO
 }
 
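As a quick sanity check of the new constants, the canonical GGUF tensor name for each block comes from formatting the TENSOR_NAMES template with a block index. A hedged example, assuming the gguf-py package from this commit is importable (TENSOR_NAMES is re-exported at package level):

import gguf

for t in (gguf.MODEL_TENSOR.ATTN_SUB_NORM, gguf.MODEL_TENSOR.FFN_SUB_NORM):
    # Each entry is a template such as "blk.{bid}.attn_sub_norm"
    print(gguf.TENSOR_NAMES[t].format(bid=0))
# blk.0.attn_sub_norm
# blk.0.ffn_sub_norm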
gguf-py/gguf/tensor_mapping.py

Lines changed: 8 additions & 0 deletions
@@ -413,6 +413,14 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_KV_A_NORM: (
             "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
         ),
+
+        MODEL_TENSOR.ATTN_SUB_NORM: (
+            "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
+        ),
+
+        MODEL_TENSOR.FFN_SUB_NORM: (
+            "model.layers.{bid}.mlp.ffn_layernorm", # bitnet
+        ),
     }
 
     # architecture-specific block mappings
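These are the entries that the converter's map_tensor_name resolves through a TensorNameMap. A small usage sketch, again assuming this commit's gguf-py (get_tensor_name_map and TensorNameMap.get_name are existing gguf-py APIs; the two-block model is hypothetical):

import gguf

# Build the HF -> GGUF name map for a hypothetical bitnet model with 2 blocks.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.BITNET, 2)

hf_name = "model.layers.1.self_attn.inner_attn_ln.weight"
print(tmap.get_name(hf_name, try_suffixes=(".weight", ".bias")))
# blk.1.attn_sub_norm.weight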
