Skip to content

v0.3 pre版本 使用AMX yaml报错 #617

Open
@cunfate

Description

@cunfate

command:

python3 -m ktransformers.local_chat --model_path ~/ktransformers/deepseek-r1/ --gguf_path /models/unsloth --optimize_rule_path  /root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-amx.yaml --cpu_infer 92 --max_new_tokens 1000
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/local_chat.py", line 267, in <module>
    fire.Fire(local_chat)
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/fire/core.py", line 135, in Fire
    component_trace = _Fire(component, args, parsed_flag_args, context, name)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/fire/core.py", line 468, in _Fire
    component, remaining_args = _CallAndUpdateTrace(
                                ^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/fire/core.py", line 684, in _CallAndUpdateTrace
    component = fn(*varargs, **kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/local_chat.py", line 214, in local_chat
    optimize_and_load_gguf(model, optimize_rule_path, gguf_path, config)
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/optimize/optimize.py", line 129, in optimize_and_load_gguf
    load_weights(module, gguf_loader)
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 83, in load_weights
    load_weights(child, gguf_loader, prefix+name+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 85, in load_weights
    module.load()
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/operators/base_operator.py", line 60, in load
    utils.load_weights(child, self.gguf_loader, self.key+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 83, in load_weights
    load_weights(child, gguf_loader, prefix+name+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 83, in load_weights
    load_weights(child, gguf_loader, prefix+name+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 83, in load_weights
    load_weights(child, gguf_loader, prefix+name+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 85, in load_weights
    module.load()
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/operators/base_operator.py", line 60, in load
    utils.load_weights(child, self.gguf_loader, self.key+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 83, in load_weights
    load_weights(child, gguf_loader, prefix+name+".")
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/util/utils.py", line 85, in load_weights
    module.load()
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/operators/experts.py", line 520, in load
    self.generate_experts.load(w)
  File "/root/miniconda3/envs/ktransformers/lib/python3.11/site-packages/ktransformers/operators/experts.py", line 201, in load
    assert self.gate_type == GGMLQuantizationType.BF16
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions