Skip to content

Commit 954c544

Browse files
authored
FEAT: [Model] Support DeepSeek-V3.1 Quantization and tool (#4022)
1 parent d1a7ced commit 954c544

File tree

3 files changed

+126
-1
lines changed

3 files changed

+126
-1
lines changed

xinference/model/llm/llm_family.json

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4767,6 +4767,7 @@
47674767
{
47684768
"model_format": "pytorch",
47694769
"model_size_in_billions": 671,
4770+
"activated_size_in_billions": 37,
47704771
"model_src": {
47714772
"huggingface": {
47724773
"quantizations": [
@@ -4846,6 +4847,7 @@
48464847
{
48474848
"model_format": "pytorch",
48484849
"model_size_in_billions": 671,
4850+
"activated_size_in_billions": 37,
48494851
"model_src": {
48504852
"huggingface": {
48514853
"quantizations": [
@@ -4866,6 +4868,7 @@
48664868
{
48674869
"model_format": "awq",
48684870
"model_size_in_billions": 671,
4871+
"activated_size_in_billions": 37,
48694872
"model_src": {
48704873
"huggingface": {
48714874
"quantizations": [
@@ -4885,6 +4888,7 @@
48854888
{
48864889
"model_format": "ggufv2",
48874890
"model_size_in_billions": 671,
4891+
"activated_size_in_billions": 37,
48884892
"model_src": {
48894893
"huggingface": {
48904894
"quantizations": [
@@ -5215,6 +5219,7 @@
52155219
{
52165220
"model_format": "mlx",
52175221
"model_size_in_billions": 671,
5222+
"activated_size_in_billions": 37,
52185223
"model_src": {
52195224
"huggingface": {
52205225
"quantizations": [
@@ -5263,6 +5268,7 @@
52635268
{
52645269
"model_format": "pytorch",
52655270
"model_size_in_billions": 671,
5271+
"activated_size_in_billions": 37,
52665272
"model_src": {
52675273
"huggingface": {
52685274
"quantizations": [
@@ -5281,6 +5287,7 @@
52815287
{
52825288
"model_format": "gptq",
52835289
"model_size_in_billions": 671,
5290+
"activated_size_in_billions": 37,
52845291
"model_src": {
52855292
"huggingface": {
52865293
"quantizations": [
@@ -5311,6 +5318,116 @@
53115318
"reasoning_start_tag": "<think>",
53125319
"reasoning_end_tag": "</think>"
53135320
},
5321+
{
5322+
"version": 2,
5323+
"context_length": 131072,
5324+
"model_name": "Deepseek-V3.1",
5325+
"model_lang": [
5326+
"en",
5327+
"zh"
5328+
],
5329+
"model_ability": [
5330+
"chat",
5331+
"reasoning",
5332+
"hybrid",
5333+
"tools"
5334+
],
5335+
"model_description": "DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-thinking mode.",
5336+
"model_specs": [
5337+
{
5338+
"model_format": "pytorch",
5339+
"model_size_in_billions": 671,
5340+
"activated_size_in_billions": 37,
5341+
"model_src": {
5342+
"huggingface": {
5343+
"quantizations": [
5344+
"none"
5345+
],
5346+
"model_id": "deepseek-ai/DeepSeek-V3.1"
5347+
},
5348+
"modelscope": {
5349+
"quantizations": [
5350+
"none"
5351+
],
5352+
"model_id": "deepseek-ai/DeepSeek-V3.1"
5353+
}
5354+
}
5355+
},
5356+
{
5357+
"model_format": "gptq",
5358+
"model_size_in_billions": 671,
5359+
"activated_size_in_billions": 37,
5360+
"model_src": {
5361+
"huggingface": {
5362+
"quantizations": [
5363+
"Int4"
5364+
],
5365+
"model_id": "cpatonn/DeepSeek-V3.1-GPTQ-4bit"
5366+
},
5367+
"modelscope": {
5368+
"quantizations": [
5369+
"Int4"
5370+
],
5371+
"model_id": "cpatonn/DeepSeek-V3.1-GPTQ-4bit"
5372+
}
5373+
}
5374+
},
5375+
{
5376+
"model_format": "awq",
5377+
"model_size_in_billions": 671,
5378+
"activated_size_in_billions": 37,
5379+
"model_src": {
5380+
"huggingface": {
5381+
"quantizations": [
5382+
"Int4"
5383+
],
5384+
"model_id": "QuantTrio/DeepSeek-V3.1-AWQ"
5385+
},
5386+
"modelscope": {
5387+
"quantizations": [
5388+
"Int4"
5389+
],
5390+
"model_id": "tclf90/DeepSeek-V3.1-AWQ"
5391+
}
5392+
}
5393+
},
5394+
{
5395+
"model_format": "mlx",
5396+
"model_size_in_billions": 671,
5397+
"activated_size_in_billions": 37,
5398+
"model_src": {
5399+
"huggingface": {
5400+
"quantizations": [
5401+
"8bit",
5402+
"4bit"
5403+
],
5404+
"model_id": "mlx-community/DeepSeek-V3.1-{quantization}"
5405+
},
5406+
"modelscope": {
5407+
"quantizations": [
5408+
"8bit",
5409+
"4bit"
5410+
],
5411+
"model_id": "mlx-community/DeepSeek-V3.1-{quantization}"
5412+
}
5413+
}
5414+
}
5415+
],
5416+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and 
message['prefix'] and thinking %}{{'<think>'}} {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}",
5417+
"stop_token_ids": [
5418+
1
5419+
],
5420+
"stop": [
5421+
"<|end▁of▁sentence|>"
5422+
],
5423+
"reasoning_start_tag": "<think>",
5424+
"reasoning_end_tag": "</think>",
5425+
"virtualenv": {
5426+
"packages": [
5427+
"transformers==4.53.0"
5428+
]
5429+
}
5430+
},
53145431
{
53155432
"version": 2,
53165433
"context_length": 131072,
@@ -6242,6 +6359,7 @@
62426359
{
62436360
"model_format": "pytorch",
62446361
"model_size_in_billions": 671,
6362+
"activated_size_in_billions": 37,
62456363
"model_src": {
62466364
"huggingface": {
62476365
"quantizations": [
@@ -6262,6 +6380,7 @@
62626380
{
62636381
"model_format": "awq",
62646382
"model_size_in_billions": 671,
6383+
"activated_size_in_billions": 37,
62656384
"model_src": {
62666385
"huggingface": {
62676386
"quantizations": [
@@ -6281,6 +6400,7 @@
62816400
{
62826401
"model_format": "ggufv2",
62836402
"model_size_in_billions": 671,
6403+
"activated_size_in_billions": 37,
62846404
"model_src": {
62856405
"huggingface": {
62866406
"quantizations": [
@@ -6475,6 +6595,7 @@
64756595
{
64766596
"model_format": "mlx",
64776597
"model_size_in_billions": 671,
6598+
"activated_size_in_billions": 37,
64786599
"model_src": {
64796600
"huggingface": {
64806601
"quantizations": [
@@ -6517,6 +6638,7 @@
65176638
{
65186639
"model_format": "pytorch",
65196640
"model_size_in_billions": 671,
6641+
"activated_size_in_billions": 37,
65206642
"model_src": {
65216643
"huggingface": {
65226644
"quantizations": [
@@ -6535,6 +6657,7 @@
65356657
{
65366658
"model_format": "awq",
65376659
"model_size_in_billions": 671,
6660+
"activated_size_in_billions": 37,
65386661
"model_src": {
65396662
"huggingface": {
65406663
"quantizations": [
@@ -6553,6 +6676,7 @@
65536676
{
65546677
"model_format": "mlx",
65556678
"model_size_in_billions": 671,
6679+
"activated_size_in_billions": 37,
65566680
"model_src": {
65576681
"huggingface": {
65586682
"quantizations": [

xinference/model/llm/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@
8282
"HuatuoGPT-o1-LLaMA-3.1",
8383
]
8484

85-
DEEPSEEK_TOOL_CALL_FAMILY = ["deepseek-v3", "deepseek-r1-0528"]
85+
DEEPSEEK_TOOL_CALL_FAMILY = ["deepseek-v3", "deepseek-r1-0528", "Deepseek-V3.1"]
8686

8787
TOOL_CALL_FAMILY = (
8888
QWEN_TOOL_CALL_FAMILY

xinference/model/llm/vllm/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ class VLLMGenerateConfig(TypedDict, total=False):
273273
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
274274
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
275275
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
276+
VLLM_SUPPORTED_CHAT_MODELS.append("Deepseek-V3.1")
276277

277278
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
278279
VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")

0 commit comments

Comments (0)