diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 70b17daa61..471b4febc3 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -6874,7 +6874,7 @@ "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8", "model_revision":"3d152a77eaccfd72d59baedb0b183a1b8fd56e48" }, - { + { "model_format":"gptq", "model_size_in_billions":7, "quantizations":[ @@ -6883,7 +6883,7 @@ "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4", "model_revision":"5ab897112fa83b9699826be8753ef9184585c77d" }, - { + { "model_format":"awq", "model_size_in_billions":7, "quantizations":[ @@ -6891,6 +6891,31 @@ ], "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ", "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538" + }, + { + "model_format":"pytorch", + "model_size_in_billions":72, + "quantizations":[ + "none" + ], + "model_id":"Qwen/Qwen2-VL-72B-Instruct" + }, + { + "model_format":"awq", + "model_size_in_billions":72, + "quantizations":[ + "Int4" + ], + "model_id":"Qwen/Qwen2-VL-72B-Instruct-AWQ" + }, + { + "model_format":"gptq", + "model_size_in_billions":72, + "quantizations":[ + "Int4", + "Int8" + ], + "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}" } ], "prompt_style":{ @@ -7247,7 +7272,99 @@ }, { "version": 1, - "context_length": 131072, + "context_length": 32768, + "model_name": "qwen2.5", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "generate" + ], + "model_description": "Qwen2.5 is the latest series of Qwen large language models. 
For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "0_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-0.5B", + "model_revision": "2630d3d2321bc1f1878f702166d1b2af019a7310" + }, + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-1.5B", + "model_revision": "e5dfabbcffd9b0c7b31d89b82c5a6b72e663f32c" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 3, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-3B", + "model_revision": "e4aa5ac50aa507415cda96cc99eb77ad0a3d2d34" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-7B", + "model_revision": "09a0bac5707b43ec44508eab308b0846320c1ed4" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 14, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-14B", + "model_revision": "d02b64ba1ce86bf9948668a13f82709600431ccc" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 32, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-32B", + "model_revision": "ff23665d01c3665be5fdb271d18a62090b65c06d" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 72, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-72B", + "model_revision": "587cc4061cf6a7cc0d429d05c109447e5cf063af" + } + ] + }, + { + "version": 1, + "context_length": 32768, "model_name": "qwen2.5-instruct", "model_lang": [ "en", @@ -7459,11 +7576,10 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF", - "model_file_name_template": 
"qwen2_5-0_5b-instruct-{quantization}.gguf" + "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf" }, { "model_format": "ggufv2", @@ -7476,11 +7592,10 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-1_5b-instruct-{quantization}.gguf" + "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf" }, { "model_format": "ggufv2", @@ -7493,11 +7608,10 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "Qwen/Qwen2.5-3B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-3b-instruct-{quantization}.gguf" + "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf" }, { "model_format": "ggufv2", @@ -7510,11 +7624,37 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "Qwen/Qwen2.5-7B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-7b-instruct-{quantization}.gguf" + "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q4_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q4_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], + "q5_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q5_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], + "q6_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q8_0": [ + "00001-of-00002", + "00002-of-00002" + ] + } }, { "model_format": "ggufv2", @@ -7527,11 +7667,53 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "Qwen/Qwen2.5-14B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-14b-instruct-{quantization}.gguf" + "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q2_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q3_k_m": [ + "00001-of-00002", + 
"00002-of-00002" + ], + "q4_0": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q4_k_m": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q5_0": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q5_k_m": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q6_k": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ], + "q8_0": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ] + } }, { "model_format": "ggufv2", @@ -7544,11 +7726,76 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "Qwen/Qwen2.5-32B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf" + "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q2_k": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ], + "q3_k_m": [ + "00001-of-00005", + "00002-of-00005", + "00003-of-00005", + "00004-of-00005", + "00005-of-00005" + ], + "q4_0": [ + "00001-of-00005", + "00002-of-00005", + "00003-of-00005", + "00004-of-00005", + "00005-of-00005" + ], + "q4_k_m": [ + "00001-of-00005", + "00002-of-00005", + "00003-of-00005", + "00004-of-00005", + "00005-of-00005" + ], + "q5_0": [ + "00001-of-00006", + "00002-of-00006", + "00003-of-00006", + "00004-of-00006", + "00005-of-00006", + "00006-of-00006" + ], + "q5_k_m": [ + "00001-of-00006", + "00002-of-00006", + "00003-of-00006", + "00004-of-00006", + "00005-of-00006", + "00006-of-00006" + ], + "q6_k": [ + "00001-of-00007", + "00002-of-00007", + "00003-of-00007", + "00004-of-00007", + "00005-of-00007", + "00006-of-00007", + "00007-of-00007" + ], + "q8_0": [ + "00001-of-00009", + "00002-of-00009", + "00003-of-00009", + "00004-of-00009", + "00005-of-00009", + "00006-of-00009", + "00007-of-00009", + "00008-of-00009", + 
"00009-of-00009" + ] + } }, { "model_format": "ggufv2", @@ -7566,8 +7813,254 @@ ], "model_id": "Qwen/Qwen2.5-72B-Instruct-GGUF", "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf", - "model_file_name_split_template": "qwen2_5-72b-instruct-{quantization}-{part}.gguf", + "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q2_k": [ + "00001-of-00007", + "00002-of-00007", + "00003-of-00007", + "00004-of-00007", + "00005-of-00007", + "00006-of-00007", + "00007-of-00007" + ], + "q3_k_m": [ + "00001-of-00009", + "00002-of-00009", + "00003-of-00009", + "00004-of-00009", + "00005-of-00009", + "00006-of-00009", + "00007-of-00009", + "00008-of-00009", + "00009-of-00009" + ], + "q4_0": [ + "00001-of-00011", + "00002-of-00011", + "00003-of-00011", + "00004-of-00011", + "00005-of-00011", + "00006-of-00011", + "00007-of-00011", + "00008-of-00011", + "00009-of-00011", + "00010-of-00011", + "00011-of-00011" + ], + "q4_k_m": [ + "00001-of-00012", + "00002-of-00012", + "00003-of-00012", + "00004-of-00012", + "00005-of-00012", + "00006-of-00012", + "00007-of-00012", + "00008-of-00012", + "00009-of-00012", + "00010-of-00012", + "00011-of-00012", + "00012-of-00012" + ], + "q5_0": [ + "00001-of-00013", + "00002-of-00013", + "00003-of-00013", + "00004-of-00013", + "00005-of-00013", + "00006-of-00013", + "00007-of-00013", + "00008-of-00013", + "00009-of-00013", + "00010-of-00013", + "00011-of-00013", + "00012-of-00013", + "00013-of-00013" + ], + "q5_k_m": [ + "00001-of-00014", + "00002-of-00014", + "00003-of-00014", + "00004-of-00014", + "00005-of-00014", + "00006-of-00014", + "00007-of-00014", + "00008-of-00014", + "00009-of-00014", + "00010-of-00014", + "00011-of-00014", + "00012-of-00014", + "00013-of-00014", + "00014-of-00014" + ], + "q6_k": [ + "00001-of-00016", + "00002-of-00016", + "00003-of-00016", + "00004-of-00016", + "00005-of-00016", + "00006-of-00016", + "00007-of-00016", + "00008-of-00016", + 
"00009-of-00016", + "00010-of-00016", + "00011-of-00016", + "00012-of-00016", + "00013-of-00016", + "00014-of-00016", + "00015-of-00016", + "00016-of-00016" + ], + "q8_0": [ + "00001-of-00021", + "00002-of-00021", + "00003-of-00021", + "00004-of-00021", + "00005-of-00021", + "00006-of-00021", + "00007-of-00021", + "00008-of-00021", + "00009-of-00021", + "00010-of-00021", + "00011-of-00021", + "00012-of-00021", + "00013-of-00021", + "00014-of-00021", + "00015-of-00021", + "00016-of-00021", + "00017-of-00021", + "00018-of-00021", + "00019-of-00021", + "00020-of-00021", + "00021-of-00021" + ] + } + } + ], + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{{\\\"name\\\": , \\\"arguments\\\": }}\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "stop_token_ids": [ + 151643, + 151644, + 151645 + ], + "stop": [ + "<|endoftext|>", + "<|im_start|>", + "<|im_end|>" + ] + }, + { + "version": 1, + "context_length": 32768, + "model_name": "qwen2.5-coder", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "generate" + ], + "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-Coder-1.5B", + "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d" + }, + { + "model_format": "pytorch", + 
"model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-Coder-7B", + "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08" + } + ] + }, + { + "version": 1, + "context_length": 32768, + "model_name": "qwen2.5-coder-instruct", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat", + "tools" + ], + "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": "1_5", + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0" + ], + "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF", + "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0" + ], + "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF", + "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf", "quantization_parts": { + "q4_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q4_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], "q5_0": [ "00001-of-00002", "00002-of-00002" @@ -7581,19 +8074,14 @@ "00002-of-00002" ], "q8_0": [ - "00001-of-00002", - "00002-of-00002" - ], - "fp16": [ - "00001-of-00004", - "00002-of-00004", - "00003-of-00004", - "00004-of-00004" + "00001-of-00003", + 
"00002-of-00003", + "00003-of-00003" ] } } ], - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{{\\\"name\\\": , \\\"arguments\\\": }}\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- 
if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{{\\\"name\\\": , \\\"arguments\\\": }}\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- 
'<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", "stop_token_ids": [ 151643, 151644, diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index 7309ee9651..daf726e8c7 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -4602,6 +4602,34 @@ "model_hub": "modelscope", "model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ", "model_revision":"master" + }, + { + "model_format":"pytorch", + "model_size_in_billions":72, + "quantizations":[ + "none" + ], + "model_id":"qwen/Qwen2-VL-72B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format":"awq", + "model_size_in_billions":72, + "quantizations":[ + "Int4" + ], + "model_id":"qwen/Qwen2-VL-72B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format":"gptq", + "model_size_in_billions":72, + "quantizations":[ + "Int4", + "Int8" + ], + "model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" } ], "prompt_style": { @@ -4960,7 +4988,106 @@ }, { "version": 1, - "context_length": 131072, + "context_length": 32768, + "model_name": "qwen2.5", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "generate" + ], + "model_description": "Qwen2.5 is the latest series of Qwen large language models. 
For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "0_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-0.5B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-1.5B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 3, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-3B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-7B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 14, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-14B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 32, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-32B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 72, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-72B", + "model_revision": "master", + "model_hub": "modelscope" + } + ] + }, + { + "version": 1, + "context_length": 32768, "model_name": "qwen2.5-instruct", "model_lang": [ "en", @@ -5193,11 +5320,10 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-0.5B-Instruct-GGUF", - "model_file_name_template": 
"qwen2_5-0_5b-instruct-{quantization}.gguf", + "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf", "model_hub": "modelscope" }, { @@ -5211,11 +5337,10 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-1.5B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-1_5b-instruct-{quantization}.gguf", + "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf", "model_hub": "modelscope" }, { @@ -5229,11 +5354,10 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-3B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-3b-instruct-{quantization}.gguf", + "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf", "model_hub": "modelscope" }, { @@ -5247,12 +5371,38 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-7B-Instruct-GGUF", "model_file_name_template": "qwen2_5-7b-instruct-{quantization}.gguf", - "model_hub": "modelscope" + "model_hub": "modelscope", + "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q4_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q4_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], + "q5_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q5_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], + "q6_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q8_0": [ + "00001-of-00002", + "00002-of-00002" + ] + } }, { "model_format": "ggufv2", @@ -5265,11 +5415,53 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-14B-Instruct-GGUF", - "model_file_name_template": "qwen2_5-14b-instruct-{quantization}.gguf", + "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q2_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q3_k_m": [ + "00001-of-00002", + "00002-of-00002" 
+ ], + "q4_0": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q4_k_m": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q5_0": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q5_k_m": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" + ], + "q6_k": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ], + "q8_0": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ] + }, "model_hub": "modelscope" }, { @@ -5283,11 +5475,76 @@ "q5_0", "q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-32B-Instruct-GGUF", "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q2_k": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ], + "q3_k_m": [ + "00001-of-00005", + "00002-of-00005", + "00003-of-00005", + "00004-of-00005", + "00005-of-00005" + ], + "q4_0": [ + "00001-of-00005", + "00002-of-00005", + "00003-of-00005", + "00004-of-00005", + "00005-of-00005" + ], + "q4_k_m": [ + "00001-of-00005", + "00002-of-00005", + "00003-of-00005", + "00004-of-00005", + "00005-of-00005" + ], + "q5_0": [ + "00001-of-00006", + "00002-of-00006", + "00003-of-00006", + "00004-of-00006", + "00005-of-00006", + "00006-of-00006" + ], + "q5_k_m": [ + "00001-of-00006", + "00002-of-00006", + "00003-of-00006", + "00004-of-00006", + "00005-of-00006", + "00006-of-00006" + ], + "q6_k": [ + "00001-of-00007", + "00002-of-00007", + "00003-of-00007", + "00004-of-00007", + "00005-of-00007", + "00006-of-00007", + "00007-of-00007" + ], + "q8_0": [ + "00001-of-00009", + "00002-of-00009", + "00003-of-00009", + "00004-of-00009", + "00005-of-00009", + "00006-of-00009", + "00007-of-00009", + "00008-of-00009", + "00009-of-00009" + ] + }, "model_hub": "modelscope" }, { @@ -5301,40 +5558,288 @@ "q5_0", 
"q5_k_m", "q6_k", - "q8_0", - "fp16" + "q8_0" ], "model_id": "qwen/Qwen2.5-72B-Instruct-GGUF", "model_hub": "modelscope", "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf", - "model_file_name_split_template": "qwen2_5-72b-instruct-{quantization}-{part}.gguf", + "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf", "quantization_parts": { + "q2_k": [ + "00001-of-00007", + "00002-of-00007", + "00003-of-00007", + "00004-of-00007", + "00005-of-00007", + "00006-of-00007", + "00007-of-00007" + ], + "q3_k_m": [ + "00001-of-00009", + "00002-of-00009", + "00003-of-00009", + "00004-of-00009", + "00005-of-00009", + "00006-of-00009", + "00007-of-00009", + "00008-of-00009", + "00009-of-00009" + ], + "q4_0": [ + "00001-of-00011", + "00002-of-00011", + "00003-of-00011", + "00004-of-00011", + "00005-of-00011", + "00006-of-00011", + "00007-of-00011", + "00008-of-00011", + "00009-of-00011", + "00010-of-00011", + "00011-of-00011" + ], + "q4_k_m": [ + "00001-of-00012", + "00002-of-00012", + "00003-of-00012", + "00004-of-00012", + "00005-of-00012", + "00006-of-00012", + "00007-of-00012", + "00008-of-00012", + "00009-of-00012", + "00010-of-00012", + "00011-of-00012", + "00012-of-00012" + ], "q5_0": [ + "00001-of-00013", + "00002-of-00013", + "00003-of-00013", + "00004-of-00013", + "00005-of-00013", + "00006-of-00013", + "00007-of-00013", + "00008-of-00013", + "00009-of-00013", + "00010-of-00013", + "00011-of-00013", + "00012-of-00013", + "00013-of-00013" + ], + "q5_k_m": [ + "00001-of-00014", + "00002-of-00014", + "00003-of-00014", + "00004-of-00014", + "00005-of-00014", + "00006-of-00014", + "00007-of-00014", + "00008-of-00014", + "00009-of-00014", + "00010-of-00014", + "00011-of-00014", + "00012-of-00014", + "00013-of-00014", + "00014-of-00014" + ], + "q6_k": [ + "00001-of-00016", + "00002-of-00016", + "00003-of-00016", + "00004-of-00016", + "00005-of-00016", + "00006-of-00016", + "00007-of-00016", + "00008-of-00016", + 
"00009-of-00016", + "00010-of-00016", + "00011-of-00016", + "00012-of-00016", + "00013-of-00016", + "00014-of-00016", + "00015-of-00016", + "00016-of-00016" + ], + "q8_0": [ + "00001-of-00021", + "00002-of-00021", + "00003-of-00021", + "00004-of-00021", + "00005-of-00021", + "00006-of-00021", + "00007-of-00021", + "00008-of-00021", + "00009-of-00021", + "00010-of-00021", + "00011-of-00021", + "00012-of-00021", + "00013-of-00021", + "00014-of-00021", + "00015-of-00021", + "00016-of-00021", + "00017-of-00021", + "00018-of-00021", + "00019-of-00021", + "00020-of-00021", + "00021-of-00021" + ] + } + } + ], + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{{\\\"name\\\": , \\\"arguments\\\": }}\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "stop_token_ids": [ + 151643, + 151644, + 151645 + ], + "stop": [ + "<|endoftext|>", + "<|im_start|>", + "<|im_end|>" + ] + }, + { + "version": 1, + "context_length": 32768, + "model_name": "qwen2.5-coder", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "generate" + ], + "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-Coder-1.5B", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + 
"model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-Coder-7B", + "model_revision": "master", + "model_hub": "modelscope" + } + ] + }, + { + "version": 1, + "context_length": 32768, + "model_name": "qwen2.5-coder-instruct", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat", + "tools" + ], + "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-Coder-7B-Instruct", + "model_revision": "master", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": "1_5", + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0" + ], + "model_hub": "modelscope", + "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF", + "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0" + ], + "model_hub": "modelscope", + "model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GGUF", + "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q4_0": [ "00001-of-00002", "00002-of-00002" ], - "q5_k_m": [ + "q4_k_m": [ "00001-of-00002", "00002-of-00002" ], - "q6_k": [ + "q5_0": [ "00001-of-00002", "00002-of-00002" ], - "q8_0": [ + "q5_k_m": [ 
"00001-of-00002", "00002-of-00002" ], - "fp16": [ - "00001-of-00004", - "00002-of-00004", - "00003-of-00004", - "00004-of-00004" + "q6_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q8_0": [ + "00001-of-00003", + "00002-of-00003", + "00003-of-00003" ] } } ], - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", "stop_token_ids": [ 151643, 151644, diff --git a/xinference/model/llm/sglang/core.py 
b/xinference/model/llm/sglang/core.py index 621b9b0a59..a413f2ad0f 100644 --- a/xinference/model/llm/sglang/core.py +++ b/xinference/model/llm/sglang/core.py @@ -68,6 +68,8 @@ class SGLANGGenerateConfig(TypedDict, total=False): "llama-3.1", "mistral-v0.1", "mixtral-v0.1", + "qwen2.5", + "qwen2.5-coder", ] SGLANG_SUPPORTED_CHAT_MODELS = [ "llama-2-chat", @@ -85,6 +87,8 @@ class SGLANGGenerateConfig(TypedDict, total=False): "deepseek-v2.5", "deepseek-v2-chat", "deepseek-v2-chat-0628", + "qwen2.5-instruct", + "qwen2.5-coder-instruct", ] diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py index 3aaee0738f..8b28701778 100644 --- a/xinference/model/llm/vllm/core.py +++ b/xinference/model/llm/vllm/core.py @@ -138,7 +138,11 @@ class VLLMGenerateConfig(TypedDict, total=False): VLLM_SUPPORTED_MODELS.append("codeqwen1.5") VLLM_SUPPORTED_CHAT_MODELS.append("codeqwen1.5-chat") VLLM_SUPPORTED_CHAT_MODELS.append("qwen2-instruct") + VLLM_SUPPORTED_MODELS.append("qwen2.5") VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct") + VLLM_SUPPORTED_MODELS.append("qwen2.5-coder") + VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-coder-instruct") + if VLLM_INSTALLED and vllm.__version__ >= "0.3.2": VLLM_SUPPORTED_CHAT_MODELS.append("gemma-it")