diff --git a/doc/source/models/builtin/index.rst b/doc/source/models/builtin/index.rst
index dc60921fc2..b50560c3f2 100644
--- a/doc/source/models/builtin/index.rst
+++ b/doc/source/models/builtin/index.rst
@@ -48,6 +48,7 @@ Code Generation Models
 - :ref:`Code-Llama <models_builtin_code_llama>`
 - :ref:`CodeLlama-Instruct <models_builtin_codellama_instruct>`
 - :ref:`Code-Llama-Python <models_builtin_code_llama_python>`
+- :ref:`WizardCoder-Python-v1.0 <models_builtin_wizardcoder_python_v1_0>`


 Code Assistant Models
@@ -88,6 +89,7 @@ Code Assistant Models
    vicuna-v1.5-16k
    wizardlm-v1.0
    wizardmath-v1.0
+   wizardcoder-python-v1.0


 Embedding Models
diff --git a/doc/source/models/builtin/wizardcoder-python-v1.0.rst b/doc/source/models/builtin/wizardcoder-python-v1.0.rst
new file mode 100644
index 0000000000..1970bfc81d
--- /dev/null
+++ b/doc/source/models/builtin/wizardcoder-python-v1.0.rst
@@ -0,0 +1,106 @@
+.. _models_builtin_wizardcoder_python_v1_0:
+
+=======================
+WizardCoder-Python-v1.0
+=======================
+
+- **Context Length:** 100000
+- **Model Name:** wizardcoder-python-v1.0
+- **Languages:** en
+- **Abilities:** generate, chat
+
+Specifications
+^^^^^^^^^^^^^^
+
+Model Spec 1 (pytorch, 7 Billion)
++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 7
+- **Quantizations:** 4-bit, 8-bit, none
+- **Model ID:** WizardLM/WizardCoder-Python-7B-V1.0
+
+Execute the following command to launch the model, remembering to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 7 --model-format pytorch --quantization ${quantization}
+
+.. note::
+
+   4-bit quantization is not supported on macOS.
+
+
+Model Spec 2 (pytorch, 13 Billion)
+++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 13
+- **Quantizations:** 4-bit, 8-bit, none
+- **Model ID:** WizardLM/WizardCoder-Python-13B-V1.0
+
+Execute the following command to launch the model, remembering to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 13 --model-format pytorch --quantization ${quantization}
+
+.. note::
+
+   4-bit quantization is not supported on macOS.
+
+Model Spec 3 (pytorch, 34 Billion)
+++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 34
+- **Quantizations:** 4-bit, 8-bit, none
+- **Model ID:** WizardLM/WizardCoder-Python-34B-V1.0
+
+Execute the following command to launch the model, remembering to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 34 --model-format pytorch --quantization ${quantization}
+
+.. note::
+
+   4-bit quantization is not supported on macOS.
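+
+Once a variant is launched, it can also be driven programmatically. Below is a
+minimal sketch using the Xinference Python client, assuming a local supervisor
+on the default endpoint ``http://127.0.0.1:9997``; adjust the endpoint, size,
+and quantization to match your deployment::
+
+   from xinference.client import Client
+
+   client = Client("http://127.0.0.1:9997")
+
+   # Launch the 7B pytorch variant unquantized; this mirrors the CLI command
+   # above and returns a UID identifying the running model.
+   model_uid = client.launch_model(
+       model_name="wizardcoder-python-v1.0",
+       model_format="pytorch",
+       model_size_in_billions=7,
+       quantization="none",
+   )
+
+   # Because the model declares the "chat" ability, the handle exposes chat().
+   model = client.get_model(model_uid)
+   completion = model.chat("Write a Python function that checks if a number is prime.")
+   print(completion["choices"][0]["message"]["content"])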
+
+Model Spec 4 (ggufv2, 7 Billion)
+++++++++++++++++++++++++++++++++
+
+- **Model Format:** ggufv2
+- **Model Size (in billions):** 7
+- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0
+- **Model ID:** TheBloke/WizardCoder-Python-7B-V1.0-GGUF
+- **File Name Template:** wizardcoder-python-7b-v1.0.{quantization}.gguf
+
+Execute the following command to launch the model, remembering to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 7 --model-format ggufv2 --quantization ${quantization}
+
+Model Spec 5 (ggufv2, 13 Billion)
++++++++++++++++++++++++++++++++++
+
+- **Model Format:** ggufv2
+- **Model Size (in billions):** 13
+- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0
+- **Model ID:** TheBloke/WizardCoder-Python-13B-V1.0-GGUF
+- **File Name Template:** wizardcoder-python-13b-v1.0.{quantization}.gguf
+
+Execute the following command to launch the model, remembering to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 13 --model-format ggufv2 --quantization ${quantization}
+
+Model Spec 6 (ggufv2, 34 Billion)
++++++++++++++++++++++++++++++++++
+
+- **Model Format:** ggufv2
+- **Model Size (in billions):** 34
+- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0
+- **Model ID:** TheBloke/WizardCoder-Python-34B-V1.0-GGUF
+- **File Name Template:** wizardcoder-python-34b-v1.0.{quantization}.gguf
+
+Execute the following command to launch the model, remembering to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 34 --model-format ggufv2 --quantization ${quantization}
\ No newline at end of file
diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index e5552c3ccc..4d02d638fa 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -1905,12 +1905,15 @@
     "prompt_style": {
       "style_name": "INSTRUCTION",
       "system_prompt": "You are a professional translator. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. Do not translate person's name. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
-      "roles": ["User", "Assistant"],
+      "roles": [
+        "User",
+        "Assistant"
+      ],
       "intra_message_sep": "",
       "inter_message_sep": ""
     }
   },
-  {
+  {
     "version": 1,
     "context_length": 100000,
     "model_name": "glaive-coder",
@@ -1947,5 +1950,124 @@
         2
       ]
     }
+  },
+  {
+    "version": 1,
+    "context_length": 100000,
+    "model_name": "wizardcoder-python-v1.0",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "generate",
+      "chat"
+    ],
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "WizardLM/WizardCoder-Python-7B-V1.0",
+        "model_revision": "e40673a27a4aefcff2c6d2b3b1e0681a38703e4e"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "WizardLM/WizardCoder-Python-13B-V1.0",
+        "model_revision": "d920d26e2108377de0f676a3c4be666f5212f4a1"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "WizardLM/WizardCoder-Python-34B-V1.0",
+        "model_revision": "d869ce178715f8d6e8141e2ed50e6290985eedb0"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
+        "model_file_name_template": "wizardcoder-python-7b-v1.0.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/WizardCoder-Python-13B-V1.0-GGUF",
+        "model_file_name_template": "wizardcoder-python-13b-v1.0.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/WizardCoder-Python-34B-V1.0-GGUF",
+        "model_file_name_template": "wizardcoder-python-34b-v1.0.{quantization}.gguf"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE",
+      "system_prompt": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
+      "roles": [
+        "Instruction",
+        "Response"
+      ],
+      "intra_message_sep": "\n\n### ",
+      "stop": [
+        "</s>"
+      ]
+    }
+  }
 ]
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index 7fbe0d360f..594b27af5e 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -424,5 +424,55 @@
         "</s>"
       ]
     }
+  },
+  {
+    "version": 1,
+    "context_length": 100000,
+    "model_name": "wizardcoder-python-v1.0",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "generate",
+      "chat"
+    ],
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/WizardCoder-Python-13B-V1.0",
+        "model_revision": "v1.0.0"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/WizardCoder-Python-34B-V1.0",
+        "model_revision": "v1.0.0"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE",
+      "system_prompt": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
+      "roles": [
+        "Instruction",
+        "Response"
+      ],
+      "intra_message_sep": "\n\n### ",
+      "stop": [
+        "</s>"
+      ]
+    }
+  }
 ]
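
For reference, the ``ADD_COLON_SINGLE`` prompt style registered above is what turns a
chat request into the final string the model sees: the ``"\n\n### "`` separator is what
produces the ``### Instruction:`` / ``### Response:`` markers of WizardCoder's prompt
format. The following is a simplified re-implementation for illustration only; the
authoritative logic lives in xinference's prompt-building code, and the function and
variable names here are hypothetical::

   def build_add_colon_single(system_prompt: str, roles: list, sep: str, user_input: str) -> str:
       # System prompt first, then each message rendered as "<sep><role>: <content>",
       # ending with the bare assistant-side role so the model continues from there.
       prompt = system_prompt
       prompt += sep + roles[0] + ": " + user_input
       prompt += sep + roles[1] + ":"
       return prompt

   prompt = build_add_colon_single(
       "Below is an instruction that describes a task. "
       "Write a response that appropriately completes the request.",
       ["Instruction", "Response"],
       "\n\n### ",
       "Write a quicksort in Python.",
   )
   # Resulting prompt:
   #
   # Below is an instruction that describes a task. Write a response that
   # appropriately completes the request.
   #
   # ### Instruction: Write a quicksort in Python.
   #
   # ### Response:

Generation then stops when the model emits ``</s>``, matching the ``stop`` entry in the
prompt style.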