FEAT: wizardcoder python (#539)
UranusSeven authored Oct 16, 2023
1 parent 09b5ae5 commit f22977b
Showing 4 changed files with 282 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/source/models/builtin/index.rst
@@ -48,6 +48,7 @@ Code Generation Models
- :ref:`Code-Llama <models_builtin_code_llama>`
- :ref:`CodeLlama-Instruct <models_builtin_code_llama_instruct>`
- :ref:`Code-Llama-Python <models_builtin_code_llama_python>`
- :ref:`WizardCoder-Python-v1.0 <models_builtin_wizardcoder_python_v1_0>`


Code Assistant Models
@@ -88,6 +89,7 @@ Code Assistant Models
vicuna-v1.5-16k
wizardlm-v1.0
wizardmath-v1.0
wizardcoder-python-v1.0


Embedding Models
106 changes: 106 additions & 0 deletions doc/source/models/builtin/wizardcoder-python-v1.0.rst
@@ -0,0 +1,106 @@
.. _models_builtin_wizardcoder_python_v1_0:

=======================
WizardCoder-Python-v1.0
=======================

- **Context Length:** 100000
- **Model Name:** wizardcoder-python-v1.0
- **Languages:** en
- **Abilities:** generate, chat

Specifications
^^^^^^^^^^^^^^

Model Spec 1 (pytorch, 7 Billion)
+++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 7
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** WizardLM/WizardCoder-Python-7B-V1.0

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 7 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.


Model Spec 2 (pytorch, 13 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 13
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** WizardLM/WizardCoder-Python-13B-V1.0

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 13 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.

Model Spec 3 (pytorch, 34 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 34
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** WizardLM/WizardCoder-Python-34B-V1.0

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 34 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.

Model Spec 4 (ggufv2, 7 Billion)
++++++++++++++++++++++++++++++++

- **Model Format:** ggufv2
- **Model Size (in billions):** 7
- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0
- **Model ID:** TheBloke/WizardCoder-Python-7B-V1.0-GGUF
- **File Name Template:** wizardcoder-python-7b-v1.0.{quantization}.gguf

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 7 --model-format ggufv2 --quantization ${quantization}

Model Spec 5 (ggufv2, 13 Billion)
+++++++++++++++++++++++++++++++++

- **Model Format:** ggufv2
- **Model Size (in billions):** 13
- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0
- **Model ID:** TheBloke/WizardCoder-Python-13B-V1.0-GGUF
- **File Name Template:** wizardcoder-python-13b-v1.0.{quantization}.gguf

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 13 --model-format ggufv2 --quantization ${quantization}

Model Spec 6 (ggufv2, 34 Billion)
+++++++++++++++++++++++++++++++++

- **Model Format:** ggufv2
- **Model Size (in billions):** 34
- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0
- **Model ID:** TheBloke/WizardCoder-Python-34B-V1.0-GGUF
- **File Name Template:** wizardcoder-python-34b-v1.0.{quantization}.gguf

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name wizardcoder-python-v1.0 --size-in-billions 34 --model-format ggufv2 --quantization ${quantization}
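
Once launched, the model can also be consumed programmatically through the
Xinference Python client. The following is a minimal sketch, not an official
recipe: the endpoint ``http://localhost:9997`` and the ``Q4_K_M`` quantization
are illustrative assumptions::

    # Minimal sketch: connect to a running Xinference endpoint
    # (the default local address is assumed here).
    from xinference.client import Client

    client = Client("http://localhost:9997")

    # Launch the 7B GGUF spec; "Q4_K_M" is one of the quantizations listed above.
    model_uid = client.launch_model(
        model_name="wizardcoder-python-v1.0",
        model_format="ggufv2",
        model_size_in_billions=7,
        quantization="Q4_K_M",
    )

    # The handle exposes the "chat" ability declared for this model family.
    model = client.get_model(model_uid)
    response = model.chat(
        prompt="Write a Python function that checks whether a number is prime.",
        generate_config={"max_tokens": 512},
    )
    print(response["choices"][0]["message"]["content"])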
126 changes: 124 additions & 2 deletions xinference/model/llm/llm_family.json
@@ -1905,12 +1905,15 @@
"prompt_style": {
"style_name": "INSTRUCTION",
"system_prompt": "You are a professional translator. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. Do not translate person's name. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
"roles": ["User", "Assistant"],
"roles": [
"User",
"Assistant"
],
"intra_message_sep": "",
"inter_message_sep": ""
}
},
{
"version": 1,
"context_length": 100000,
"model_name": "glaive-coder",
@@ -1947,5 +1950,124 @@
2
]
}
},
{
"version": 1,
"context_length": 100000,
"model_name": "wizardcoder-python-v1.0",
"model_lang": [
"en"
],
"model_ability": [
"generate",
"chat"
],
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "WizardLM/WizardCoder-Python-7B-V1.0",
"model_revision": "e40673a27a4aefcff2c6d2b3b1e0681a38703e4e"
},
{
"model_format": "pytorch",
"model_size_in_billions": 13,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "WizardLM/WizardCoder-Python-13B-V1.0",
"model_revision": "d920d26e2108377de0f676a3c4be666f5212f4a1"
},
{
"model_format": "pytorch",
"model_size_in_billions": 34,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "WizardLM/WizardCoder-Python-34B-V1.0",
"model_revision": "d869ce178715f8d6e8141e2ed50e6290985eedb0"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 7,
"quantizations": [
"Q2_K",
"Q3_K_L",
"Q3_K_M",
"Q3_K_S",
"Q4_0",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_id": "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
"model_file_name_template": "wizardcoder-python-7b-v1.0.{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 13,
"quantizations": [
"Q2_K",
"Q3_K_L",
"Q3_K_M",
"Q3_K_S",
"Q4_0",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_id": "TheBloke/WizardCoder-Python-13B-V1.0-GGUF",
"model_file_name_template": "wizardcoder-python-13b-v1.0.{quantization}.gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 34,
"quantizations": [
"Q2_K",
"Q3_K_L",
"Q3_K_M",
"Q3_K_S",
"Q4_0",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_id": "TheBloke/WizardCoder-Python-34B-V1.0-GGUF",
"model_file_name_template": "wizardcoder-python-34b-v1.0.{quantization}.gguf"
}
],
"prompt_style": {
"style_name": "ADD_COLON_SINGLE",
"system_prompt": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
"roles": [
"Instruction",
"Response"
],
"intra_message_sep": "\n\n### ",
"stop": [
"</s>"
]
}
}
]
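
For reference, the "ADD_COLON_SINGLE" prompt style above joins the system
prompt and the "Instruction"/"Response" roles with the intra_message_sep. A
rough sketch of the rendered prompt, assuming the FastChat-style convention
that this style name mirrors (not copied from the Xinference source):

    # Assumed rendering of ADD_COLON_SINGLE for this model family; the helper
    # name build_prompt is hypothetical, for illustration only.
    SYSTEM = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
    )
    SEP = "\n\n### "

    def build_prompt(instruction: str) -> str:
        # System prompt, then "Instruction: <text>", then a bare "Response:"
        # left open for the model to complete.
        return f"{SYSTEM}{SEP}Instruction: {instruction}{SEP}Response:"

    print(build_prompt("Write a quicksort in Python."))
    # Below is an instruction that describes a task. Write a response that
    # appropriately completes the request.
    #
    # ### Instruction: Write a quicksort in Python.
    #
    # ### Response: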
50 changes: 50 additions & 0 deletions xinference/model/llm/llm_family_modelscope.json
@@ -424,5 +424,55 @@
"<eoa>"
]
}
},
{
"version": 1,
"context_length": 100000,
"model_name": "wizardcoder-python-v1.0",
"model_lang": [
"en"
],
"model_ability": [
"generate",
"chat"
],
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 13,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_hub": "modelscope",
"model_id": "AI-ModelScope/WizardCoder-Python-13B-V1.0",
"model_revision": "v1.0.0"
},
{
"model_format": "pytorch",
"model_size_in_billions": 34,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_hub": "modelscope",
"model_id": "AI-ModelScope/WizardCoder-Python-34B-V1.0",
"model_revision": "v1.0.0"
}
],
"prompt_style": {
"style_name": "ADD_COLON_SINGLE",
"system_prompt": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
"roles": [
"Instruction",
"Response"
],
"intra_message_sep": "\n\n### ",
"stop": [
"</s>"
]
}
}
]
