
FEAT: internlm 20b (#486)
UranusSeven authored Sep 22, 2023
1 parent b3dcd15 commit ea3b336
Showing 5 changed files with 85 additions and 1 deletion.
4 changes: 4 additions & 0 deletions README.md
@@ -32,6 +32,8 @@ potential of cutting-edge AI models.
- Multi-GPU support for PyTorch models: [#226](https://github.com/xorbitsai/inference/issues/226)
- Xinference dashboard: [#93](https://github.com/xorbitsai/inference/issues/93)
### New Models
- Built-in support for [internlm-20b](https://huggingface.co/internlm/internlm-20b/commits/main): [#486](https://github.com/xorbitsai/inference/pull/486)
- Built-in support for [internlm-chat-20b](https://huggingface.co/internlm/internlm-chat-20b): [#486](https://github.com/xorbitsai/inference/pull/486)
- Built-in support for [CodeLlama](https://github.com/facebookresearch/codellama): [#414](https://github.com/xorbitsai/inference/pull/414) [#402](https://github.com/xorbitsai/inference/pull/402)
### Integrations
- [Dify](https://docs.dify.ai/advanced/model-configuration/xinference): an LLMOps platform that enables developers (and even non-developers) to quickly build useful applications based on large language models, ensuring they are visual, operable, and improvable.
@@ -208,8 +210,10 @@ $ xinference registrations
| LLM | falcon-instruct | ['en'] | ['embed', 'chat'] |
| LLM | gpt-2 | ['en'] | ['generate'] |
| LLM | internlm | ['en', 'zh'] | ['embed', 'generate'] |
| LLM | internlm-16k | ['en', 'zh'] | ['embed', 'generate'] |
| LLM | internlm-chat | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | internlm-chat-8k | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | internlm-chat-16k | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | llama-2 | ['en'] | ['embed', 'generate'] |
| LLM | llama-2-chat | ['en'] | ['embed', 'chat'] |
| LLM | opt | ['en'] | ['embed', 'generate'] |
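Not part of the diff itself: once this registration is in place, one of the new models can be launched through the Xinference Python client. The sketch below is a minimal illustration; the endpoint URL is an assumption (the default local supervisor), and parameter names follow the client API as documented around this release.

```python
# Minimal sketch (not in this commit): launching the new internlm-chat-16k
# registration through the Xinference Python client. Assumes a supervisor is
# already running locally; adjust the endpoint to your deployment.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumed default endpoint

# These arguments mirror the new llm_family.json entry added in this commit.
model_uid = client.launch_model(
    model_name="internlm-chat-16k",
    model_format="pytorch",
    model_size_in_billions=20,
    quantization="none",
)

model = client.get_model(model_uid)
print(model.chat("Briefly introduce yourself."))
```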
2 changes: 2 additions & 0 deletions README_ja_JP.md
@@ -187,8 +187,10 @@ $ xinference registrations
| LLM | falcon-instruct | ['en'] | ['embed', 'chat'] |
| LLM | gpt-2 | ['en'] | ['generate'] |
| LLM | internlm | ['en', 'zh'] | ['embed', 'generate'] |
| LLM | internlm-16k | ['en', 'zh'] | ['embed', 'generate'] |
| LLM | internlm-chat | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | internlm-chat-8k | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | internlm-chat-16k | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | llama-2 | ['en'] | ['embed', 'generate'] |
| LLM | llama-2-chat | ['en'] | ['embed', 'chat'] |
| LLM | opt | ['en'] | ['embed', 'generate'] |
4 changes: 4 additions & 0 deletions README_zh_CN.md
@@ -29,6 +29,8 @@ Xorbits Inference (Xinference) is a powerful and full-featured distributed
- Multi-GPU support for PyTorch models: [#226](https://github.com/xorbitsai/inference/issues/226)
- Xinference dashboard: [#93](https://github.com/xorbitsai/inference/issues/93)
### New Models
- Built-in support for [internlm-20b](https://huggingface.co/internlm/internlm-20b/commits/main): [#486](https://github.com/xorbitsai/inference/pull/486)
- Built-in support for [internlm-chat-20b](https://huggingface.co/internlm/internlm-chat-20b): [#486](https://github.com/xorbitsai/inference/pull/486)
- Built-in support for [CodeLlama](https://github.com/facebookresearch/codellama): [#414](https://github.com/xorbitsai/inference/pull/414) [#402](https://github.com/xorbitsai/inference/pull/402)
### Integrations
- [Dify](https://docs.dify.ai/advanced/model-configuration/xinference): an LLMOps platform covering the development, deployment, maintenance, and optimization of large language models.
@@ -190,8 +192,10 @@ $ xinference registrations
| LLM | falcon-instruct | ['en'] | ['embed', 'chat'] |
| LLM | gpt-2 | ['en'] | ['generate'] |
| LLM | internlm | ['en', 'zh'] | ['embed', 'generate'] |
| LLM | internlm-16k | ['en', 'zh'] | ['embed', 'generate'] |
| LLM | internlm-chat | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | internlm-chat-8k | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | internlm-chat-16k | ['en', 'zh'] | ['embed', 'chat'] |
| LLM | llama-2 | ['en'] | ['embed', 'generate'] |
| LLM | llama-2-chat | ['en'] | ['embed', 'chat'] |
| LLM | opt | ['en'] | ['embed', 'generate'] |
71 changes: 71 additions & 0 deletions xinference/model/llm/llm_family.json
@@ -1127,6 +1127,77 @@
            ]
        }
    },
    {
        "version": 1,
        "context_length": 16384,
        "model_name": "internlm-16k",
        "model_lang": [
            "en",
            "zh"
        ],
        "model_ability": [
            "embed",
            "generate"
        ],
        "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
        "model_specs": [
            {
                "model_format": "pytorch",
                "model_size_in_billions": 20,
                "quantizations": [
                    "4-bit",
                    "8-bit",
                    "none"
                ],
                "model_id": "internlm/internlm-20b",
                "model_revision": "f0433b0db933a9adfa169f756ab8547f67ccef1d"
            }
        ]
    },
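The "pytorch" spec above pins a Hugging Face repo and revision. As a rough illustration (not code from this commit), loading that checkpoint directly would look something like the sketch below, assuming transformers is installed, plus bitsandbytes for the 4-bit/8-bit quantization options.

```python
# Illustrative only (not part of this commit): how the "pytorch" spec above maps
# onto a direct Hugging Face load. Assumes transformers (and accelerate for
# device_map), plus bitsandbytes when a 4-bit/8-bit quantization is requested.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "internlm/internlm-20b"
REVISION = "f0433b0db933a9adfa169f756ab8547f67ccef1d"  # pinned revision from the spec

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    trust_remote_code=True,  # InternLM ships custom modeling code
    device_map="auto",
    load_in_8bit=True,       # "8-bit" quantization; use load_in_4bit=True for "4-bit"
)
```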
    {
        "version": 1,
        "context_length": 16384,
        "model_name": "internlm-chat-16k",
        "model_lang": [
            "en",
            "zh"
        ],
        "model_ability": [
            "embed",
            "chat"
        ],
        "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. The Chat version has undergone SFT and RLHF training.",
        "model_specs": [
            {
                "model_format": "pytorch",
                "model_size_in_billions": 20,
                "quantizations": [
                    "4-bit",
                    "8-bit",
                    "none"
                ],
                "model_id": "internlm/internlm-chat-20b",
                "model_revision": "84969e0447c3807207e9acdd92c9302309ec64fc"
            }
        ],
        "prompt_style": {
            "style_name": "INTERNLM",
            "system_prompt": "",
            "roles": [
                "<|User|>",
                "<|Bot|>"
            ],
            "intra_message_sep": "<eoh>\n",
            "inter_message_sep": "<eoa>\n",
            "stop_token_ids": [
                1,
                103028
            ],
            "stop": [
                "<eoa>"
            ]
        }
    },
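The prompt_style block above tells the chat layer how to serialize conversation turns for generation. The snippet below is a rough, self-contained approximation of that assembly, not the actual Xinference builder, which additionally handles BOS tokens and history trimming.

```python
# Rough approximation (not the real Xinference code) of how the INTERNLM
# prompt_style fields above combine chat turns into one generation prompt.
def build_internlm_prompt(turns, roles=("<|User|>", "<|Bot|>"),
                          intra_sep="<eoh>\n", inter_sep="<eoa>\n"):
    """turns: list of (user_text, bot_text or None); the final bot_text is None
    so generation continues from the open bot tag."""
    user_role, bot_role = roles
    prompt = ""
    for user_text, bot_text in turns:
        prompt += f"{user_role}:{user_text}{intra_sep}"
        prompt += f"{bot_role}:" if bot_text is None else f"{bot_role}:{bot_text}{inter_sep}"
    return prompt

# Prints: <|User|>:Hi there<eoh>\n<|Bot|>:
print(build_internlm_prompt([("Hi there", None)]))
```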
    {
        "version": 1,
        "context_length": 4096,
5 changes: 4 additions & 1 deletion xinference/model/llm/vllm/core.py
@@ -65,12 +65,15 @@ class VLLMGenerateConfig(TypedDict, total=False):
except ImportError:
    VLLM_INSTALLED = False

VLLM_SUPPORTED_MODELS = ["llama-2", "baichuan"]
VLLM_SUPPORTED_MODELS = ["llama-2", "baichuan", "internlm-16k"]
VLLM_SUPPORTED_CHAT_MODELS = [
    "llama-2-chat",
    "vicuna-v1.3",
    "vicuna-v1.5",
    "baichuan-chat",
    "internlm-chat",
    "internlm-chat-8k",
    "internlm-chat-16k",
]
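Not part of the diff: a sketch of how these two lists are typically consulted when deciding whether a launch request can be routed to the vLLM backend. The actual match logic in this module also checks the model format, quantization, and whether vLLM is importable.

```python
# Illustrative only: an approximate routing check built on the lists above.
# The real predicate in this module also considers model format, quantization,
# and VLLM_INSTALLED before selecting the vLLM backend.
def _vllm_can_serve(model_name: str, is_chat_model: bool) -> bool:
    supported = VLLM_SUPPORTED_CHAT_MODELS if is_chat_model else VLLM_SUPPORTED_MODELS
    return model_name in supported

# With this commit: _vllm_can_serve("internlm-chat-16k", True) -> True
```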


