Feat: Support internvl2 and internvl stream (#2079)

xorbitsai · Aug 16, 2024 · commit e4d2257
1 parent f5229a2

Showing 10 changed files with 1,278 additions and 271 deletions.
diff --git a/xinference/model/llm/__init__.py b/xinference/model/llm/__init__.py
@@ -127,7 +127,7 @@ def _install():
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.qwen_vl import QwenVLChatModel
     from .transformers.yi_vl import YiVLChatModel
-    from .vllm.core import VLLMChatModel, VLLMModel
+    from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
 
     try:
         from .transformers.omnilmm import OmniLMMModel
@@ -145,7 +145,7 @@ def _install():
         ]
     )
     SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
-    VLLM_CLASSES.extend([VLLMModel, VLLMChatModel])
+    VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel])
     TRANSFORMERS_CLASSES.extend(
         [

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
@@ -7083,43 +7083,210 @@
           "model_format": "pytorch",
           "model_size_in_billions": 2,
           "quantizations": [
-              "none"
+            "4-bit",
+            "8-bit",
+            "none"
           ],
           "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
-          "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
+          "model_revision": "ecbbd21dcf38caa74d925967b997167b0c7b3f47"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 4,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/Mini-InternVL-Chat-4B-V1-5",
+          "model_revision": "ce1559ddf9d87f5130aa5233b0e93b95e4e4161a"
         },
         {
           "model_format": "pytorch",
           "model_size_in_billions": 26,
           "quantizations": [
-              "none"
+            "4-bit",
+            "8-bit",
+            "none"
           ],
           "model_id": "OpenGVLab/InternVL-Chat-V1-5",
-          "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
+          "model_revision": "9db32d9127cac0c85961e169d75da57a18a847b1"
+        }
+    ],
+    "prompt_style": {
+        "style_name": "INTERNVL",
+        "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+        "roles": [
+            "<|im_start|>user",
+            "<|im_start|>assistant"
+        ],
+        "intra_message_sep": "<|im_end|>",
+        "stop_token_ids": [
+            2,
+            92543,
+            92542
+        ],
+        "stop": [
+            "</s>",
+            "<|im_end|>",
+            "<|im_start|>"
+        ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internvl2",
+    "model_lang": [
+        "en",
+        "zh"
+    ],
+    "model_ability": [
+        "chat",
+        "vision"
+    ],
+    "model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding.",
+    "model_specs": [
+      {
+          "model_format": "pytorch",
+          "model_size_in_billions": 1,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-1B",
+          "model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
         },
         {
           "model_format": "pytorch",
+          "model_size_in_billions": 2,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-2B",
+          "model_revision": "422ad7c6335917bfb514958233955512338485a6"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 2,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "OpenGVLab/InternVL2-2B-AWQ",
+          "model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 4,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-4B",
+          "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "OpenGVLab/InternVL2-8B-AWQ",
+          "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-8B",
+          "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 26,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-26B",
+          "model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
+        },
+        {
+          "model_format": "awq",
           "model_size_in_billions": 26,
           "quantizations": [
-              "Int8"
+            "Int4"
+          ],
+          "model_id": "OpenGVLab/InternVL2-26B-AWQ",
+          "model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 40,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-40B",
+          "model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 40,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "OpenGVLab/InternVL2-40B-AWQ",
+          "model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 76,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "OpenGVLab/InternVL2-Llama3-76B",
+          "model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 76,
+          "quantizations": [
+            "Int4"
           ],
-          "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
-          "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
+          "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
+          "model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
         }
     ],
     "prompt_style": {
-        "style_name": "INTERNLM2",
+        "style_name": "INTERNVL",
         "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
         "roles": [
             "<|im_start|>user",
             "<|im_start|>assistant"
         ],
         "intra_message_sep": "<|im_end|>",
         "stop_token_ids": [
+            2,
+            92543,
             92542
         ],
         "stop": [
-            "<|im_end|>"
+            "</s>",
+            "<|im_end|>",
+            "<|im_start|>"
         ]
     }
   },