Add gpt4all local models, including an embedding provider (jupyterlab#454)

* Added models for gpt4all.
* Removed replit model.
* Updated docs for gpt4all.
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Updated docs.
* A fix to stop download of embeddings model.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Marchlak · Oct 28, 2024 · 8d2c139 · 8d2c139
1 parent a3fca97
commit 8d2c139
Show file tree

Hide file tree

Showing 6 changed files with 56 additions and 6 deletions.
diff --git a/docs/source/users/index.md b/docs/source/users/index.md
@@ -120,6 +120,7 @@ Jupyter AI supports the following model providers:
 | Bedrock             | `bedrock`            | N/A                        | `boto3`                         |
 | Bedrock (chat)      | `bedrock-chat`       | N/A                        | `boto3`                         |
 | Cohere              | `cohere`             | `COHERE_API_KEY`           | `cohere`                        |
+| GPT4All             | `gpt4all`            | N/A                        | `gpt4all`                       |
 | Hugging Face Hub    | `huggingface_hub`    | `HUGGINGFACEHUB_API_TOKEN` | `huggingface_hub`, `ipywidgets`, `pillow` |
 | OpenAI              | `openai`             | `OPENAI_API_KEY`           | `openai`                        |
 | OpenAI (chat)       | `openai-chat`        | `OPENAI_API_KEY`           | `openai`                        |
@@ -352,13 +353,25 @@ response.  In this example, the endpoint returns an object with the schema
 ### GPT4All usage (early-stage)
 
 Currently, we offer experimental support for GPT4All. To get started, first
-decide which models you will use. We currently offer three models from GPT4All:
+decide which models you will use. We currently offer the following models from GPT4All:
 
 | Model name                   | Model size | Model bin URL                                              |
-|------------------------------|------------|------------------------------------------------------------|
-| `ggml-gpt4all-l13b-snoozy`   | 7.6 GB     | `http://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin`    |
-| `ggml-gpt4all-j-v1.2-jazzy`  | 3.8 GB     | `https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin`  |
-| `ggml-gpt4all-j-v1.3-groovy` | 3.8 GB     | `https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin` |
+|---------------------------------|------------|------------------------------------------------------------|
+| `ggml-gpt4all-l13b-snoozy`      | 7.6 GB     | `https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin`   |
+| `ggml-gpt4all-j-v1.2-jazzy`     | 3.8 GB     | `https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin`  |
+| `ggml-gpt4all-j-v1.3-groovy`    | 3.8 GB     | `https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin` |
+| `mistral-7b-openorca.Q4_0`      | 3.8 GB     | `https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf` |
+| `mistral-7b-instruct-v0.1.Q4_0` | 3.8 GB     | `https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf` |
+| `gpt4all-falcon-q4_0`           | 3.9 GB     | `https://gpt4all.io/models/gguf/gpt4all-falcon-q4_0.gguf` |
+| `wizardlm-13b-v1.2.Q4_0`        | 6.9 GB     | `https://gpt4all.io/models/gguf/wizardlm-13b-v1.2.Q4_0.gguf` |
+| `nous-hermes-llama2-13b.Q4_0`   | 6.9 GB     | `https://gpt4all.io/models/gguf/nous-hermes-llama2-13b.Q4_0.gguf` |
+| `gpt4all-13b-snoozy-q4_0`       | 6.9 GB     | `https://gpt4all.io/models/gguf/gpt4all-13b-snoozy-q4_0.gguf` |
+| `mpt-7b-chat-merges-q4_0`       | 3.5 GB     | `https://gpt4all.io/models/gguf/mpt-7b-chat-merges-q4_0.gguf` |
+| `orca-mini-3b-gguf2-q4_0`       | 1.8 GB     | `https://gpt4all.io/models/gguf/orca-mini-3b-gguf2-q4_0.gguf` |
+| `starcoder-q4_0`                | 8.4 GB     | `https://gpt4all.io/models/gguf/starcoder-q4_0.gguf` |
+| `rift-coder-v0-7b-q4_0`         | 3.6 GB     | `https://gpt4all.io/models/gguf/rift-coder-v0-7b-q4_0.gguf` |
+| `all-MiniLM-L6-v2-f16`          | 44 MB      | `https://gpt4all.io/models/gguf/all-MiniLM-L6-v2-f16.gguf` |
+| `em_german_mistral_v01.Q4_0`    | 3.8 GB     | `https://huggingface.co/TheBloke/em_german_mistral_v01-GGUF/resolve/main/em_german_mistral_v01.Q4_0.gguf` |
 
 
 Note that each model comes with its own license, and that users are themselves

diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/__init__.py b/packages/jupyter-ai-magics/jupyter_ai_magics/__init__.py
@@ -4,6 +4,7 @@
 from .embedding_providers import (
     BedrockEmbeddingsProvider,
     CohereEmbeddingsProvider,
+    GPT4AllEmbeddingsProvider,
     HfHubEmbeddingsProvider,
     OpenAIEmbeddingsProvider,
 )

diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/embedding_providers.py b/packages/jupyter-ai-magics/jupyter_ai_magics/embedding_providers.py
@@ -1,3 +1,4 @@
+import os
 from typing import ClassVar, List
 
 from jupyter_ai_magics.providers import (
@@ -9,6 +10,7 @@
 from langchain.embeddings import (
     BedrockEmbeddings,
     CohereEmbeddings,
+    GPT4AllEmbeddings,
     HuggingFaceHubEmbeddings,
     OpenAIEmbeddings,
 )
@@ -103,3 +105,25 @@ class BedrockEmbeddingsProvider(BaseEmbeddingsProvider, BedrockEmbeddings):
     model_id_key = "model_id"
     pypi_package_deps = ["boto3"]
     auth_strategy = AwsAuthStrategy()
+
+
+class GPT4AllEmbeddingsProvider(BaseEmbeddingsProvider, GPT4AllEmbeddings):
+    def __init__(self, **kwargs):
+        from gpt4all import GPT4All
+
+        model_name = kwargs.get("model_id").split(":")[-1]
+
+        # GPT4AllEmbeddings does not accept any kwargs at the moment, so
+        # constructing it starts a model download whenever the model
+        # file is missing. Calling retrieve_model with allow_download=False
+        # first raises an exception instead if the file is not present.
+        GPT4All.retrieve_model(model_name=model_name, allow_download=False)
+
+        kwargs["allow_download"] = False
+        super().__init__(**kwargs)
+
+    id = "gpt4all"
+    name = "GPT4All Embeddings"
+    models = ["all-MiniLM-L6-v2-f16"]
+    model_id_key = "model_id"
+    pypi_package_deps = ["gpt4all"]
diff --git a/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py b/packages/jupyter-ai-magics/jupyter_ai_magics/providers.py
@@ -342,6 +342,17 @@ def __init__(self, **kwargs):
         "ggml-gpt4all-j-v1.3-groovy",
         # this one needs llama backend and has licence restriction
         "ggml-gpt4all-l13b-snoozy",
+        "mistral-7b-openorca.Q4_0",
+        "mistral-7b-instruct-v0.1.Q4_0",
+        "gpt4all-falcon-q4_0",
+        "wizardlm-13b-v1.2.Q4_0",
+        "nous-hermes-llama2-13b.Q4_0",
+        "gpt4all-13b-snoozy-q4_0",
+        "mpt-7b-chat-merges-q4_0",
+        "orca-mini-3b-gguf2-q4_0",
+        "starcoder-q4_0",
+        "rift-coder-v0-7b-q4_0",
+        "em_german_mistral_v01.Q4_0",
     ]
     model_id_key = "model"
     pypi_package_deps = ["gpt4all"]

diff --git a/packages/jupyter-ai-magics/pyproject.toml b/packages/jupyter-ai-magics/pyproject.toml
@@ -72,6 +72,7 @@ amazon-bedrock-chat = "jupyter_ai_magics:BedrockChatProvider"
 [project.entry-points."jupyter_ai.embeddings_model_providers"]
 bedrock = "jupyter_ai_magics:BedrockEmbeddingsProvider"
 cohere = "jupyter_ai_magics:CohereEmbeddingsProvider"
+gpt4all = "jupyter_ai_magics:GPT4AllEmbeddingsProvider"
 huggingface_hub = "jupyter_ai_magics:HfHubEmbeddingsProvider"
 openai = "jupyter_ai_magics:OpenAIEmbeddingsProvider"
 

diff --git a/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py b/packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
@@ -68,7 +68,7 @@ async def handle_exc(self, e: Exception, message: HumanChatMessage):
         implementation is provided, however chat handlers (subclasses) should
         implement this method to provide a more helpful error response.
         """
-        self._default_handle_exc(e, message)
+        await self._default_handle_exc(e, message)
 
     async def _default_handle_exc(self, e: Exception, message: HumanChatMessage):
         """