Skip to content

Commit

Permalink
Allow custom Modelfile for llama integration
Browse files Browse the repository at this point in the history
Use `Jinja2` to create default `Modelfile`
Add a feature flag to disable ollama
Move all docs to wiki page
  • Loading branch information
dormant-user committed Jun 3, 2024
1 parent 4523ccd commit fb1f620
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 64 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ ffmpeg

# future work
hidden
temp*
speech_recognition_values.yaml
temp*.*

# pypi package files
dist
Expand Down
16 changes: 12 additions & 4 deletions docs/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -2018,15 +2018,19 @@ <h2 id="L">L</h2>
</li>
<li><a href="index.html#jarvis.modules.audio.listener.listen">listen() (in module jarvis.modules.audio.listener)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#jarvis.modules.audio.listener.listen_recursive">listen_recursive() (in module jarvis.modules.audio.listener)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#jarvis.executors.listener_controls.listener_control">listener_control() (in module jarvis.executors.listener_controls)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.listener_phrase_limit">listener_phrase_limit (jarvis.modules.models.classes.EnvConfig attribute)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.listener_timeout">listener_timeout (jarvis.modules.models.classes.EnvConfig attribute)</a>
</li>
<li><a href="index.html#jarvis.modules.templates.templates.Llama">Llama (class in jarvis.modules.templates.templates)</a>
</li>
<li><a href="index.html#jarvis.modules.templates.templates.llama">llama (in module jarvis.modules.templates.templates)</a>
</li>
<li><a href="index.html#jarvis.modules.facenet.face.FaceNet.load_dataset">load_dataset() (jarvis.modules.facenet.face.FaceNet method)</a>
</li>
Expand Down Expand Up @@ -2482,15 +2486,19 @@ <h2 id="O">O</h2>
<li><a href="index.html#jarvis.api.models.modals.OfflineCommunicatorModal">OfflineCommunicatorModal (class in jarvis.api.models.modals)</a>
</li>
<li><a href="index.html#jarvis.modules.transformer.gpt.Ollama">Ollama (class in jarvis.modules.transformer.gpt)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.ollama">ollama (jarvis.modules.models.classes.EnvConfig attribute)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.ollama_model">ollama_model (jarvis.modules.models.classes.EnvConfig attribute)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.ollama_reuse_threshold">ollama_reuse_threshold (jarvis.modules.models.classes.EnvConfig attribute)</a>
<li><a href="index.html#jarvis.modules.models.classes.FileIO.ollama_model_file">ollama_model_file (jarvis.modules.models.classes.FileIO attribute)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.ollama_timeout">ollama_timeout (jarvis.modules.models.classes.EnvConfig attribute)</a>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.ollama_reuse_threshold">ollama_reuse_threshold (jarvis.modules.models.classes.EnvConfig attribute)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.ollama_timeout">ollama_timeout (jarvis.modules.models.classes.EnvConfig attribute)</a>
</li>
<li><a href="index.html#jarvis.executors.offline.ondemand_offline_automation">ondemand_offline_automation() (in module jarvis.executors.offline)</a>
</li>
<li><a href="index.html#jarvis.modules.models.classes.EnvConfig.open_gmail_pass">open_gmail_pass (jarvis.modules.models.classes.EnvConfig attribute)</a>
Expand Down
41 changes: 37 additions & 4 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6679,6 +6679,11 @@ <h1>Crontab<a class="headerlink" href="#id2" title="Permalink to this heading">
<span class="sig-name descname"><span class="pre">news_api</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#jarvis.modules.models.classes.EnvConfig.news_api" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="jarvis.modules.models.classes.EnvConfig.ollama">
<span class="sig-name descname"><span class="pre">ollama</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#jarvis.modules.models.classes.EnvConfig.ollama" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="jarvis.modules.models.classes.EnvConfig.ollama_model">
<span class="sig-name descname"><span class="pre">ollama_model</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><a class="headerlink" href="#jarvis.modules.models.classes.EnvConfig.ollama_model" title="Permalink to this definition">¶</a></dt>
Expand Down Expand Up @@ -7285,6 +7290,11 @@ <h1>Crontab<a class="headerlink" href="#id2" title="Permalink to this heading">
<span class="sig-name descname"><span class="pre">uploads</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Path</span></em><a class="headerlink" href="#jarvis.modules.models.classes.FileIO.uploads" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="jarvis.modules.models.classes.FileIO.ollama_model_file">
<span class="sig-name descname"><span class="pre">ollama_model_file</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Path</span></em><a class="headerlink" href="#jarvis.modules.models.classes.FileIO.ollama_model_file" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<hr class="docutils" />
Expand Down Expand Up @@ -10074,6 +10084,15 @@ <h1>Telegram<a class="headerlink" href="#id4" title="Permalink to this heading">
</div>
</dd></dl>

<dl class="py class">
<dt class="sig sig-object py" id="jarvis.modules.templates.templates.Llama">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">jarvis.modules.templates.templates.</span></span><span class="sig-name descname"><span class="pre">Llama</span></span><a class="headerlink" href="#jarvis.modules.templates.templates.Llama" title="Permalink to this definition">¶</a></dt>
<dd><p>Modelfile template for ollama SDK.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">Llama</span>
</pre></div>
</div>
</dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="jarvis.modules.templates.templates.email">
<span class="sig-prename descclassname"><span class="pre">jarvis.modules.templates.templates.</span></span><span class="sig-name descname"><span class="pre">email</span></span><a class="headerlink" href="#jarvis.modules.templates.templates.email" title="Permalink to this definition">¶</a></dt>
Expand All @@ -10092,6 +10111,12 @@ <h1>Telegram<a class="headerlink" href="#id4" title="Permalink to this heading">
<dd><p>alias of <a class="reference internal" href="#jarvis.modules.templates.templates.EndpointTemplates" title="jarvis.modules.templates.templates.EndpointTemplates"><code class="xref py py-class docutils literal notranslate"><span class="pre">EndpointTemplates</span></code></a></p>
</dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="jarvis.modules.templates.templates.llama">
<span class="sig-prename descclassname"><span class="pre">jarvis.modules.templates.templates.</span></span><span class="sig-name descname"><span class="pre">llama</span></span><a class="headerlink" href="#jarvis.modules.templates.templates.llama" title="Permalink to this definition">¶</a></dt>
<dd><p>alias of <a class="reference internal" href="#jarvis.modules.templates.templates.Llama" title="jarvis.modules.templates.templates.Llama"><code class="xref py py-class docutils literal notranslate"><span class="pre">Llama</span></code></a></p>
</dd></dl>

</section>
<section id="module-jarvis.modules.timeout.timeout">
<span id="timeout"></span><h1>Timeout<a class="headerlink" href="#module-jarvis.modules.timeout.timeout" title="Permalink to this heading">¶</a></h1>
Expand Down Expand Up @@ -10130,11 +10155,19 @@ <h1>Telegram<a class="headerlink" href="#id4" title="Permalink to this heading">
<p class="admonition-title">Warning</p>
<ul class="simple">
<li><p>This module uses a pre-trained transformer to generate predictive responses.</p></li>
<li><p>Due to the size of machine learning models, this feature will be disabled in limited mode.</p></li>
<li><p>Although this feature is enabled by default, please note that machine learning models are memory beasts.</p></li>
<li><p>Please refer to the following minimum requirements before choosing the right model.</p></li>
<li><p>This feature can be disabled by setting the env var <code class="docutils literal notranslate"><span class="pre">ollama=False</span></code> in the <code class="docutils literal notranslate"><span class="pre">env_file</span></code></p></li>
</ul>
</div>
<p class="rubric">Notes</p>
<p>There are quite a few parameters that can be adjusted, to customize the model usage and interaction with Jarvis.</p>
<ul class="simple">
<li><p><a class="reference external" href="https://github.com/thevickypedia/Jarvis/wiki/2.-Environment-Variables#ollama-gpt-integration">Params for Jarvis</a></p></li>
<li><p><a class="reference external" href="https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Params for Ollama API (Modelfile)</a></p></li>
</ul>
<dl class="simple">
<dt>RAM Requirements:</dt><dd><ul class="simple">
<dt><strong>RAM Requirements</strong></dt><dd><ul class="simple">
<li><p>8 GB to run the 7B models</p></li>
<li><p>16 GB to run the 13B models</p></li>
<li><p>32 GB to run the 33B models</p></li>
Expand All @@ -10154,7 +10187,7 @@ <h1>Telegram<a class="headerlink" href="#id4" title="Permalink to this heading">
<dl class="py function">
<dt class="sig sig-object py" id="jarvis.modules.transformer.gpt.dump_history">
<span class="sig-prename descclassname"><span class="pre">jarvis.modules.transformer.gpt.</span></span><span class="sig-name descname"><span class="pre">dump_history</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">request</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">response</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#jarvis.modules.transformer.gpt.dump_history" title="Permalink to this definition">¶</a></dt>
<dd><p>Dump responses from GPT to a yaml file for future response.</p>
<dd><p>Dump responses from GPT into a yaml file for future response.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
Expand Down Expand Up @@ -10213,7 +10246,7 @@ <h1>Telegram<a class="headerlink" href="#id4" title="Permalink to this heading">
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">Customizer</span>
</pre></div>
</div>
<p>Initializes the model name.</p>
<p>Initializes the custom model name and the <code class="docutils literal notranslate"><span class="pre">Modelfile</span></code> used to customize it.</p>
<dl class="py method">
<dt class="sig sig-object py" id="jarvis.modules.transformer.gpt.Customizer.run">
<span class="sig-name descname"><span class="pre">run</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#jarvis.modules.transformer.gpt.Customizer.run" title="Permalink to this definition">¶</a></dt>
Expand Down
Binary file modified docs/objects.inv
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/searchindex.js

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions jarvis/executors/static_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from jarvis.modules.models import models
from jarvis.modules.utils import util

# TODO: Move most if not all of these to Modelfile


def form(*args) -> None:
"""Response for form."""
Expand Down
7 changes: 7 additions & 0 deletions jarvis/modules/models/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ def channel_validator(
raise InvalidEnvVars(f"value should be one of {complicated}")


# noinspection PyMethodParameters
class EnvConfig(BaseSettings):
"""Configure all env vars and validate using ``pydantic`` to share across modules.
Expand Down Expand Up @@ -431,6 +432,9 @@ class EnvConfig(BaseSettings):
weather_api: str | None = None
maps_api: str | None = None
news_api: str | None = None

# Machine learning model config
ollama: bool = True
ollama_model: str = "llama3"
ollama_timeout: int = Field(5, le=30, ge=1)
ollama_reuse_threshold: float | None = Field(None, le=0.9, ge=0.5)
Expand Down Expand Up @@ -751,6 +755,9 @@ class FileIO(BaseModel):
# On demand storage
uploads: DirectoryPath = os.path.join(root, "uploads")

# Ollama
ollama_model_file: FilePath = os.path.join(root, "Modelfile")


fileio = FileIO()

Expand Down
13 changes: 13 additions & 0 deletions jarvis/modules/templates/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ class GenericTemplates:
win_wifi_xml = file.read()


class Llama:
    """Modelfile template for ollama SDK.

    >>> Llama

    """

    # The template is only read at runtime; when the module is imported by a
    # sphinx documentation build (invoker == "sphinx-build") the read is
    # skipped, so ``modelfile`` is left undefined in that context.
    if models.settings.invoker != "sphinx-build":
        with open(os.path.join(os.path.dirname(__file__), "Modelfile")) as file:
            # Raw text of the bundled Modelfile template, exposed as a
            # class attribute for consumers (e.g. the ollama customizer).
            modelfile = file.read()


email = EmailTemplates
generic = GenericTemplates
endpoint = EndpointTemplates
llama = Llama
15 changes: 0 additions & 15 deletions jarvis/modules/transformer/Modelfile

This file was deleted.

27 changes: 0 additions & 27 deletions jarvis/modules/transformer/gpt.md

This file was deleted.

44 changes: 35 additions & 9 deletions jarvis/modules/transformer/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@
Warnings:
- This module uses a pre-trained transformer to generate predictive responses.
- Due to the size of machine learning models, this feature will be disabled in limited mode.
- Although this feature is enabled by default, please note that machine learning models are memory beasts.
- Please refer to the following minimum requirements before choosing the right model.
- This feature can be disabled by setting the env var ``ollama=False`` in the ``env_file``
RAM Requirements:
Notes:
There are quite a few parameters that can be adjusted, to customize the model usage and interaction with Jarvis.
- `Params for Jarvis <https://github.com/thevickypedia/Jarvis/wiki/2.-Environment-Variables
#ollama-gpt-integration>`__
- `Params for Ollama API (Modelfile) <https://github.com/ollama/ollama/blob/main/docs/modelfile.md
#valid-parameters-and-values>`__
**RAM Requirements**
- 8 GB to run the 7B models
- 16 GB to run the 13B models
- 32 GB to run the 33B models
Expand Down Expand Up @@ -33,17 +44,19 @@

import httpcore
import httpx
import jinja2
import ollama

from jarvis.executors import files, static_responses
from jarvis.modules.audio import speaker
from jarvis.modules.logger import logger
from jarvis.modules.models import models
from jarvis.modules.templates import templates
from jarvis.modules.utils import support


def dump_history(request: str, response: str) -> None:
"""Dump responses from GPT to a yaml file for future response.
"""Dump responses from GPT into a yaml file for future response.
Args:
request: Request from user.
Expand Down Expand Up @@ -134,8 +147,22 @@ class Customizer:
"""

def __init__(self):
"""Initializes the model name."""
"""Initializes the custom model name and the ``Modelfile`` used to customize it."""
self.model_name = "jarvis"
if not os.path.isfile(models.fileio.ollama_model_file):
logger.info(
"'%s' not found, creating one at '%s'",
os.path.basename(models.fileio.ollama_model_file),
os.path.basename(models.fileio.root),
)
logger.info(
"Feel free to modify this file in the future for custom instructions"
)
template = jinja2.Template(source=templates.llama.modelfile)
rendered = template.render(MODEL_NAME=models.env.ollama_model)
with open(models.fileio.ollama_model_file, "w") as file:
file.write(rendered)
file.flush()

def run(self) -> str:
"""Runs the customizer with SDK as the primary option and CLI as secondary.
Expand All @@ -158,10 +185,8 @@ def run(self) -> str:

def customize_model_cli(self) -> None:
"""Uses the CLI to customize the model."""
model_file = os.path.join(os.path.dirname(__file__), "Modelfile")
model_file = model_file.lstrip(os.getcwd())
process = subprocess.Popen(
f"ollama create {self.model_name} -f {model_file}",
f"ollama create {self.model_name} -f {models.fileio.ollama_model_file}",
shell=True,
universal_newlines=True,
text=True,
Expand All @@ -181,9 +206,10 @@ def customize_model_sdk(self) -> None:
Warnings:
`Model creation with SDK is currently broke <https://github.com/ollama/ollama-python/issues/171>`__.
"""
model_file = os.path.join(os.path.dirname(__file__), "Modelfile")
for res in ollama.create(
model=self.model_name, modelfile=model_file, stream=True
model=self.model_name,
modelfile=models.fileio.ollama_model_file,
stream=True,
):
logger.info(res["response"])
if res["done"]:
Expand Down

0 comments on commit fb1f620

Please sign in to comment.