
podcast script generation component #15

Merged 32 commits on Nov 25, 2024

Commits:
- 536c98d  Add devcontainer and requirements (daavoo, Nov 14, 2024)
- 0ae661e  Add pyproject.toml (daavoo, Nov 15, 2024)
- c4a1ee1  Add data_loaders and tests (daavoo, Nov 15, 2024)
- d2b276c  Add data_cleaners and tests (daavoo, Nov 15, 2024)
- 8629481  Update demo (daavoo, Nov 15, 2024)
- cef92b3  Add `LOADERS` and `CLEANERS` (daavoo, Nov 19, 2024)
- acd50a9  Add markdown and docx (daavoo, Nov 19, 2024)
- 2a8f005  Add API Reference (daavoo, Nov 19, 2024)
- 95c342a  Update tests (daavoo, Nov 19, 2024)
- e8ac586  Update install (daavoo, Nov 19, 2024)
- ee7d299  Add initial scripts (daavoo, Nov 19, 2024)
- fb38207  More tests (daavoo, Nov 20, 2024)
- 29df436  Merge remote-tracking branch 'origin/main' into AH-104-Initial-Podcas… (daavoo, Nov 21, 2024)
- d4b6066  fix merge (daavoo, Nov 21, 2024)
- abeb3c0  Add podcast writing to demo/app (daavoo, Nov 21, 2024)
- 4bcc57b  Add missing deps (daavoo, Nov 21, 2024)
- 06627fa  Add text_to_podcast module (daavoo, Nov 21, 2024)
- 4457813  Expose model options and prompt tuning in the app (daavoo, Nov 21, 2024)
- c73d4d3  pre-commit (daavoo, Nov 21, 2024)
- a868093  Strip system_prompt (daavoo, Nov 21, 2024)
- 8b2c57b  Rename to inference module. Add docstrings (daavoo, Nov 22, 2024)
- d2c75c9  pre-commit (daavoo, Nov 22, 2024)
- 7a7e39c  Add CURATED_REPOS (daavoo, Nov 22, 2024)
- e1c6ccb  JSON prompt (daavoo, Nov 22, 2024)
- 72413a1  Update API docs (daavoo, Nov 22, 2024)
- 8817ea0  Fix format (daavoo, Nov 22, 2024)
- 06a2c3d  Make text cutoff based on `model.n_ctx()`. Consider ~4 characters per… (daavoo, Nov 25, 2024)
- 39fb3b3  Add inference tests (daavoo, Nov 25, 2024)
- 1968278  Drop __init__ imports (daavoo, Nov 25, 2024)
- f88b713  Fix outdated arg (daavoo, Nov 25, 2024)
- 73ac7bf  Drop redundant JSON output in prompt (daavoo, Nov 25, 2024)
- 1c11c98  Update default stop (daavoo, Nov 25, 2024)
2 changes: 1 addition & 1 deletion .github/.devcontainer.json
@@ -7,5 +7,5 @@
},
"packages": ["libgl1-mesa-dev"]
},
-    "postCreateCommand": "pip install -e '.[demo]'"
+    "postCreateCommand": "pip install -e . --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
}
57 changes: 55 additions & 2 deletions demo/app.py
@@ -1,9 +1,28 @@
from pathlib import Path

import streamlit as st
from huggingface_hub import list_repo_files

from opennotebookllm.preprocessing import DATA_LOADERS, DATA_CLEANERS
from opennotebookllm.inference import load_llama_cpp_model
from opennotebookllm.inference import text_to_text

PODCAST_PROMPT = """
Convert this text into a podcast script.
The conversation should be between 2 speakers.
Use [SPEAKER1] and [SPEAKER2] to limit sections.
Do not include [INTRO], [OUTRO] or any other [SECTION].
Text:
"""

CURATED_REPOS = [
"allenai/OLMoE-1B-7B-0924-Instruct-GGUF",
"MaziyarPanahi/SmolLM2-1.7B-Instruct-GGUF",
"microsoft/Phi-3-mini-4k-instruct-gguf",
"HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
"Qwen/Qwen2.5-1.5B-Instruct-GGUF",
"Qwen/Qwen2.5-3B-Instruct-GGUF",
]

uploaded_file = st.file_uploader(
"Choose a file", type=["pdf", "html", "txt", "docx", "md"]
@@ -17,9 +36,43 @@
raw_text = DATA_LOADERS[extension](uploaded_file)
with col1:
st.title("Raw Text")
-        st.write(raw_text[:200])
+        st.text_area(f"Total Length: {len(raw_text)}", f"{raw_text[:500]} . . .")

clean_text = DATA_CLEANERS[extension](raw_text)
with col2:
st.title("Cleaned Text")
-        st.write(clean_text[:200])
+        st.text_area(f"Total Length: {len(clean_text)}", f"{clean_text[:500]} . . .")

# I set this value as a quick safeguard but we should actually tokenize the text and count the number of real tokens.
daavoo (Contributor, Author):

@stefanfrench:

I think we should try to set the text limit from the input doc in a somewhat more rigorous way, e.g. based on the actual number of real tokens, as you suggest in your comment. If you prefer, we can do this as a separate issue.

I am currently looking into this. I am trying to find a way to use the llama_cpp API so that the tokenization call is not wasted just for the sake of filtering. If I don't find an easy solution today, maybe we can consider it a follow-up so it does not block the full PR.

stefanfrench (Contributor):

@daavoo - sounds good!

daavoo (Contributor, Author):

I gave it a try (encoding first), but the code became way more complicated. However, it seems that 1 token ~= 4 characters is considered a common default, and it is the value used when estimating token consumption without spending extra calls.

So, I updated the code to use this ~4-characters-per-token approximation and made a small improvement to use the context length associated with each model (before, I was too lazy and just hardcoded the number to 4096, as that was the value for OLMoE).

if len(clean_text) > 4096 * 3:
st.warning(
f"Input text is too big ({len(clean_text)}). Using only a subset of it ({4096 * 3})."
)
clean_text = clean_text[: 4096 * 3]

repo_name = st.selectbox("Select Repo", CURATED_REPOS)
model_name = st.selectbox(
"Select Model",
[
x
for x in list_repo_files(repo_name)
if ".gguf" in x.lower() and ("q8" in x.lower() or "fp16" in x.lower())
],
index=None,
)
if model_name:
with st.spinner("Downloading and Loading Model..."):
model = load_llama_cpp_model(model_id=f"{repo_name}/{model_name}")

system_prompt = st.text_area("Podcast generation prompt", value=PODCAST_PROMPT)

if st.button("Generate Podcast Script"):
with st.spinner("Generating Podcast Script..."):
text = ""
for chunk in text_to_text(
clean_text, model, system_prompt=system_prompt.strip(), stream=True
):
text += chunk
if text.endswith("\n"):
st.write(text)
text = ""
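The review thread above settles on a ~4-characters-per-token budget tied to the model's context window. A minimal sketch of that cutoff logic (the function name and the print-based warning are illustrative, not part of the PR):

```python
def truncate_to_context(clean_text: str, n_ctx: int, chars_per_token: int = 4) -> str:
    """Truncate text to roughly fit a model's context window.

    Uses the common ~4-characters-per-token heuristic instead of
    tokenizing the input up front.
    """
    max_chars = n_ctx * chars_per_token
    if len(clean_text) > max_chars:
        print(
            f"Input text is too big ({len(clean_text)}). "
            f"Using only a subset of it ({max_chars})."
        )
        return clean_text[:max_chars]
    return clean_text
```

In the merged code, `n_ctx` would come from `model.n_ctx()` rather than being hardcoded.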
9 changes: 4 additions & 5 deletions pyproject.toml
@@ -10,9 +10,12 @@ requires-python = ">=3.10"
 dynamic = ["version"]
 dependencies = [
     "beautifulsoup4",
+    "huggingface-hub",
+    "llama-cpp-python",
     "loguru",
     "PyPDF2[crypto]",
-    "python-docx"
+    "python-docx",
+    "streamlit",
 ]

[project.optional-dependencies]
@@ -27,10 +30,6 @@ tests = [
"pytest-sugar>=0.9.6",
]

-demo = [
-    "streamlit"
-]

[project.urls]
Documentation = "https://mozilla-ai.github.io/OpenNotebookLLM/"
Issues = "https://github.com/mozilla-ai/OpenNotebookLLM/issues"
2 changes: 2 additions & 0 deletions src/opennotebookllm/inference/__init__.py
@@ -0,0 +1,2 @@
from .model_loaders import load_llama_cpp_model as load_llama_cpp_model
from .text_to_text import text_to_text as text_to_text
28 changes: 28 additions & 0 deletions src/opennotebookllm/inference/model_loaders.py
@@ -0,0 +1,28 @@
from llama_cpp import Llama


def load_llama_cpp_model(
model_id: str,
) -> Llama:
"""
Loads the given model_id using Llama.from_pretrained.

Args:
model_id (str): The model id to load.
Format is expected to be `{org}/{repo}/{filename}`.

Returns:
Llama: The loaded model.

Example:
>>> model = load_llama_cpp_model(
"allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf")
"""
org, repo, filename = model_id.split("/")
model = Llama.from_pretrained(
repo_id=f"{org}/{repo}",
filename=filename,
# 0 means that the model limit will be used, instead of the default (512) or other hardcoded value
n_ctx=0,
)
return model
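The `{org}/{repo}/{filename}` convention above can be illustrated without downloading anything; `split_model_id` is a hypothetical helper that mirrors the parsing inside `load_llama_cpp_model`:

```python
def split_model_id(model_id: str) -> tuple[str, str]:
    """Split an `{org}/{repo}/{filename}` id into the (repo_id, filename)
    pair expected by Llama.from_pretrained."""
    org, repo, filename = model_id.split("/")
    return f"{org}/{repo}", filename

repo_id, filename = split_model_id(
    "allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
)
```

Note that `str.split("/")` raises a `ValueError` if the id does not have exactly three components, which gives a loud failure for malformed ids.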
34 changes: 34 additions & 0 deletions src/opennotebookllm/inference/text_to_text.py
@@ -0,0 +1,34 @@
from llama_cpp import Llama


def text_to_text(
input_text: str, model: Llama, system_prompt: str, stream: bool = False
):
"""
Transforms input_text using the given model and system prompt.

Args:
input_text (str): The text to be transformed.
model (Llama): The model to use for conversion.
system_prompt (str): The system prompt to use for conversion.
stream (bool, optional): Whether to stream the response. Defaults to False.

Yields:
str: Chunks of the transformed text as they become available, when stream=True.

Returns:
str: The full transformed text, when stream=False.
"""
response = model.create_chat_completion(
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": input_text},
],
stream=stream,
)
if stream:
for item in response:
if item["choices"][0].get("delta", {}).get("content", None):
yield item["choices"][0].get("delta", {}).get("content", None)
else:
return response["choices"][0]["message"]["content"]
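When `stream=True`, callers consume `text_to_text` as a generator. A sketch of the line-buffered accumulation pattern that demo/app.py uses, with a stub generator standing in for the model call (the stub chunks are illustrative, not real model output):

```python
def stream_chunks():
    # Hypothetical stand-in for text_to_text(..., stream=True):
    # yields small text fragments as the model would produce them.
    for chunk in ["[SPEAKER1] Hello", " and welcome.\n", "[SPEAKER2] Thanks!\n"]:
        yield chunk

lines = []
buffer = ""
for chunk in stream_chunks():
    buffer += chunk
    # Flush whole lines only, as demo/app.py does before calling st.write.
    if buffer.endswith("\n"):
        lines.append(buffer)
        buffer = ""
```

Flushing on newline boundaries keeps the displayed script readable while still showing progress during generation.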