From db968ecfd0529f34a0684b4d8048b4ee92983ec5 Mon Sep 17 00:00:00 2001 From: daavoo Date: Thu, 28 Nov 2024 15:59:26 +0100 Subject: [PATCH 1/3] Rename to document_to_podcast --- .gitignore | 2 ++ demo/app.py | 6 +++--- docs/api.md | 6 +++--- pyproject.toml | 2 +- .../inference/__init__.py | 0 .../inference/model_loaders.py | 0 .../inference/text_to_text.py | 0 .../preprocessing/__init__.py | 0 .../preprocessing/data_cleaners.py | 4 ++-- .../preprocessing/data_loaders.py | 0 tests/integration/test_data_load_and_clean.py | 4 ++-- tests/integration/test_model_load_and_inference.py | 4 ++-- tests/unit/inference/test_model_loaders.py | 2 +- tests/unit/preprocessing/test_data_cleaners.py | 2 +- tests/unit/preprocessing/test_data_loaders.py | 2 +- 15 files changed, 18 insertions(+), 16 deletions(-) rename src/{opennotebookllm => document_to_podcast}/inference/__init__.py (100%) rename src/{opennotebookllm => document_to_podcast}/inference/model_loaders.py (100%) rename src/{opennotebookllm => document_to_podcast}/inference/text_to_text.py (100%) rename src/{opennotebookllm => document_to_podcast}/preprocessing/__init__.py (100%) rename src/{opennotebookllm => document_to_podcast}/preprocessing/data_cleaners.py (88%) rename src/{opennotebookllm => document_to_podcast}/preprocessing/data_loaders.py (100%) diff --git a/.gitignore b/.gitignore index 82f9275..e5bc50f 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +.vscode diff --git a/demo/app.py b/demo/app.py index 3a4e611..cc4d875 100644 --- a/demo/app.py +++ b/demo/app.py @@ -3,9 +3,9 @@ import streamlit as st from huggingface_hub import list_repo_files -from opennotebookllm.preprocessing import DATA_LOADERS, DATA_CLEANERS -from opennotebookllm.inference.model_loaders import load_llama_cpp_model -from opennotebookllm.inference.text_to_text import text_to_text_stream +from document_to_podcast.preprocessing import DATA_LOADERS, DATA_CLEANERS +from document_to_podcast.inference.model_loaders import load_llama_cpp_model +from document_to_podcast.inference.text_to_text import text_to_text_stream PODCAST_PROMPT = """ You are a podcast scriptwriter generating engaging and natural-sounding conversations in JSON format. The script features two speakers: diff --git a/docs/api.md b/docs/api.md index a7c1c65..c6da42c 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,7 +1,7 @@ # API Reference -::: opennotebookllm.preprocessing.data_cleaners +::: document_to_podcast.preprocessing.data_cleaners -::: opennotebookllm.inference.model_loaders +::: document_to_podcast.inference.model_loaders -::: opennotebookllm.inference.text_to_text +::: document_to_podcast.inference.text_to_text diff --git a/pyproject.toml b/pyproject.toml index ac35def..7e02e7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=48", "setuptools_scm[toml]>=6.3.1"] build-backend = "setuptools.build_meta" [project] -name = "opennotebookllm" +name = "document_to_podcast" readme = "README.md" license = {text = "Apache-2.0"} requires-python = ">=3.10" diff --git a/src/opennotebookllm/inference/__init__.py b/src/document_to_podcast/inference/__init__.py similarity index 100% rename from src/opennotebookllm/inference/__init__.py rename to src/document_to_podcast/inference/__init__.py diff --git a/src/opennotebookllm/inference/model_loaders.py b/src/document_to_podcast/inference/model_loaders.py similarity index 100% rename from src/opennotebookllm/inference/model_loaders.py rename to src/document_to_podcast/inference/model_loaders.py diff --git a/src/opennotebookllm/inference/text_to_text.py b/src/document_to_podcast/inference/text_to_text.py similarity index 100% rename from src/opennotebookllm/inference/text_to_text.py rename to src/document_to_podcast/inference/text_to_text.py diff --git a/src/opennotebookllm/preprocessing/__init__.py b/src/document_to_podcast/preprocessing/__init__.py similarity index 100% rename from src/opennotebookllm/preprocessing/__init__.py rename to src/document_to_podcast/preprocessing/__init__.py diff --git a/src/opennotebookllm/preprocessing/data_cleaners.py b/src/document_to_podcast/preprocessing/data_cleaners.py similarity index 88% rename from src/opennotebookllm/preprocessing/data_cleaners.py rename to src/document_to_podcast/preprocessing/data_cleaners.py index 2b35b1f..b018eef 100644 --- a/src/opennotebookllm/preprocessing/data_cleaners.py +++ b/src/document_to_podcast/preprocessing/data_cleaners.py @@ -42,7 +42,7 @@ def clean_html(text: str) -> str: - links - meta tags - In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex]. + In addition, it calls [clean_with_regex][document_to_podcast.preprocessing.data_cleaners.clean_with_regex]. Examples: >>> clean_html("

Hello, world!

"") @@ -67,7 +67,7 @@ def clean_markdown(text: str) -> str: This function removes: - markdown images - In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex]. + In addition, it calls [clean_with_regex][document_to_podcast.preprocessing.data_cleaners.clean_with_regex]. Examples: >>> clean_markdown('# Title with image ![alt text](image.jpg "Image Title")') diff --git a/src/opennotebookllm/preprocessing/data_loaders.py b/src/document_to_podcast/preprocessing/data_loaders.py similarity index 100% rename from src/opennotebookllm/preprocessing/data_loaders.py rename to src/document_to_podcast/preprocessing/data_loaders.py diff --git a/tests/integration/test_data_load_and_clean.py b/tests/integration/test_data_load_and_clean.py index 9322753..eecae93 100644 --- a/tests/integration/test_data_load_and_clean.py +++ b/tests/integration/test_data_load_and_clean.py @@ -1,9 +1,9 @@ -from opennotebookllm.preprocessing.data_cleaners import ( +from document_to_podcast.preprocessing.data_cleaners import ( clean_html, clean_with_regex, clean_markdown, ) -from opennotebookllm.preprocessing.data_loaders import load_pdf, load_txt +from document_to_podcast.preprocessing.data_loaders import load_pdf, load_txt def test_load_and_clean_pdf(example_data): diff --git a/tests/integration/test_model_load_and_inference.py b/tests/integration/test_model_load_and_inference.py index ecdba1e..50a523a 100644 --- a/tests/integration/test_model_load_and_inference.py +++ b/tests/integration/test_model_load_and_inference.py @@ -3,8 +3,8 @@ import pytest -from opennotebookllm.inference.model_loaders import load_llama_cpp_model -from opennotebookllm.inference.text_to_text import text_to_text, text_to_text_stream +from document_to_podcast.inference.model_loaders import load_llama_cpp_model +from document_to_podcast.inference.text_to_text import text_to_text, text_to_text_stream def test_model_load_and_inference_text_to_text(): diff --git a/tests/unit/inference/test_model_loaders.py b/tests/unit/inference/test_model_loaders.py index 630c62a..e78a8f5 100644 --- a/tests/unit/inference/test_model_loaders.py +++ b/tests/unit/inference/test_model_loaders.py @@ -1,6 +1,6 @@ from llama_cpp import Llama -from opennotebookllm.inference.model_loaders import load_llama_cpp_model +from document_to_podcast.inference.model_loaders import load_llama_cpp_model def test_load_llama_cpp_model(): diff --git a/tests/unit/preprocessing/test_data_cleaners.py b/tests/unit/preprocessing/test_data_cleaners.py index ce11020..1aecb2f 100644 --- a/tests/unit/preprocessing/test_data_cleaners.py +++ b/tests/unit/preprocessing/test_data_cleaners.py @@ -1,4 +1,4 @@ -from opennotebookllm.preprocessing.data_cleaners import ( +from document_to_podcast.preprocessing.data_cleaners import ( clean_html, clean_with_regex, clean_markdown, diff --git a/tests/unit/preprocessing/test_data_loaders.py b/tests/unit/preprocessing/test_data_loaders.py index 6c3bada..1e445a8 100644 --- a/tests/unit/preprocessing/test_data_loaders.py +++ b/tests/unit/preprocessing/test_data_loaders.py @@ -1,4 +1,4 @@ -from opennotebookllm.preprocessing.data_loaders import load_pdf, load_txt, load_docx +from document_to_podcast.preprocessing.data_loaders import load_pdf, load_txt, load_docx def test_load_pdf(example_data): From e99aaeff0a6d703a5caa023c6a7016438e6506f1 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 3 Dec 2024 09:42:06 +0100 Subject: [PATCH 2/3] Use dash in package name --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7e02e7c..03f54d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=48", "setuptools_scm[toml]>=6.3.1"] build-backend = "setuptools.build_meta" [project] -name = "document_to_podcast" +name = "document-to-podcast" readme = "README.md" license = {text = "Apache-2.0"} requires-python = ">=3.10" From d73d7050cfa115f98422ddd102a3fd86a55bb3e1 Mon Sep 17 00:00:00 2001 From: daavoo Date: Tue, 3 Dec 2024 09:42:32 +0100 Subject: [PATCH 3/3] Update links --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 03f54d9..d3f1f80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,9 +31,9 @@ tests = [ ] [project.urls] -Documentation = "https://mozilla-ai.github.io/OpenNotebookLLM/" -Issues = "https://github.com/mozilla-ai/OpenNotebookLLM/issues" -Source = "https://github.com/mozilla-ai/OpenNotebookLLM" +Documentation = "https://mozilla-ai.github.io/document-to-podcast/" +Issues = "https://github.com/mozilla-ai/document-to-podcast/issues" +Source = "https://github.com/mozilla-ai/document-to-podcast" [tool.setuptools.packages.find] exclude = ["tests", "tests.*"]