diff --git a/.github/setup.sh b/.github/setup.sh
index 7d4084b..97aacba 100644
--- a/.github/setup.sh
+++ b/.github/setup.sh
@@ -3,4 +3,3 @@ git clone https://github.com/descriptinc/audiotools
 python -m pip install audiotools
 python -m pip install -e .
 rm -rf audiotools
-
diff --git a/demo/app.py b/demo/app.py
index de8ff98..ef0550f 100644
--- a/demo/app.py
+++ b/demo/app.py
@@ -2,9 +2,7 @@
 from pathlib import Path
 
 import streamlit as st
-from huggingface_hub import list_repo_files
 
-from opennotebookllm.podcast_maker.config import PodcastConfig, SpeakerConfig
 from opennotebookllm.preprocessing import DATA_LOADERS, DATA_CLEANERS
 from opennotebookllm.inference.model_loaders import (
     load_llama_cpp_model,
@@ -34,20 +32,20 @@
 
 SPEAKER_DESCRIPTIONS = {
     "1": "Laura's voice is exciting and fast in delivery with very clear audio and no background noise.",
-    "2": "Jon's voice is calm with very clear audio and no background noise."
+    "2": "Jon's voice is calm with very clear audio and no background noise.",
 }
 
 
 @st.cache_resource
 def load_text_to_text_model():
     return load_llama_cpp_model(
-        model_id="allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf")
+        model_id="allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
+    )
+
 
 @st.cache_resource
 def load_text_to_speech_model_and_tokenizer():
-    return load_parler_tts_model_and_tokenizer(
-        "parler-tts/parler-tts-mini-v1", "cpu"
-    )
+    return load_parler_tts_model_and_tokenizer("parler-tts/parler-tts-mini-v1", "cpu")
 
 
 st.title("Document To Podcast")
@@ -60,10 +58,11 @@ def load_text_to_speech_model_and_tokenizer():
 
 
 if uploaded_file is not None:
-
     st.divider()
     st.header("Loading and Cleaning Data")
-    st.markdown("[API Reference for data_cleaners](https://mozilla-ai.github.io/document-to-podcast/api/#opennotebookllm.preprocessing.data_cleaners)")
+    st.markdown(
+        "[API Reference for data_cleaners](https://mozilla-ai.github.io/document-to-podcast/api/#opennotebookllm.preprocessing.data_cleaners)"
+    )
 
     extension = Path(uploaded_file.name).suffix
 
@@ -81,7 +80,9 @@ def load_text_to_speech_model_and_tokenizer():
 
     st.divider()
     st.header("Downloading and Loading models")
-    st.markdown("[API Reference for model_loaders](https://mozilla-ai.github.io/document-to-podcast/api/#opennotebookllm.inference.model_loaders)")
+    st.markdown(
+        "[API Reference for model_loaders](https://mozilla-ai.github.io/document-to-podcast/api/#opennotebookllm.inference.model_loaders)"
+    )
 
     text_model = load_text_to_text_model()
     speech_model, speech_tokenizer = load_text_to_speech_model_and_tokenizer()
@@ -109,13 +110,13 @@ def load_text_to_speech_model_and_tokenizer():
             text += chunk
             if text.endswith("\n") and "Speaker" in text:
                 st.write(text)
-                speaker_id = re.search(r'Speaker (\d+)', text).group(1)
+                speaker_id = re.search(r"Speaker (\d+)", text).group(1)
                 with st.spinner("Generating Audio..."):
                     speech = _speech_generation_parler(
-                            text.split(f'"Speaker {speaker_id}":')[-1],
-                            speech_model,
-                            speech_tokenizer,
-                            SPEAKER_DESCRIPTIONS[speaker_id]
+                        text.split(f'"Speaker {speaker_id}":')[-1],
+                        speech_model,
+                        speech_tokenizer,
+                        SPEAKER_DESCRIPTIONS[speaker_id],
                     )
                 st.audio(speech, sample_rate=44_100)
                 text = ""
diff --git a/src/opennotebookllm/podcast_maker/script_to_audio.py b/src/opennotebookllm/podcast_maker/script_to_audio.py
index 4abba32..911266f 100644
--- a/src/opennotebookllm/podcast_maker/script_to_audio.py
+++ b/src/opennotebookllm/podcast_maker/script_to_audio.py
@@ -21,7 +21,7 @@ def parse_script_to_waveform(script: str, podcast_config: PodcastConfig):
     podcast_waveform = []
     for part in parts:
         if ":" in part:
-            speaker_id, speaker_text = part.replace("\"", "").split(":")
+            speaker_id, speaker_text = part.replace('"', "").split(":")
             speaker_model = podcast_config.speakers[speaker_id].model
             speaker_tokenizer = podcast_config.speakers[speaker_id].tokenizer
             speaker_description = podcast_config.speakers[