Skip to content

Commit

Permalink
Save single audio file & script when completed in demo app (#42)
Browse files Browse the repository at this point in the history
* Save audio & script when completed in demo app

* Add UI message

* [WIP]

* Save audio and script in streamlit session state
  • Loading branch information
Kostis-S-Z authored Dec 11, 2024
1 parent 7c3c9a6 commit 59d2e5f
Showing 1 changed file with 39 additions and 2 deletions.
41 changes: 39 additions & 2 deletions demo/app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import re
from pathlib import Path

import numpy as np
import streamlit as st

from document_to_podcast.podcast_maker.script_to_audio import save_waveform_as_file
from document_to_podcast.preprocessing import DATA_LOADERS, DATA_CLEANERS
from document_to_podcast.inference.model_loaders import (
load_llama_cpp_model,
Expand Down Expand Up @@ -48,6 +50,21 @@ def load_text_to_speech_model_and_tokenizer():
return load_parler_tts_model_and_tokenizer("parler-tts/parler-tts-mini-v1", "cpu")


# Names of the st.session_state entries used by this app. They are kept as
# module-level variables because later parts of the script look them up.
script = "script"
audio = "audio"
gen_button = "generate podcast button"

# Streamlit re-executes this script on every interaction, so each entry is
# seeded only when it is missing; values already stored for the session are
# left untouched.
for _state_key, _initial_value in (
    (script, ""),  # text of the generated podcast script
    (audio, []),  # list the generated speech clips are appended to
    (gen_button, False),  # set to True by the generate button's callback
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value


def gen_button_clicked():
    """Record in the session state that the generate button was pressed.

    Used as the ``on_click`` callback of the "Generate Podcast" button; the
    stored flag is what later gates the display of the save buttons.
    """
    st.session_state[gen_button] = True


st.title("Document To Podcast")

st.header("Uploading Data")
Expand Down Expand Up @@ -123,15 +140,17 @@ def load_text_to_speech_model_and_tokenizer():

system_prompt = st.text_area("Podcast generation prompt", value=PODCAST_PROMPT)

if st.button("Generate Podcast"):
if st.button("Generate Podcast", on_click=gen_button_clicked):
with st.spinner("Generating Podcast..."):
text = ""
for chunk in text_to_text_stream(
clean_text, text_model, system_prompt=system_prompt.strip()
):
text += chunk
if text.endswith("\n") and "Speaker" in text:
st.write(text)
st.session_state.script += text
st.write(st.session_state.script)

speaker_id = re.search(r"Speaker (\d+)", text).group(1)
with st.spinner("Generating Audio..."):
speech = text_to_speech(
Expand All @@ -141,4 +160,22 @@ def load_text_to_speech_model_and_tokenizer():
SPEAKER_DESCRIPTIONS[speaker_id],
)
st.audio(speech, sample_rate=speech_model.config.sampling_rate)
st.session_state.audio.append(speech)
text = ""

# The save actions are offered only after "Generate Podcast" has been clicked
# in this session (the flag is set by the button's on_click callback).
if st.session_state[gen_button]:
    if st.button("Save Podcast to audio file"):
        if st.session_state.audio:
            # Concatenate into a local variable. Writing the result back to
            # st.session_state.audio would replace the list of clips with a
            # flat array, so a second click on this button (or a later
            # `.append` during regeneration) would fail.
            podcast_waveform = np.concatenate(st.session_state.audio)
            save_waveform_as_file(
                waveform=podcast_waveform,
                sampling_rate=speech_model.config.sampling_rate,
                filename="podcast.wav",
            )
            st.markdown("Podcast saved to disk!")
        else:
            # np.concatenate raises on an empty sequence; tell the user
            # instead of crashing the app.
            st.warning("No audio has been generated yet.")

    if st.button("Save Podcast script to text file"):
        # The closing brace is added to the written text only, not to the
        # stored script, so saving repeatedly does not accumulate extra "}".
        # NOTE(review): presumably the streamed script is a JSON-style object
        # whose final "}" is never captured by the generation loop - confirm.
        # UTF-8 is explicit because generated text may contain characters the
        # platform default encoding cannot represent.
        with open("script.txt", "w", encoding="utf-8") as f:
            f.write(st.session_state.script + "}")

        st.markdown("Script saved to disk!")

0 comments on commit 59d2e5f

Please sign in to comment.