Merge pull request #37 from KeneNwogu/main
fix: add speech outputs to sidebar, user can optionally play. fixes #35
Sammybams authored Oct 21, 2024
2 parents 66deea9 + 1552303 commit 5dccb95
Showing 3 changed files with 23 additions and 7 deletions.
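For orientation, here is a minimal sketch of how the pieces in this commit fit together: each assistant response is synthesized into a uniquely named .wav under speech_outputs/, the filename is stored in session state, and the sidebar offers an optional player for every stored clip. The with st.sidebar: context and the synthesize_speech import are assumptions (neither appears in the hunks below), so treat this as an illustration rather than a copy of the repository code.

    import os
    import uuid
    import streamlit as st
    from src.speech_io import synthesize_speech  # assumed import path

    # Keep generated clips and chat messages across Streamlit reruns
    if 'speech_outputs' not in st.session_state:
        st.session_state.speech_outputs = []
    if 'messages' not in st.session_state:
        st.session_state.messages = []

    def send_response(message, response=None):
        dummy_response = "Hello. How are you?"
        st.session_state.messages.append(('assistant', response or dummy_response))
        # Synthesize the reply into a uniquely named .wav under speech_outputs/
        output_file = uuid.uuid4().hex + ".wav"
        synthesize_speech(output_file=output_file, text=response or dummy_response)
        st.session_state.speech_outputs.append(output_file)

    # Sidebar playback: every stored clip gets an optional audio player
    with st.sidebar:
        st.subheader("Speech output responses")
        for speech_output in st.session_state.speech_outputs:
            st.audio(os.path.join('speech_outputs', speech_output), format="audio/wav", start_time=0)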
.gitignore: 3 changes (2 additions & 1 deletion)
@@ -10,4 +10,5 @@ pyvenv.cfg
*__pycache__/
*.DS_Store
*.venv/
*.wav
*.wav
speech_outputs/
main.py: 17 changes (14 additions & 3 deletions)
@@ -8,6 +8,7 @@
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
import openai
import uuid

# Set up page configuration
st.set_page_config(page_title="Speak-To-Docs", page_icon="📝", layout="wide", initial_sidebar_state="expanded")
@@ -94,14 +95,21 @@ def get_llm() -> ChatOpenAI:
logging.error(f"Error extracting content from document: {e}")
else:
st.session_state.uploaded_files = None


st.subheader("Speech output responses")
if 'speech_outputs' in st.session_state:
for speech_output in st.session_state.speech_outputs:
st.audio(os.path.join('speech_outputs', speech_output), format="audio/wav", start_time=0)

def send_response(message, response=None):
dummy_response = "Hello. How are you?"
st.session_state.messages.append(('assistant', response or dummy_response))

# TODO: make async ??
print(response or dummy_response)
synthesize_speech(text=response or dummy_response)
# generate unique file name
output_file = uuid.uuid4().hex + ".wav"
synthesize_speech(output_file=output_file, text=response or dummy_response)
st.session_state.speech_outputs.append(output_file)


# Chat area and audio input handling
@@ -115,6 +123,9 @@ def send_message():

if 'messages' not in st.session_state:
st.session_state.messages = []

if 'speech_outputs' not in st.session_state:
st.session_state.speech_outputs = []

message = st.container()

src/speech_io.py: 10 changes (7 additions & 3 deletions)
@@ -76,20 +76,24 @@ def synthesize_speech(text, output_file="output.wav", voice_name='en-NG-EzinneNe
"""
if not SPEECH_KEY or not SPEECH_REGION:
return False, "Azure Speech Service credentials are missing."

path = "speech_outputs"
os.makedirs(path, exist_ok=True)
output_file = os.path.join(path, output_file)

output = open(output_file, 'w+')
output.close()

try:
# Configure speech service
speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
# audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)
speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)

# Set the voice for synthesis
speech_config.speech_synthesis_voice_name = voice_name

# Create synthesizer and generate speech
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
result = speech_synthesizer.speak_text_async(text).get()

# Handle the result
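As context for the AudioOutputConfig wiring above, a standalone sketch of file-based synthesis with the Azure Speech SDK follows. The environment variable names and the voice are placeholders, not taken from this repository.

    import os
    import azure.cognitiveservices.speech as speechsdk

    # Write the synthesized audio straight to a .wav file instead of the default speaker
    os.makedirs("speech_outputs", exist_ok=True)
    speech_config = speechsdk.SpeechConfig(subscription=os.environ["SPEECH_KEY"], region=os.environ["SPEECH_REGION"])
    speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"  # placeholder voice
    audio_config = speechsdk.audio.AudioOutputConfig(filename=os.path.join("speech_outputs", "example.wav"))

    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    result = synthesizer.speak_text_async("Hello. How are you?").get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Saved speech_outputs/example.wav")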
