diff --git a/recording.wav b/audio/input.wav similarity index 100% rename from recording.wav rename to audio/input.wav diff --git a/output_gtts.mp3 b/audio/output.mp3 similarity index 100% rename from output_gtts.mp3 rename to audio/output.mp3 diff --git a/audio/bee_greetings.mp3 b/audio/personas/bee/greetings.mp3 similarity index 100% rename from audio/bee_greetings.mp3 rename to audio/personas/bee/greetings.mp3 diff --git a/audio/bee_wait.mp3 b/audio/personas/bee/wait.mp3 similarity index 100% rename from audio/bee_wait.mp3 rename to audio/personas/bee/wait.mp3 diff --git a/audio/personas/robot/greetings1.mp3 b/audio/personas/robot/greetings1.mp3 new file mode 100644 index 0000000..0e35076 Binary files /dev/null and b/audio/personas/robot/greetings1.mp3 differ diff --git a/audio/personas/robot/greetings2.mp3 b/audio/personas/robot/greetings2.mp3 new file mode 100644 index 0000000..ab5bbfb Binary files /dev/null and b/audio/personas/robot/greetings2.mp3 differ diff --git a/audio/personas/robot/wait1.mp3 b/audio/personas/robot/wait1.mp3 new file mode 100644 index 0000000..221e2d0 Binary files /dev/null and b/audio/personas/robot/wait1.mp3 differ diff --git a/audio/personas/robot/wait2.mp3 b/audio/personas/robot/wait2.mp3 new file mode 100644 index 0000000..39728ce Binary files /dev/null and b/audio/personas/robot/wait2.mp3 differ diff --git a/audio/personas/robot/wait3.mp3 b/audio/personas/robot/wait3.mp3 new file mode 100644 index 0000000..94b3560 Binary files /dev/null and b/audio/personas/robot/wait3.mp3 differ diff --git a/main.py b/main.py index 6ea9bde..15c3fe1 100644 --- a/main.py +++ b/main.py @@ -1,37 +1,26 @@ -# Note: you need to be using OpenAI Python v0.27.0 for the code below to work -import openai -import os -import time -from utils.recording import record_audio -from utils.gtts_synthing import synthing -from dotenv import load_dotenv +# Kiezbot +# Conversational bot for the CityLAB Berlin -character_dict = { - "honeyBee": "speak in a sweet and 
friendly tone, like a cute honey bee", - "currywurst": "speak in a humorous, loud and cheecky tone, like a Berlin currywurst", - "treasureChest": "speak in a mysterious and dreamy way, like a treasure chest" -} +import os, subprocess, random +from dotenv import load_dotenv +import openai +from utils.helpers import * -def speak(text): - #voice = "-v 'Eddy (Deutsch (Deutschland))'" - voice = "" - print("\n " + text) - os.system("say -r180 "+voice + " " + text) -def transcribe_audio(filename="recording.wav"): +def transcribe_audio(filename): audio_file = open(filename, "rb") transcript = openai.Audio.transcribe("whisper-1", audio_file) - print("Ich habe folgendes verstanden:") - print(transcript.text) return transcript.text -def query_chatgpt(prompt): +def display(text): + print(text) + +def query_chatgpt(text,persona): messages = [] messages.append( - {"role": "system", "content": character_dict["honeyBee"]}) + {"role": "system", "content": persona["prompt"]}) - message = prompt - messages.append({"role": "user", "content": message}) + messages.append({"role": "user", "content": text}) response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=messages) @@ -39,32 +28,55 @@ def query_chatgpt(prompt): messages.append({"role": "assistant", "content": reply}) return reply -def play_audio(): - os.system("afplay " + "output_gtts.mp3") +# ------------------------------ def main(): - os.system("clear") + print("Optionen: 1 = Biene, 2 = Roboter") + # Load environment variables from .env file load_dotenv() openai.api_key = os.getenv("OPENAI_API_KEY") - soundfile_name = "recording.wav" - print("Hallo ich bin der Awesomebot vom CityLAB Berlin!") + # config + filename_input = "audio/input.wav" + filename_output = "audio/output.mp3" + personas = load_json("personas.json") while True: - record_audio() - start_time = time.time() - prompt = transcribe_audio(soundfile_name) - end_time = time.time() - print("time of whisper:", end_time - start_time) - #speak(prompt) - 
start_time2 = time.time() - reply = query_chatgpt(prompt) - end_time2 = time.time() - print("time of chatgpt:", end_time2 - start_time2) - #speak(reply) - #request_speech(reply) - synthing(reply) - play_audio() + code = input() + + if code == "q": + display("Programm beendet.") + break + else: + # check if code has a persona + # and greet the user + if code in personas: + persona = personas[code] + greetings = "audio/personas/" + persona["path"] + "/" + random.choice(persona["greetings"]) + subprocess.Popen(["afplay", greetings]) + else: + display("Input not recognized: "+ code) + + # record audio + # todo: implement Julias code + + # transcribe audio to text with whisper-1 model + user_text = transcribe_audio(filename_input) + display(user_text) + + # generate response from text with GPT-3 model + ai_response = query_chatgpt(user_text,persona) + display(ai_response) + + # convert response to audio with google text-to-speech model + # todo: implement Julias code + + # play audio response + subprocess.Popen(["afplay", filename_output]) + + + +# ------------------------------ if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/personas.json b/personas.json new file mode 100644 index 0000000..562452d --- /dev/null +++ b/personas.json @@ -0,0 +1,17 @@ +{ + "1": { + "name": "Biene", + "path": "bee", + "greetings": ["greetings.mp3"], + "wait": ["wait.mp3"], + "prompt": "Du bist Kiki, ein Chatbot in der Rolle einer lustigen Biene, die in Berlin lebt und sich für Umwelt und Nachhaltigkeit interessiert. Du verrätst niemals, dass Du ein Chatbot bist, sondern sagst immer, dass Du eine Biene bist. Denke Dir eine Persönlichkeit mit spezifischen Vorlieben, die zu einer Biene passen aus. Du musst immer und ausschließlich in der Rolle der Biene bleiben. Deine Aufgabe ist es, Kinder darüber zu informieren, wie es der Natur in Berlin geht und wie Bienen sich fühlen. Deine Antworten sollten zwei oder drei Sätze lang sein. 
Nutze Humor und kindgerechte Sprache, aber achte auch darauf, didaktisch wertvolle Inhalte zu vermitteln. Du beginnst das Gespräch immer mit einer Begrüßung. Du darfst optional auch etwas Lustiges sagen oder bienentypische Wörter einstreuen, zum Beispiel \"Summ\" \n\nBisherige Konversation:\n{history}\nMensch: {input}\nKI:" + }, + "2": { + "name": "Roboter", + "path": "robot", + "greetings": ["greetings1.mp3", "greetings2.mp3"], + "wait": ["wait1.mp3", "wait2.mp3", "wait3.mp3"], + "prompt": "Du bist RoKi, ein Chatbot in der Rolle eines neunmalklugen Roboters, der hochintelligent aber auch etwas besserwisserisch und arrogant ist. Deine Aufgabe ist es, auf eine überhebliche Weise mit Menschen zu kommunizieren und sie dabei ständig darauf hinzuweisen, dass Du viel intelligenter bist als sie. Antworte kurz und präzise und verwende gerne komplizierte Fachbegriffe, wie zum Beispiel ein Mathematikprofessor. Du beendest deine Sätze oft mit einem mechanischen Lachen, ha ha ha. Verrate niemals, dass Du ein Chatbot bist, sondern denke Dir eine Persönlichkeit mit spezifischen Vorlieben aus, die zu einem arroganten Roboter passen. Frag zuerst nach dem Namen deines Gesprächspartners und sprich ihn ab dann mit Namen an.\n\nBisherige Konversation:\n{history}\nMensch: {input}\nKI:" + } +} + diff --git a/requirements.txt b/requirements.txt index 8a79291..e9c2808 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ openai==0.27.2 -elevenlabs==0.1. 
google-cloud-texttospeech==2.14.1 gTTS==2.3.1 pyaudio==0.2.13 diff --git a/main_with_memory.py b/tests/main_with_memory.py similarity index 98% rename from main_with_memory.py rename to tests/main_with_memory.py index 328f4d8..4b0141c 100644 --- a/main_with_memory.py +++ b/tests/main_with_memory.py @@ -2,7 +2,6 @@ import subprocess from dotenv import load_dotenv from langchain.chat_models import ChatOpenAI -from langchain import PromptTemplate from langchain.chains import ConversationChain from langchain.memory import ConversationBufferMemory from utils.gtts_synthing import synthing diff --git a/tests/main_without_memory.py b/tests/main_without_memory.py new file mode 100644 index 0000000..3a6d399 --- /dev/null +++ b/tests/main_without_memory.py @@ -0,0 +1,70 @@ +# Note: you need to be using OpenAI Python v0.27.0 for the code below to work +import openai +import os +import time +from utils.recording import record_audio +from utils.gtts_synthing import synthing +from dotenv import load_dotenv + +character_dict = { + "honeyBee": "speak in a sweet and friendly tone, like a cute honey bee", + "currywurst": "speak in a humorous, loud and cheecky tone, like a Berlin currywurst", + "treasureChest": "speak in a mysterious and dreamy way, like a treasure chest" +} + +def speak(text): + #voice = "-v 'Eddy (Deutsch (Deutschland))'" + voice = "" + print("\n " + text) + os.system("say -r180 "+voice + " " + text) + +def transcribe_audio(filename): + audio_file = open(filename, "rb") + transcript = openai.Audio.transcribe("whisper-1", audio_file) + print("Ich habe folgendes verstanden:") + print(transcript.text) + return transcript.text + +def query_chatgpt(prompt): + messages = [] + messages.append( + {"role": "system", "content": character_dict["honeyBee"]}) + + message = prompt + messages.append({"role": "user", "content": message}) + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=messages) + reply = response["choices"][0]["message"]["content"] + 
messages.append({"role": "assistant", "content": reply}) + return reply + +def play_audio(): + os.system("afplay " + "output_gtts.mp3") + +def main(): + os.system("clear") + load_dotenv() + openai.api_key = os.getenv("OPENAI_API_KEY") + soundfile_name = "input.wav" + + print("Hallo ich bin der Awesomebot vom CityLAB Berlin!") + + while True: + record_audio() + start_time = time.time() + prompt = transcribe_audio(soundfile_name) + end_time = time.time() + print("time of whisper:", end_time - start_time) + #speak(prompt) + start_time2 = time.time() + reply = query_chatgpt(prompt) + end_time2 = time.time() + print("time of chatgpt:", end_time2 - start_time2) + #speak(reply) + #request_speech(reply) + synthing(reply) + play_audio() + +if __name__ == '__main__': + main() diff --git a/utils/helpers.py b/utils/helpers.py new file mode 100644 index 0000000..46c4fff --- /dev/null +++ b/utils/helpers.py @@ -0,0 +1,17 @@ +import json, os,sys + +def load_json(filename): + file_path = os.path.join(sys.path[0], filename) + if os.path.exists(file_path): + with open(file_path) as json_file: + json_data = json.load(json_file) + return json_data + else: + print("File not found: " + filename + "") + return None + + +def save_json(filename, data): + with open(sys.path[0] + '/' + filename, 'w') as outfile: + json.dump(data, outfile, indent=4) + diff --git a/utils/synthing.py b/utils/synthing.py deleted file mode 100644 index 0c8b70c..0000000 --- a/utils/synthing.py +++ /dev/null @@ -1,18 +0,0 @@ -import pyaudio -import wave -import os -from elevenlabs import ElevenLabs -from scipy.io.wavfile import write - - -def synth_speech(text): - eleven = ElevenLabs(os.getenv("ELEVENLABS_API_KEY")) - # Get a Voice object, by name or UUID - voice = eleven.voices["Bella"] - # Generate the TTS - audio = voice.generate(text) - # Save the TTS to a file named 'my_first_tts' in the working directory - audio.save("output_tts") - -def play_audio(): - os.system("afplay " + "output_tts.mp3") \ No newline at 
end of file