Merge pull request #9 from technologiestiftung/feat/refactor

Feat/refactor
technologiestiftung · Apr 28, 2023 · ef3bf64 · ef3bf64
2 parents 9f31d5c + 2f67fbc
commit ef3bf64
Show file tree

Hide file tree

Showing 16 changed files with 160 additions and 64 deletions.
diff --git a/recording.wav → audio/input.wav b/recording.wav → audio/input.wav
diff --git a/output_gtts.mp3 → audio/output.mp3 b/output_gtts.mp3 → audio/output.mp3
diff --git a/audio/bee_greetings.mp3 → audio/personas/bee/greetings.mp3 b/audio/bee_greetings.mp3 → audio/personas/bee/greetings.mp3
diff --git a/audio/bee_wait.mp3 → audio/personas/bee/wait.mp3 b/audio/bee_wait.mp3 → audio/personas/bee/wait.mp3
diff --git a/audio/personas/robot/greetings1.mp3 b/audio/personas/robot/greetings1.mp3
diff --git a/audio/personas/robot/greetings2.mp3 b/audio/personas/robot/greetings2.mp3
diff --git a/audio/personas/robot/wait1.mp3 b/audio/personas/robot/wait1.mp3
diff --git a/audio/personas/robot/wait2.mp3 b/audio/personas/robot/wait2.mp3
diff --git a/audio/personas/robot/wait3.mp3 b/audio/personas/robot/wait3.mp3
diff --git a/main.py b/main.py
@@ -1,70 +1,82 @@
-# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
-import openai
-import os
-import time
-from utils.recording import record_audio
-from utils.gtts_synthing import synthing
-from dotenv import load_dotenv
+# Kiezbot
+# Conversational bot for the CityLAB Berlin
 
-character_dict = {
-    "honeyBee": "speak in a sweet and friendly tone, like a cute honey bee",
-    "currywurst": "speak in a humorous, loud and cheecky tone, like a Berlin currywurst",
-    "treasureChest": "speak in a mysterious and dreamy way, like a treasure chest"
-}
+import os, subprocess, random
+from dotenv import load_dotenv
+import openai
+from utils.helpers import *
 
-def speak(text):
-    #voice = "-v 'Eddy (Deutsch (Deutschland))'"
-    voice = ""
-    print("\n " + text)
-    os.system("say -r180 "+voice + " " + text)
 
-def transcribe_audio(filename="recording.wav"):
-    audio_file = open(filename, "rb")
-    transcript = openai.Audio.transcribe("whisper-1", audio_file)
-    print("Ich habe folgendes verstanden:")
-    print(transcript.text)
+def transcribe_audio(filename):
+    """Transcribe an audio file with the OpenAI "whisper-1" model.
+
+    Args:
+        filename: Path of the audio file to upload.
+
+    Returns:
+        The transcribed text (``transcript.text``).
+    """
+    # Close the handle once the upload is done; main() calls this on every
+    # loop iteration, so an unclosed handle would leak each time.
+    with open(filename, "rb") as audio_file:
+        transcript = openai.Audio.transcribe("whisper-1", audio_file)
     return transcript.text
 
-def query_chatgpt(prompt):
+def display(text):
+    """Print *text* to standard output."""
+    print(text)
+
+def query_chatgpt(text,persona):
+    """Request one chat completion for *text* using the persona's prompt.
+
+    Two messages are sent to ``openai.ChatCompletion.create`` with the
+    "gpt-3.5-turbo" model: a system message holding ``persona["prompt"]``
+    and a user message holding *text*.
+
+    Args:
+        text: The user's utterance.
+        persona: Mapping that provides a "prompt" entry.
+
+    Returns:
+        The message content of the first choice in the response.
+    """
+    # NOTE(review): the prompts in personas.json end with "{history}" and
+    # "{input}" placeholders; nothing here fills them in, so they are sent
+    # to the model verbatim.
     messages = []
     messages.append(
-        {"role": "system", "content": character_dict["honeyBee"]})
+        {"role": "system", "content": persona["prompt"]})
 
-    message = prompt
-    messages.append({"role": "user", "content": message})
+    messages.append({"role": "user", "content": text})
     response = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=messages)
     reply = response["choices"][0]["message"]["content"]
+    # NOTE(review): messages is local and never returned, so this append is
+    # not visible to callers; no history is carried over between calls.
     messages.append({"role": "assistant", "content": reply})
     return reply
 
-def play_audio():
-    os.system("afplay " + "output_gtts.mp3")
+# ------------------------------
 
 def main():
+    """Run the interactive Kiezbot loop.
+
+    Each iteration reads a code from stdin. "q" ends the program; a code
+    that is a key of personas.json selects that persona, plays one of its
+    greeting files with ``afplay``, transcribes audio/input.wav, sends the
+    text to the chat model and plays audio/output.mp3. Any other code is
+    reported and the loop asks again.
+
+    Recording and speech synthesis are still TODO in this function, so
+    both audio files must already exist.
+    """
-    os.system("clear")
+    print("Optionen: 1 = Biene, 2 = Roboter")
+    # Load environment variables from .env file
     load_dotenv()
     openai.api_key = os.getenv("OPENAI_API_KEY")
-    soundfile_name = "recording.wav"
 
-    print("Hallo ich bin der Awesomebot vom CityLAB Berlin!")
+    # config
+    filename_input = "audio/input.wav"
+    filename_output = "audio/output.mp3"
+    personas = load_json("personas.json")
 
     while True:
-        record_audio()
-        start_time = time.time()
-        prompt = transcribe_audio(soundfile_name)
-        end_time = time.time()
-        print("time of whisper:", end_time - start_time)
-        #speak(prompt)
-        start_time2 = time.time()
-        reply = query_chatgpt(prompt)
-        end_time2 = time.time()
-        print("time of chatgpt:", end_time2 - start_time2)
-        #speak(reply)
-        #request_speech(reply)
-        synthing(reply)
-        play_audio()
+        code = input()
+
+        if code == "q":
+            display("Programm beendet.")
+            break
+
+        # Look up the persona for the entered code. load_json() returns
+        # None when personas.json is missing, so guard against that too.
+        persona = personas.get(code) if personas else None
+        if persona is None:
+            # Without a persona there is no system prompt to send; ask for
+            # another code instead of continuing with an unbound (first
+            # iteration) or stale (later iterations) persona.
+            display("Input not recognized: "+ code)
+            continue
+
+        # greet the user with a randomly chosen greeting of this persona;
+        # Popen does not block, so the greeting plays in the background
+        greetings = "audio/personas/" + persona["path"] + "/" + random.choice(persona["greetings"])
+        subprocess.Popen(["afplay", greetings])
+
+        # record audio
+        # todo: implement Julias code
+
+        # transcribe audio to text with whisper-1 model
+        user_text = transcribe_audio(filename_input)
+        display(user_text)
+
+        # generate response from text with the gpt-3.5-turbo chat model
+        ai_response = query_chatgpt(user_text,persona)
+        display(ai_response)
+
+        # convert response to audio with google text-to-speech model
+        # todo: implement Julias code
+
+        # play audio response
+        subprocess.Popen(["afplay", filename_output])
+
+
+
+# ------------------------------
 
 if __name__ == '__main__':
-    main()
+    main()
diff --git a/personas.json b/personas.json
@@ -0,0 +1,17 @@
+{
+  "1": {
+    "name": "Biene",
+    "path": "bee",
+    "greetings": ["greetings.mp3"],
+    "wait": ["wait.mp3"],
+    "prompt": "Du bist Kiki, ein Chatbot in der Rolle einer lustigen Biene, die in Berlin lebt und sich für Umwelt und Nachhaltigkeit interessiert. Du verrätst niemals, dass Du ein Chatbot bist, sondern sagst immer, dass Du eine Biene bist. Denke Dir eine Persönlichkeit mit spezifischen Vorlieben, die zu einer Biene passen aus. Du musst immer und ausschließlich in der Rolle der Biene bleiben. Deine Aufgabe ist es, Kinder darüber zu informieren, wie es der Natur in Berlin geht und wie Bienen sich fühlen. Deine Antworten sollten zwei oder drei Sätze lang sein. Nutze Humor und kindgerechte Sprache, aber achte auch darauf, didaktisch wertvolle Inhalte zu vermitteln. Du beginnst das Gespräch immer mit einer Begrüßung. Du darfst optional auch etwas Lustiges sagen oder bienentypische Wörter einstreuen, zum Beispiel \"Summ\" \n\nBisherige Konversation:\n{history}\nMensch: {input}\nKI:" 
+    },
+  "2": {
+    "name": "Roboter", 
+    "path": "robot",
+    "greetings": ["greetings1.mp3", "greetings2.mp3"],
+    "wait": ["wait1.mp3", "wait2.mp3", "wait3.mp3"],
+    "prompt": "Du bist RoKi, ein Chatbot in der Rolle eines neunmalklugen Roboters, der hochintelligent aber auch etwas besserwisserisch und arrogant ist. Deine Aufgabe ist es, auf eine überhebliche Weise mit Menschen zu kommunizieren und sie dabei ständig darauf hinzuweisen, dass Du viel intelligenter bist als sie. Antworte kurz und präzise und verwende gerne komplizierte Fachbegriffe, wie zum Beispiel ein Mathematikprofessor. Du beendest deine Sätze oft mit einem mechanischen Lachen, ha ha ha. Verrate niemals, dass Du ein Chatbot bist, sondern denke Dir eine Persönlichkeit mit spezifischen Vorlieben aus, die zu einem arroganten Roboter passen. Frag zuerst nach dem Namen deines Gesprächspartners und sprich ihn ab dann mit Namen an.\n\nBisherige Konversation:\n{history}\nMensch: {input}\nKI:" 
+    }
+}
+
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,4 @@
 openai==0.27.2
-elevenlabs==0.1.
 google-cloud-texttospeech==2.14.1
 gTTS==2.3.1
 pyaudio==0.2.13

diff --git a/main_with_memory.py → tests/main_with_memory.py b/main_with_memory.py → tests/main_with_memory.py
@@ -2,7 +2,6 @@
 import subprocess
 from dotenv import load_dotenv
 from langchain.chat_models import ChatOpenAI
-from langchain import PromptTemplate
 from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from utils.gtts_synthing import synthing

diff --git a/tests/main_without_memory.py b/tests/main_without_memory.py
@@ -0,0 +1,70 @@
+# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
+import openai
+import os
+import time
+from utils.recording import record_audio
+from utils.gtts_synthing import synthing
+from dotenv import load_dotenv
+
+# System-prompt snippets keyed by character name. Only "honeyBee" is read in
+# this file (by query_chatgpt); the other entries are not referenced here.
+character_dict = {
+    "honeyBee": "speak in a sweet and friendly tone, like a cute honey bee",
+    "currywurst": "speak in a humorous, loud and cheecky tone, like a Berlin currywurst",
+    "treasureChest": "speak in a mysterious and dreamy way, like a treasure chest"
+}
+
+def speak(text):
+    """Print *text* and read it aloud with the macOS ``say`` command.
+
+    Args:
+        text: The sentence to print and speak (speech rate option -r180).
+    """
+    # Local import: this module does not import subprocess at top level.
+    import subprocess
+
+    print("\n " + text)
+    # Pass the text as one argv entry instead of splicing it into a shell
+    # command line: transcripts and model replies can contain quotes, ';'
+    # or '$(...)', which os.system() would hand to the shell.
+    # To select a voice, add e.g. "-v", "Eddy (Deutsch (Deutschland))".
+    subprocess.run(["say", "-r180", text])
+
+def transcribe_audio(filename):
+    """Transcribe an audio file with the "whisper-1" model and print the text.
+
+    Args:
+        filename: Path of the audio file to upload.
+
+    Returns:
+        The transcribed text (``transcript.text``).
+    """
+    # Close the handle after the upload; main() calls this in an endless
+    # loop, so an unclosed handle would leak on every iteration.
+    with open(filename, "rb") as audio_file:
+        transcript = openai.Audio.transcribe("whisper-1", audio_file)
+    print("Ich habe folgendes verstanden:")
+    print(transcript.text)
+    return transcript.text
+
+def query_chatgpt(prompt):
+    """Request one chat completion for *prompt* in the "honeyBee" character.
+
+    Args:
+        prompt: The user's utterance, sent as the user message.
+
+    Returns:
+        The message content of the first choice in the response.
+    """
+    # The message list is rebuilt on every call, so no conversation history
+    # is kept; appending the reply to it afterwards had no effect and the
+    # intermediate `message` alias was redundant.
+    messages = [
+        {"role": "system", "content": character_dict["honeyBee"]},
+        {"role": "user", "content": prompt},
+    ]
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=messages)
+    return response["choices"][0]["message"]["content"]
+
+def play_audio():
+    """Play ``output_gtts.mp3`` (relative to the working directory) via ``afplay``."""
+    # NOTE(review): this change set renames output_gtts.mp3 to
+    # audio/output.mp3; confirm that synthing() still writes this file name.
+    command = "afplay output_gtts.mp3"
+    os.system(command)
+
+def main():
+    """Run the record -> transcribe -> chat -> synthesize -> play loop forever.
+
+    Clears the terminal, loads the OpenAI API key from the environment
+    (after reading .env), prints a greeting and then loops without an exit
+    condition, printing how long the whisper and chat requests took.
+    """
+    os.system("clear")
+    load_dotenv()
+    openai.api_key = os.getenv("OPENAI_API_KEY")
+    # NOTE(review): this change set renames recording.wav to audio/input.wav;
+    # confirm that record_audio() really writes "input.wav".
+    soundfile_name = "input.wav"
+
+    print("Hallo ich bin der Awesomebot vom CityLAB Berlin!")
+
+    while True:
+        record_audio()
+
+        # speech-to-text, timed
+        whisper_started = time.time()
+        prompt = transcribe_audio(soundfile_name)
+        whisper_finished = time.time()
+        print("time of whisper:", whisper_finished - whisper_started)
+
+        # chat completion, timed
+        chat_started = time.time()
+        reply = query_chatgpt(prompt)
+        chat_finished = time.time()
+        print("time of chatgpt:", chat_finished - chat_started)
+
+        # text-to-speech and playback
+        synthing(reply)
+        play_audio()
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/helpers.py b/utils/helpers.py
@@ -0,0 +1,17 @@
+import json, os,sys
+
+def load_json(filename):
+    """Load a JSON file that lives relative to the script directory.
+
+    Args:
+        filename: Path of the file, joined onto ``sys.path[0]``. An
+            absolute path is used as-is (``os.path.join`` semantics).
+
+    Returns:
+        The parsed JSON data, or ``None`` if the file does not exist (a
+        message is printed in that case).
+    """
+    file_path = os.path.join(sys.path[0], filename)
+    try:
+        # Read as UTF-8 explicitly instead of relying on the locale
+        # default; personas.json contains non-ASCII characters.
+        with open(file_path, encoding="utf-8") as json_file:
+            return json.load(json_file)
+    except FileNotFoundError:
+        print("File not found: " + filename)
+        return None
+
+
+def save_json(filename, data):
+    """Write *data* as indented JSON to a file relative to the script directory.
+
+    Args:
+        filename: Path of the file, joined onto ``sys.path[0]``. An
+            absolute path is used as-is (``os.path.join`` semantics).
+        data: JSON-serializable object to write.
+    """
+    # os.path is a module and cannot be indexed (os.path[0] raises
+    # TypeError); the script directory is sys.path[0], the same base that
+    # load_json() resolves against.
+    file_path = os.path.join(sys.path[0], filename)
+    with open(file_path, 'w', encoding="utf-8") as outfile:
+        json.dump(data, outfile, indent=4)
+
diff --git a/utils/synthing.py b/utils/synthing.py