From 5bee915f6d5ad77b881e12c891eda1777913d9da Mon Sep 17 00:00:00 2001 From: hashcode-ankit <2698ankitsharma@gmail.com> Date: Sun, 3 Sep 2023 20:18:54 +0530 Subject: [PATCH 1/6] weaviate vector database feature added for openai only --- examples/openai-bot/main.py | 10 ++++++- textbase/__init__.py | 4 ++- textbase/models.py | 59 ++++++++++++++++++++++++++----------- textbase/vector_database.py | 52 ++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 20 deletions(-) create mode 100644 textbase/vector_database.py diff --git a/examples/openai-bot/main.py b/examples/openai-bot/main.py index ebc25837..bb92f965 100644 --- a/examples/openai-bot/main.py +++ b/examples/openai-bot/main.py @@ -3,7 +3,15 @@ from typing import List # Load your OpenAI API key -OpenAI.api_key = "" +OpenAI.api_key = "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" + +# optional if you want to use vector database +# currently textbase support weaviate +OpenAI.vector_db_host = "http://20.193.138.42:8080" + +OpenAI.vector_db_data_class = "Documents" +OpenAI.max_vector_database_objects = 1 + # Prompt for GPT-3.5 Turbo SYSTEM_PROMPT = """You are chatting with an AI. There are no specific prefixes for responses, so you can ask or talk about anything you like. diff --git a/textbase/__init__.py b/textbase/__init__.py index a19f84ae..5e923b6a 100644 --- a/textbase/__init__.py +++ b/textbase/__init__.py @@ -1,2 +1,4 @@ from .bot import bot -from .message import Message \ No newline at end of file +from .message import Message +from .message import Content +from .vector_database import WeaviateClass \ No newline at end of file diff --git a/textbase/models.py b/textbase/models.py index 814ed533..0124eb41 100644 --- a/textbase/models.py +++ b/textbase/models.py @@ -4,8 +4,8 @@ import time import typing import traceback - -from textbase import Message +from textbase import Message, WeaviateClass +# from vector_database import Weaviate # Return list of values of content. def get_contents(message: Message, data_type: str): @@ -28,7 +28,10 @@ def extract_content_values(message: Message): class OpenAI: api_key = None - + vector_db_host = None + vector_db_auth_key = None + vector_db_data_class = None + max_vector_database_objects = None @classmethod def generate( cls, @@ -40,7 +43,6 @@ def generate( ): assert cls.api_key is not None, "OpenAI API key is not set." openai.api_key = cls.api_key - filtered_messages = [] for message in message_history: @@ -48,20 +50,41 @@ def generate( contents = get_contents(message, "STRING") if contents: filtered_messages.extend(contents) - - response = openai.ChatCompletion.create( - model=model, - messages=[ - { - "role": "system", - "content": system_prompt - }, - *map(dict, filtered_messages), - ], - temperature=temperature, - max_tokens=max_tokens, - ) - + weaviate_response = None + # if vector database host provided get response from weaviate + if cls.vector_db_host : + WeaviateClass.host = cls.vector_db_host + WeaviateClass.auth_key = cls.vector_db_auth_key + WeaviateClass.api_key = cls.api_key + WeaviateClass.vector_db_data_class = cls.vector_db_data_class + weaviate_response = WeaviateClass.search_in_weaviate(message_history[-1],"X-OpenAI-Api-Key",cls.max_vector_database_objects) + + response = openai.ChatCompletion.create( + model=model, + messages=[ + { + "role": "system", + "content": system_prompt + }, + *map(dict, filtered_messages), + weaviate_response, + ], + temperature=temperature, + max_tokens=max_tokens, + ) + else : + response = openai.ChatCompletion.create( + model=model, + messages=[ + { + "role": "system", + "content": system_prompt + }, + *map(dict, filtered_messages), + ], + temperature=temperature, + max_tokens=max_tokens, + ) return response["choices"][0]["message"]["content"] class HuggingFace: diff --git a/textbase/vector_database.py b/textbase/vector_database.py new file mode 100644 index 00000000..9fab96ff --- /dev/null +++ b/textbase/vector_database.py @@ -0,0 +1,52 @@ +import weaviate +from textbase import Message,Content +import json +from langchain.vectorstores import Weaviate +from langchain.embeddings.openai import OpenAIEmbeddings + +class WeaviateClass: + api_key = None + host = None + auth_key = None + vector_db_data_class = None + @classmethod + def search_in_weaviate( + cls, + message_query:Message, + model_header_key: str, + max_vector_database_objects: int, + ): + assert cls.api_key is not None, "OpenAI API key is not set." + assert cls.host is not None, "Waaviate Host is not set." + # auth key is optional so can be skipped for None verification + weaviate_client = weaviate.Client( + url = cls.host, + auth_client_secret=cls.auth_key, + additional_headers = { + model_header_key: cls.api_key, + } + ) + user_query = message_query['content'][0]['value'] + embeddings = OpenAIEmbeddings( + openai_api_key = cls.api_key + ) + + weaviate_vectorstore = Weaviate( + weaviate_client, + "Documents", + "text", + embedding = embeddings + ) + weaviate_response = weaviate_vectorstore.similarity_search(user_query) + messages = [] + for response in weaviate_response: + messages.append(response.page_content) + + return { + "role": message_query['role'], + # token limit will exceed so taking only 1000 char + "content": json.dumps(messages,indent=4)[:1000] + } + + + \ No newline at end of file From 799cfaf811e8b4bbd1f1a2019497990c38a9d987 Mon Sep 17 00:00:00 2001 From: hashcode-ankit <2698ankitsharma@gmail.com> Date: Sun, 3 Sep 2023 20:26:17 +0530 Subject: [PATCH 2/6] reformatting --- examples/openai-bot/main.py | 9 +++++---- textbase/models.py | 3 +-- textbase/vector_database.py | 8 +++++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/openai-bot/main.py b/examples/openai-bot/main.py index bb92f965..638750c6 100644 --- a/examples/openai-bot/main.py +++ b/examples/openai-bot/main.py @@ -3,14 +3,15 @@ from typing import List # Load your OpenAI API key -OpenAI.api_key = "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" +OpenAI.api_key = "" # optional if you want to use vector database # currently textbase support weaviate -OpenAI.vector_db_host = "http://20.193.138.42:8080" - +OpenAI.vector_db_host = "http://20.345.234.42:8080" +# vector database class where you stored the objects OpenAI.vector_db_data_class = "Documents" -OpenAI.max_vector_database_objects = 1 +# lenght of response you want to take from weaviate +OpenAI.max_weaviate_res_length = 1000 # Prompt for GPT-3.5 Turbo diff --git a/textbase/models.py b/textbase/models.py index 0124eb41..ee0a3410 100644 --- a/textbase/models.py +++ b/textbase/models.py @@ -5,7 +5,6 @@ import typing import traceback from textbase import Message, WeaviateClass -# from vector_database import Weaviate # Return list of values of content. def get_contents(message: Message, data_type: str): @@ -56,7 +55,7 @@ def generate( WeaviateClass.host = cls.vector_db_host WeaviateClass.auth_key = cls.vector_db_auth_key WeaviateClass.api_key = cls.api_key - WeaviateClass.vector_db_data_class = cls.vector_db_data_class + WeaviateClass.max_weaviate_res_length = cls.max_weaviate_res_length weaviate_response = WeaviateClass.search_in_weaviate(message_history[-1],"X-OpenAI-Api-Key",cls.max_vector_database_objects) response = openai.ChatCompletion.create( diff --git a/textbase/vector_database.py b/textbase/vector_database.py index 9fab96ff..f2d087dc 100644 --- a/textbase/vector_database.py +++ b/textbase/vector_database.py @@ -18,6 +18,7 @@ def search_in_weaviate( ): assert cls.api_key is not None, "OpenAI API key is not set." assert cls.host is not None, "Waaviate Host is not set." + assert cls.max_weaviate_res_length is not None,"max_weaviate_res_length is not set " # auth key is optional so can be skipped for None verification weaviate_client = weaviate.Client( url = cls.host, @@ -26,6 +27,7 @@ def search_in_weaviate( model_header_key: cls.api_key, } ) + # take out user query user_query = message_query['content'][0]['value'] embeddings = OpenAIEmbeddings( openai_api_key = cls.api_key @@ -33,7 +35,7 @@ def search_in_weaviate( weaviate_vectorstore = Weaviate( weaviate_client, - "Documents", + cls.vector_db_data_class, "text", embedding = embeddings ) @@ -44,8 +46,8 @@ def search_in_weaviate( return { "role": message_query['role'], - # token limit will exceed so taking only 1000 char - "content": json.dumps(messages,indent=4)[:1000] + # token limit will exceed so taking limit + "content": json.dumps(messages,indent=4)[:cls.max_weaviate_res_length] } From 25f3479eb6c94269d704ca139f4e0ad6d52be377 Mon Sep 17 00:00:00 2001 From: hashcode-ankit <2698ankitsharma@gmail.com> Date: Sun, 3 Sep 2023 20:44:09 +0530 Subject: [PATCH 3/6] maxmax_weaviate_res_length introduces --- examples/openai-bot/main.py | 12 +++++------- textbase/models.py | 5 +++-- textbase/vector_database.py | 1 - 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/openai-bot/main.py b/examples/openai-bot/main.py index 638750c6..e24ca1cd 100644 --- a/examples/openai-bot/main.py +++ b/examples/openai-bot/main.py @@ -3,21 +3,19 @@ from typing import List # Load your OpenAI API key -OpenAI.api_key = "" +OpenAI.api_key = "sk-6IUNBlbkFJkzBaDKIYHubR6q2I23" # optional if you want to use vector database # currently textbase support weaviate -OpenAI.vector_db_host = "http://20.345.234.42:8080" -# vector database class where you stored the objects +OpenAI.vector_db_host = "http://193.123.12.42:8080" OpenAI.vector_db_data_class = "Documents" -# lenght of response you want to take from weaviate -OpenAI.max_weaviate_res_length = 1000 +OpenAI.max_weaviate_res_length = 1 # Prompt for GPT-3.5 Turbo SYSTEM_PROMPT = """You are chatting with an AI. There are no specific prefixes for responses, so you can ask or talk about anything you like. The AI will respond in a natural, conversational manner. Feel free to start the conversation with any question or topic, and let's have a -pleasant chat! +pleasant chat! You can use the data given at """ @bot() @@ -50,4 +48,4 @@ def on_message(message_history: List[Message], state: dict = None): return { "status_code": 200, "response": response - } \ No newline at end of file + } diff --git a/textbase/models.py b/textbase/models.py index ee0a3410..54dda705 100644 --- a/textbase/models.py +++ b/textbase/models.py @@ -30,7 +30,7 @@ class OpenAI: vector_db_host = None vector_db_auth_key = None vector_db_data_class = None - max_vector_database_objects = None + max_weaviate_res_length = None @classmethod def generate( cls, @@ -56,7 +56,8 @@ def generate( WeaviateClass.auth_key = cls.vector_db_auth_key WeaviateClass.api_key = cls.api_key WeaviateClass.max_weaviate_res_length = cls.max_weaviate_res_length - weaviate_response = WeaviateClass.search_in_weaviate(message_history[-1],"X-OpenAI-Api-Key",cls.max_vector_database_objects) + WeaviateClass.vector_db_data_class = cls.vector_db_data_class + weaviate_response = WeaviateClass.search_in_weaviate(message_history[-1],"X-OpenAI-Api-Key") response = openai.ChatCompletion.create( model=model, diff --git a/textbase/vector_database.py b/textbase/vector_database.py index f2d087dc..f35f8a36 100644 --- a/textbase/vector_database.py +++ b/textbase/vector_database.py @@ -14,7 +14,6 @@ def search_in_weaviate( cls, message_query:Message, model_header_key: str, - max_vector_database_objects: int, ): assert cls.api_key is not None, "OpenAI API key is not set." assert cls.host is not None, "Waaviate Host is not set." From 25350a18bf51a49b550228e325f48f11ff1cf225 Mon Sep 17 00:00:00 2001 From: hashcode-ankit <2698ankitsharma@gmail.com> Date: Sun, 3 Sep 2023 21:27:31 +0530 Subject: [PATCH 4/6] more refactoring --- examples/openai-bot/main.py | 11 ++++---- textbase/models.py | 56 +++++++++++++------------------------ textbase/vector_database.py | 33 ++++++++++------------ 3 files changed, 41 insertions(+), 59 deletions(-) diff --git a/examples/openai-bot/main.py b/examples/openai-bot/main.py index e24ca1cd..fd62c7a1 100644 --- a/examples/openai-bot/main.py +++ b/examples/openai-bot/main.py @@ -3,19 +3,20 @@ from typing import List # Load your OpenAI API key -OpenAI.api_key = "sk-6IUNBlbkFJkzBaDKIYHubR6q2I23" +OpenAI.api_key = "" # optional if you want to use vector database # currently textbase support weaviate -OpenAI.vector_db_host = "http://193.123.12.42:8080" -OpenAI.vector_db_data_class = "Documents" -OpenAI.max_weaviate_res_length = 1 +OpenAI.weaviate_host = "http://12.345.138.42:8080" +OpenAI.weaviate_data_class = "Documents" +# if using free OpenAI key set limit to avoid limit reach error +OpenAI.max_weaviate_res_length = 1000 # Prompt for GPT-3.5 Turbo SYSTEM_PROMPT = """You are chatting with an AI. There are no specific prefixes for responses, so you can ask or talk about anything you like. The AI will respond in a natural, conversational manner. Feel free to start the conversation with any question or topic, and let's have a -pleasant chat! You can use the data given at +pleasant chat!. Use the information provided by vector database here : {vector_database_response}. """ @bot() diff --git a/textbase/models.py b/textbase/models.py index 54dda705..272746a6 100644 --- a/textbase/models.py +++ b/textbase/models.py @@ -27,9 +27,9 @@ def extract_content_values(message: Message): class OpenAI: api_key = None - vector_db_host = None - vector_db_auth_key = None - vector_db_data_class = None + weaviate_host = None + weaviate_auth_key = None + weaviate_data_class = None max_weaviate_res_length = None @classmethod def generate( @@ -49,42 +49,26 @@ def generate( contents = get_contents(message, "STRING") if contents: filtered_messages.extend(contents) + weaviate_response = None # if vector database host provided get response from weaviate - if cls.vector_db_host : - WeaviateClass.host = cls.vector_db_host - WeaviateClass.auth_key = cls.vector_db_auth_key - WeaviateClass.api_key = cls.api_key - WeaviateClass.max_weaviate_res_length = cls.max_weaviate_res_length - WeaviateClass.vector_db_data_class = cls.vector_db_data_class - weaviate_response = WeaviateClass.search_in_weaviate(message_history[-1],"X-OpenAI-Api-Key") + if cls.weaviate_host : + weaviate_response = WeaviateClass.search_in_weaviate(cls.api_key,cls.weaviate_host,cls.weaviate_auth_key,cls.weaviate_data_class,message_history[-1],cls.max_weaviate_res_length,"X-OpenAI-Api-Key") - response = openai.ChatCompletion.create( - model=model, - messages=[ - { - "role": "system", - "content": system_prompt - }, - *map(dict, filtered_messages), - weaviate_response, - ], - temperature=temperature, - max_tokens=max_tokens, - ) - else : - response = openai.ChatCompletion.create( - model=model, - messages=[ - { - "role": "system", - "content": system_prompt - }, - *map(dict, filtered_messages), - ], - temperature=temperature, - max_tokens=max_tokens, - ) + # append the vector databases result in system prompt for better answers + system_prompt= system_prompt.format(vector_database_response = weaviate_response) + response = openai.ChatCompletion.create( + model=model, + messages=[ + { + "role": "system", + "content": system_prompt + }, + *map(dict, filtered_messages), + ], + temperature=temperature, + max_tokens=max_tokens, + ) return response["choices"][0]["message"]["content"] class HuggingFace: diff --git a/textbase/vector_database.py b/textbase/vector_database.py index f35f8a36..2b6c32c8 100644 --- a/textbase/vector_database.py +++ b/textbase/vector_database.py @@ -5,36 +5,33 @@ from langchain.embeddings.openai import OpenAIEmbeddings class WeaviateClass: - api_key = None - host = None - auth_key = None - vector_db_data_class = None @classmethod def search_in_weaviate( cls, + api_key: str, + host: str, + auth_key:str, + weaviate_data_class, message_query:Message, + max_weaviate_res_length: int, model_header_key: str, ): - assert cls.api_key is not None, "OpenAI API key is not set." - assert cls.host is not None, "Waaviate Host is not set." - assert cls.max_weaviate_res_length is not None,"max_weaviate_res_length is not set " - # auth key is optional so can be skipped for None verification weaviate_client = weaviate.Client( - url = cls.host, - auth_client_secret=cls.auth_key, + url = host, + auth_client_secret=auth_key, additional_headers = { - model_header_key: cls.api_key, + model_header_key: api_key, } ) # take out user query user_query = message_query['content'][0]['value'] embeddings = OpenAIEmbeddings( - openai_api_key = cls.api_key + openai_api_key = api_key ) weaviate_vectorstore = Weaviate( weaviate_client, - cls.vector_db_data_class, + weaviate_data_class, "text", embedding = embeddings ) @@ -42,12 +39,12 @@ def search_in_weaviate( messages = [] for response in weaviate_response: messages.append(response.page_content) + response_string = json.dumps(messages) - return { - "role": message_query['role'], - # token limit will exceed so taking limit - "content": json.dumps(messages,indent=4)[:cls.max_weaviate_res_length] - } + # if token limit exceed error come user can configure max_weaviate_res_length to be considered for output + if max_weaviate_res_length and len(response_string)>max_weaviate_res_length : + response_string = response_string[:max_weaviate_res_length] + return response_string \ No newline at end of file From b2f23ee76026f0d52aac7ecd8c6a22ac1bebaf8b Mon Sep 17 00:00:00 2001 From: hashcode-ankit <2698ankitsharma@gmail.com> Date: Sun, 3 Sep 2023 21:35:07 +0530 Subject: [PATCH 5/6] todo added --- textbase/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/textbase/models.py b/textbase/models.py index 272746a6..e3184736 100644 --- a/textbase/models.py +++ b/textbase/models.py @@ -51,9 +51,10 @@ def generate( filtered_messages.extend(contents) weaviate_response = None - # if vector database host provided get response from weaviate + # if weaviate_host provided get response from weaviate if cls.weaviate_host : weaviate_response = WeaviateClass.search_in_weaviate(cls.api_key,cls.weaviate_host,cls.weaviate_auth_key,cls.weaviate_data_class,message_history[-1],cls.max_weaviate_res_length,"X-OpenAI-Api-Key") + # Todo: support for other vector database # append the vector databases result in system prompt for better answers system_prompt= system_prompt.format(vector_database_response = weaviate_response) From 96882694dcb34355b4129b1310c808489ad33600 Mon Sep 17 00:00:00 2001 From: hashcode-ankit <2698ankitsharma@gmail.com> Date: Sun, 3 Sep 2023 22:52:50 +0530 Subject: [PATCH 6/6] documentation update --- docs/docs/examples/openai-bot.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/docs/examples/openai-bot.md b/docs/docs/examples/openai-bot.md index a7df7b7c..b981eff3 100644 --- a/docs/docs/examples/openai-bot.md +++ b/docs/docs/examples/openai-bot.md @@ -14,10 +14,16 @@ from typing import List # Load your OpenAI API key OpenAI.api_key = "" +# For using weaviate vector database +OpenAI.weaviate_host = "http://23.345.138.42:8080" +OpenAI.weaviate_data_class = "Documents" +# if using free OpenAI key set limit to avoid limit reach error +OpenAI.max_weaviate_res_length = 1000 # Prompt for GPT-3.5 Turbo SYSTEM_PROMPT = """You are chatting with an AI. There are no specific prefixes for responses, so you can ask or talk about anything you like. The AI will respond in a natural, conversational manner. Feel free to start the conversation with any question or topic, and let's have a pleasant chat! +Use the information provided by vector database here : {vector_database_response}. """ @bot()