diff --git a/app.py b/app.py
index 25e4523..a62fd62 100644
--- a/app.py
+++ b/app.py
@@ -1,11 +1,8 @@
 import os
 import uuid
-from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
 import streamlit as st
 import datasets
-from langchain import hub
 from langchain_huggingface import HuggingFaceEndpointEmbeddings, HuggingFaceEndpoint
-from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langchain.agents import create_react_agent, AgentExecutor
@@ -61,16 +58,18 @@ def setup_huggingface_endpoint():
         task="conversational",
         stop_sequences=[
             "<|im_end|>",
-            "{your_token}".format(your_token=os.getenv("STOP_TOKEN", "<|endoftext|>")),
+            "{your_token}".format(your_token=os.getenv("STOP_TOKEN", "<|end_of_text|>")),
         ],
     )
     return model
 
 def setup_portkey_integrated_model():
+    from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
+    from langchain_openai import ChatOpenAI
     portkey_headers = createHeaders(
-        api_key=os.getenv("PORTKEY_API_KEY"),
-        custom_host=os.getenv("PORTKEY_CUSTOM_HOST"),
-        provider=os.getenv("PORTKEY_PROVIDER")
+        api_key=os.getenv("PORTKEY_API_KEY"),
+        custom_host=os.getenv("PORTKEY_CUSTOM_HOST"),
+        provider=os.getenv("PORTKEY_PROVIDER")
     )
 
     model = ChatOpenAI(
@@ -94,61 +93,17 @@ def setup_huggingface_embeddings():
 
 def load_prompt_and_system_ins():
     #prompt = hub.pull("hwchase17/react-chat")
-    prompt = PromptTemplate.from_template("""
-    InSightful is a bot developed by InfraCloud Technologies.
-
-    InSightful is used to assist technical communities online on platforms such as Slack, Reddit and Discord.
-
-    InSightful can answer questions from conversations amongst community members and can also search StackOverflow for technical questions.
-
-    InSightful can also conduct its own research on the web to find answers to questions.
-
-    InSightful is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. InSightful is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
-
-    InSightful is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, InSightful is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.
-
-    Overall, InSightful is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, InSightful is here to assist.
-
-    TOOLS:
-    ------
-
-    InSightful has access to the following tools:
-
-    {tools}
-
-    To use a tool, please use the following format:
-
-    ```
-    Thought: Do I need to use a tool? Yes
-    Action: the action to take, should be one of [{tool_names}]
-    Action Input: the input to the action
-    Observation: the result of the action
-    ```
-    For the tavily_search_results_json tool, make sure the Action Input is a string derived from the new input.
-
-    When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
-
-    ```
-    Thought: Do I need to use a tool? No
-    Final Answer: [your response here]
-    ```
-
-    Begin!
-
-    Previous conversation history:
-    {chat_history}
-
-    New input: {input}
-    {agent_scratchpad}
-    """)
+    prompt = PromptTemplate.from_file("templates/prompt_template.tmpl")
 
     # Set up prompt template
     template = """
     Based on the retrieved context, respond with an accurate answer. Use the provided tools to support your response.
+
+    Be concise and always provide accurate, specific, and relevant information.
     """
 
     system_instructions = SystemMessage(
-        content=system_message_template,
+        content=template,
         metadata={"role": "system"},
     )
 
@@ -221,46 +176,28 @@ def query_docs(self, model, question, vector_store, prompt):
     def format_docs(docs):
         return "\n\n".join(doc.page_content for doc in docs)
 
-#def create_retriever(name, model, description, client, chroma_embedding_function, embedder):
-#    rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client)
-#    pages = rag.load_documents("spencer/software_slacks")
-#    chunks = rag.chunk_doc(pages)
-#    vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder)
-#    retriever = vector_store.as_retriever(
-#        search_type="similarity", search_kwargs={"k": 10}
-#    )
-#    info_retriever = create_retriever_tool(retriever, name, description)
-#    return info_retriever
-
-def create_reranker_retriever(name, model, description, client, chroma_embedding_function, embedder):
+def create_retriever(name, model, description, client, chroma_embedding_function, embedder):
     rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client)
-    pages = rag.load_documents("spencer/software_slacks", num_docs=100)
+    pages = rag.load_documents("spencer/software_slacks")
     chunks = rag.chunk_doc(pages)
     vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder)
-    compressor = TEIRerank(url="http://{host}:{port}".format(host=os.getenv("RERANKER_HOST", "localhost"),
-                                                             port=os.getenv("RERANKER_PORT", "8082")),
-                           top_n=10,
-                           batch_size=16)
     retriever = vector_store.as_retriever(
-        search_type="similarity", search_kwargs={"k": 100}
-    )
-    compression_retriever = ContextualCompressionRetriever(
-        base_compressor=compressor, base_retriever=retriever
+        search_type="similarity", search_kwargs={"k": 10}
     )
-    info_retriever = create_retriever_tool(compression_retriever, name, description)
+    info_retriever = create_retriever_tool(retriever, name, description)
     return info_retriever
 
 def create_reranker_retriever(name, model, description, client, chroma_embedding_function, embedder):
     rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client)
-    pages = rag.load_documents("spencer/software_slacks", num_docs=250)
+    pages = rag.load_documents("spencer/software_slacks", num_docs=100)
     chunks = rag.chunk_doc(pages)
     vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder)
     compressor = TEIRerank(url="http://{host}:{port}".format(host=os.getenv("RERANKER_HOST", "localhost"),
                                                              port=os.getenv("RERANKER_PORT", "8082")),
-                           top_n=50,
+                           top_n=10,
                            batch_size=16)
     retriever = vector_store.as_retriever(
-        search_type="similarity", search_kwargs={"k": 10}
+        search_type="similarity", search_kwargs={"k": 100}
     )
     compression_retriever = ContextualCompressionRetriever(
         base_compressor=compressor, base_retriever=retriever
@@ -283,21 +220,32 @@ def setup_tools(_model, _client, _chroma_embedding_function, _embedder):
     #    chroma_embedding_function=_chroma_embedding_function,
     #    embedder=_embedder,
     #)
-    reranker_retriever = create_reranker_retriever(
-        name="slack_conversations_retriever",
-        model=_model,
-        description="Useful for when you need to answer from Slack conversations.",
-        client=_client,
-        chroma_embedding_function=_chroma_embedding_function,
-        embedder=_embedder,
-    )
+    if os.getenv("USE_RERANKER", "False") == "True":
+        retriever = create_reranker_retriever(
+            name="slack_conversations_retriever",
+            model=_model,
+            description="Useful for when you need to answer from Slack conversations.",
+            client=_client,
+            chroma_embedding_function=_chroma_embedding_function,
+            embedder=_embedder,
+        )
+    else:
+        retriever = create_retriever(
+            name="slack_conversations_retriever",
+            model=_model,
+            description="Useful for when you need to answer from Slack conversations.",
+            client=_client,
+            chroma_embedding_function=_chroma_embedding_function,
+            embedder=_embedder,
+        )
+
-    return [web_search_tool, stackexchange_tool, reranker_retriever]
+    return [web_search_tool, stackexchange_tool, retriever]
 
 @st.cache_resource
 def setup_agent(_model, _prompt, _client, _chroma_embedding_function, _embedder):
     tools = setup_tools(_model, _client, _chroma_embedding_function, _embedder)
-    agent = create_react_agent(llm=_model, prompt=_prompt, tools=tools)
+    agent = create_react_agent(llm=_model, prompt=_prompt, tools=tools,)
     agent_executor = AgentExecutor(
         agent=agent, verbose=True, tools=tools, handle_parsing_errors=True
     )
@@ -307,8 +255,10 @@ def main():
     client = setup_chroma_client()
     chroma_embedding_function = setup_chroma_embedding_function()
     prompt, system_instructions = load_prompt_and_system_ins()
-    #model = setup_huggingface_endpoint()
-    model = setup_portkey_integrated_model()
+    if os.getenv("ENABLE_PORTKEY", "False") == "True":
+        model = setup_portkey_integrated_model()
+    else:
+        model = setup_huggingface_endpoint()
     embedder = setup_huggingface_embeddings()
 
     agent_executor = setup_agent(
@@ -316,6 +266,7 @@
     )
 
     st.title("InSightful: Your AI Assistant for community questions")
+    st.text("Made with ❤️ by InfraCloud Technologies")
     st.markdown(
         """
         InSightful is an AI assistant that helps you with your questions.
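
Reviewer note: the diff gates its two new code paths behind environment variables rather than hard-coding them. A minimal sketch of how those toggles would be exercised before launching the app; the variable names come from the diff above, and the values shown are illustrative only:

```python
import os

# Illustrative settings only; names are taken from the diff above.
os.environ["ENABLE_PORTKEY"] = "False"        # "True" selects setup_portkey_integrated_model()
os.environ["USE_RERANKER"] = "True"           # "True" selects create_reranker_retriever()
os.environ["RERANKER_HOST"] = "localhost"     # TEI reranker endpoint (diff defaults shown)
os.environ["RERANKER_PORT"] = "8082"
os.environ["STOP_TOKEN"] = "<|end_of_text|>"  # new default stop token for the HF endpoint
```

Note that both toggles compare against the literal string "True", so values such as "true" or "1" fall through to the defaults (the Hugging Face endpoint and the plain similarity retriever).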