Skip to content

Commit

Permalink
Merge pull request #17 from Odeyiany2/main
Browse files Browse the repository at this point in the history
Added File Compatibility Check and Large Document Processing Check
  • Loading branch information
Sammybams authored Oct 5, 2024
2 parents 30a6a55 + a88934b commit 7972de2
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 2 deletions.
27 changes: 26 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
#chat model client: temperature 0.3, with the API key and base URL read from the
#API_KEY and ENDPOINT environment variables
llm = ChatOpenAI(temperature = 0.3, openai_api_key = os.getenv("API_KEY"), openai_api_base = os.getenv("ENDPOINT"), model_name="gpt-35-turbo", engine="Voicetask")

#sidebar configuration
#import the file check functions
#NOTE(review): the wildcard import pulls every public name from src.rag_functions
#into this module; the code below uses allowed_files and file_check_num
from src.rag_functions import *

#make sure the 'uploaded_files' key exists in session state (starts as None)
if 'uploaded_files' not in st.session_state:
st.session_state.uploaded_files = None

Expand All @@ -51,11 +54,33 @@
st.error("You can only upload a maximum of 2 documents.")
st.session_state.uploaded_files = None
else:
st.success(f"{len(st.session_state.uploaded_files)} file(s) uploaded.")
#set a valid upload to True
#the flag is cleared by the first file that fails a check, which also stops the loop
valid_file = True
for file in st.session_state.uploaded_files:
#only names accepted by allowed_files() get their size counted
if allowed_files(file.name):
#file_check_num returns pages (PDF), slides (PPTX) or lines (TXT)
#NOTE(review): for .txt files the count is lines, so the 50 "page" limit below
#is really a 50-line limit for text files - confirm this is intended
num_pages = file_check_num(file)
if num_pages > 50:
st.error(f"{file.name} exceeds the 50-page limit (has {num_pages} pages).")
valid_file = False
break
else:
#extension is missing or not in the allowed list
st.error(f"{file.name} is not a valid file type.")
valid_file = False
break

#report success only when every file passed; otherwise discard the whole upload
if valid_file:
st.success(f"{len(st.session_state.uploaded_files)} file(s) uploaded successfully.")
else:
st.session_state.uploaded_files = None




#chat area
#container that the chat messages are written into
message = st.container()
#the walrus assignment binds the chat input's value to prompt; the branch runs
#only when that value is truthy
if prompt:=st.chat_input("Enter your query"):
#show the user's query as a "user" chat message inside the container
message.chat_message("user").write(prompt)




4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ pydantic==2.9.2
pydantic_core==2.23.4
pydeck==0.9.1
Pygments==2.18.0
PyPDF2==3.0.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-pptx==1.0.2
pytz==2024.2
PyYAML==6.0.2
referencing==0.35.1
Expand All @@ -65,4 +67,4 @@ tqdm==4.66.5
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
yarl==1.12.1
yarl==1.12.1
29 changes: 29 additions & 0 deletions src/rag_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#functions to check file compatibility and that files do not exceed the 50-page limit
from io import BytesIO
from PyPDF2 import PdfReader
from pptx import Presentation

allowed_files_list = ["pdf", "txt", "pptx"]


def allowed_files(filename):
    '''
    Tell whether ``filename`` carries one of the accepted extensions.

    The text after the last "." is compared case-insensitively against
    ``allowed_files_list``. A name that contains no "." is rejected.
    '''
    # Guard clause: without a dot there is no extension to inspect.
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1]
    return extension.lower() in allowed_files_list

def _rewind(stream):
    '''Move ``stream`` back to its start when it supports seeking (best effort).'''
    seek = getattr(stream, "seek", None)
    if seek is None:
        return
    try:
        seek(0)
    except (OSError, ValueError):
        # Non-seekable or closed stream: leave the position where it is.
        pass


def file_check_num(uploaded_file):
    '''
    Returns the number of pages (for PDFs), slides (for PPTX), or lines (for TXT) in the file.

    ``uploaded_file`` must expose a ``name`` attribute and a ``read()`` method
    that returns bytes. The extension is taken from the text after the last
    "." in the name and compared case-insensitively.

    Returns ``None`` when the name has no extension or the extension is not
    one of pdf / pptx / txt; callers should filter such files out first.

    The stream is rewound before and after reading, so the count does not
    depend on an earlier read and later consumers still see the full content.
    '''
    _, dot, extension = uploaded_file.name.rpartition(".")
    file_ext = extension.lower() if dot else ""
    if file_ext not in ("pdf", "pptx", "txt"):
        return None

    # Read the payload once from the start, then restore the position so the
    # same object can be handed on for further processing.
    _rewind(uploaded_file)
    data = uploaded_file.read()
    _rewind(uploaded_file)

    if file_ext == "pdf":
        return len(PdfReader(BytesIO(data)).pages)

    if file_ext == "pptx":
        return len(Presentation(BytesIO(data)).slides)

    # Text file: undecodable bytes are replaced rather than raising, so a
    # non-UTF-8 file is still counted instead of crashing the check.
    return len(data.decode("utf-8", errors="replace").splitlines())

0 comments on commit 7972de2

Please sign in to comment.