Skip to content

Commit

Permalink
updated xlsx creation and merging
Browse files Browse the repository at this point in the history
  • Loading branch information
Nick-prog committed Jul 2, 2024
1 parent cb2457e commit 64236cc
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 42 deletions.
10 changes: 5 additions & 5 deletions core/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ def find_consultant_agency(self, app_data: list, filename: str) -> list:
self.student_flag = 1
elif self.student_flag == 1 and str(items).startswith('Date of Birth'):
self.student_flag = 0
_list.append(items)
_list.append(items[:25])
_list.append(items[26:])
_list.append(filename)

return _list
Expand All @@ -196,12 +197,11 @@ def generate_xlsx_sheet(self, _list: list, filename: str) -> None:
_temp = []

for idx, items in enumerate(_list):
if len(items[0]) >= 3:
_temp.append([items[0][0], items[0][1], items[0][2], items[1]])

if len(items[0]) >= 4:
_temp.append([items[0][0], items[0][1], items[0][2], items[0][3], items[1]])

if len(_temp) != 0:
df = pd.DataFrame(_temp)
download_default = str(os.path.join(Path.home(), "Downloads"))
filepath = f'{download_default}/{filename}.xlsx'
df.to_excel(filepath, index=False, header= ["DOB", "Info", "Filename", "Name"])
df.to_excel(filepath, index=False, header= ["DOB", "Gender", "Info", "Filename", "Name"])
66 changes: 29 additions & 37 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pandas as pd
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
from typing import Union
from pathlib import Path

def find_initial_dir() -> str:
Expand Down Expand Up @@ -74,11 +73,7 @@ def run(file_path: str, filename: str) -> None:
"""

try:
download_default = str(os.path.join(Path.home(), "Downloads"))
# tk.Tk().withdraw()
# folder = askdirectory(initialdir=download_default, title='Select Download Path')
folder = download_default

folder = str(os.path.join(Path.home(), "Downloads"))
p = core.Process(file_path)
spe_list = p.read_spe_file()

Expand All @@ -94,7 +89,7 @@ def run(file_path: str, filename: str) -> None:
r.capture_student_name()
r.capture_app_type()

# create_xlsx(translated_spe, filename)
create_xlsx(translated_spe, filename)

for idx, item in enumerate(translated_spe):
_list = r.fit_student_data(item)
Expand Down Expand Up @@ -122,38 +117,35 @@ def create_xlsx(translated_spe: list, filename: str)-> None:

def merge_xlsx(input_folder: str = '', output_name: str = 'total.xlsx') -> None:
    """Merge every sheet of every Excel workbook in a folder into one workbook.

    All worksheets of all ``.xlsx``/``.xls`` files found directly inside
    ``input_folder`` are concatenated row-wise, exact duplicate rows are
    dropped, and the result is written to ``output_name`` in that same folder.

    Args:
        input_folder: Folder to scan. An empty value selects the current
            user's ``Downloads`` folder.
        output_name: File name of the merged workbook written into
            ``input_folder``.

    Returns:
        None. When the folder holds no workbook to merge, nothing is written.
    """
    if not input_folder:
        input_folder = str(os.path.join(Path.home(), "Downloads"))
    output_file = str(os.path.join(input_folder, output_name))

    # Collect one dataframe per worksheet.
    dfs = []

    # Sorted so the row order of the merged workbook is reproducible.
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.lower().endswith(('.xlsx', '.xls')):
            continue

        # Skip the result of a previous run (otherwise stale rows are merged
        # back in) and the "~$" lock files Excel creates for open workbooks.
        if file_name.lower() == output_name.lower() or file_name.startswith('~$'):
            continue

        file_path = os.path.join(input_folder, file_name)

        # sheet_name=None reads all worksheets in one pass and returns a dict
        # of dataframes. No engine is forced so pandas can choose a reader
        # that matches the file format (openpyxl cannot open legacy .xls).
        sheets = pd.read_excel(file_path, sheet_name=None)
        dfs.extend(sheets.values())

    # pd.concat raises ValueError on an empty list, so stop early.
    if not dfs:
        print(f'No Excel files to merge in {input_folder}')
        return

    # Concatenate all dataframes into one and drop duplicate rows.
    merged_df = pd.concat(dfs, ignore_index=True).drop_duplicates()

    # Save the merged dataframe to a new Excel file.
    merged_df.to_excel(output_file, index=False, engine='openpyxl')

# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":

# find_spe_files() # Multiple .spe files
find_spe_file() # Singular .spe file
print('Done')
# merge_xlsx()
# print('Done Done')
# find_spe_file() # Singular .spe file
# print('Done')
merge_xlsx()
print('Done Done')

0 comments on commit 64236cc

Please sign in to comment.