Skip to content

Commit

Permalink
Merge pull request #125 from ComPlat/arbin-excel-reader
Browse files Browse the repository at this point in the history
Arbin excel reader
  • Loading branch information
StarmanMartin authored Nov 19, 2024
2 parents b9a9318 + e930729 commit 6950a9e
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@ __pycache__/
test_manager/test_readers.py
/test_manager/test_profiles.py
/test_files/

logs/
40 changes: 35 additions & 5 deletions converter_app/readers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import openpyxl

from converter_app.readers.helper.reader import Readers
from converter_app.readers.helper.base import Reader
from converter_app.readers.helper.base import Reader, Table

logger = logging.getLogger(__name__)

Expand All @@ -19,6 +19,8 @@ class ExcelReader(Reader):
def __init__(self, file):
    """Set up the reader state for ``file``.

    :param file: handed unchanged to the base ``Reader`` initialiser.
    """
    super().__init__(file)
    # Workbook whose worksheets prepare_tables() iterates over; it starts
    # out empty (presumably assigned in check() - confirm).
    self.wb = None
    # Two extra tables that only collect metadata: they are filled by
    # _set_row_metadata() / _set_col_metadata() and appended after the data
    # tables at the end of prepare_tables().
    self._table_row_meta = Table()
    self._table_col_meta = Table()

def check(self):
"""
Expand All @@ -40,29 +42,35 @@ def check(self):

def prepare_tables(self):
tables = []

# A, B, C
# C , C ,C ,C
# loop over worksheets
for ws in self.wb:
self.append_table(tables)
keys = []

previous_shape = None
for row in ws.values:
shape = self.get_shape(row)

if 's' in shape:
# there is a string in this row, this can't be the table

self._set_col_metadata(row)
if tables[-1]['rows']:
# if a table is already there, this must be a new header
self.append_table(tables)
keys = []

tables[-1]['header'].append('\t'.join([str(cell) for cell in row]))
self._set_row_metadata(keys, row, True)

elif 'f' in shape:
if tables[-1]['rows'] and shape != previous_shape:
# start a new table if the shape has changed
self.append_table(tables)

keys = []
else:
self._set_row_metadata(keys, row, False)
# this row has floats but no strings, this is the "real" table
values = [row[i] for i, value in enumerate(shape) if value == 'f']
tables[-1]['rows'].append(values)
Expand All @@ -73,7 +81,29 @@ def prepare_tables(self):

# store shape and row for the next iteration
previous_shape = shape

tables.append(self._table_row_meta)
tables.append(self._table_col_meta)
return tables

def _set_row_metadata(self, keys, row, set_keys):
"""Sets the metadata for the table."""
if len(keys) == 0:
if set_keys:
for cell in row:
keys.append(str(cell))
else:
for cell in row:
key = keys.pop(0)
if set_keys:
keys.append(str(cell))
if key != 'None':
self._table_row_meta['metadata'].add_unique(key, str(cell))

def _set_col_metadata(self, row):
row = [x for x in row if x != 'None']
shape = self.get_shape(row)
if len(shape) < 4 and shape[0] == 's':
for val in row[1:]:
self._table_col_meta['metadata'].add_unique(row[0], str(val))

Readers.instance().register(ExcelReader)
3 changes: 3 additions & 0 deletions converter_app/readers/helper/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ def get_shape(self, row) -> list:
if cell is None:
shape.append(None)
else:
if isinstance(cell, datetime):
shape.append('f')
continue
cell = str(cell).strip()
if cell in self.empty_values:
shape.append('')
Expand Down
7 changes: 4 additions & 3 deletions test_manager/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ def generate_test(src_path, file, res_path, _unused):

test_file.write(f'\n\n\ndef {test_name}():'
f'\n global all_reader'
f'\n (b,a,c)=compare_reader_result(\'{src_path}\',\'{res_path}\',\'{file}\')'
f'\n (b,a,c)=compare_reader_result(r\'{src_path}\',r\'{res_path}\',r\'{file}\')'
f'\n if not c:'
f'\n assert a == {{}}'
f'\n return'
f'\n all_reader.add(a[\'metadata\'][\'reader\'])'
f'\n assert a[\'tables\'] == b[\'tables\']'
f'\n compare_tables(a[\'tables\'], b[\'tables\'])'
f'\n assert a[\'metadata\'][\'extension\'] == b[\'metadata\'][\'extension\']'
f'\n assert a[\'metadata\'][\'reader\'] == b[\'metadata\'][\'reader\']'
f'\n assert a[\'metadata\'][\'mime_type\'] == b[\'metadata\'][\'mime_type\']')
Expand All @@ -95,7 +95,8 @@ def generate_test(src_path, file, res_path, _unused):
TEST_IDX = 0
TEST_DICT = {}
with open(TEST_FILE, 'w+', encoding='utf8') as fp:
fp.write("from .utils_test import compare_reader_result\n"
fp.write("import pytest\n"
"from .utils_test import compare_reader_result, compare_tables\n"
"from converter_app.readers import READERS as registry\n"
"\nall_reader = set()\n")
basic_walk(generate_test)
Expand Down
2 changes: 1 addition & 1 deletion test_manager/profile_test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _generate_profile_tests(src_path, file, _unused, res_path):
with open(TEST_FILE, 'a', encoding='utf8') as test_file:
test_file.write(f'\n\n\ndef {test_name}():'
f'\n global all_reader'
f'\n (a, b)=compare_profile_result(\'{src_path}\',\'{res_path}\',\'{file}\')'
f'\n (a, b)=compare_profile_result(r\'{src_path}\',r\'{res_path}\',r\'{file}\')'
f'\n assert len(a) == len(b)'
f'\n if len(a) > 0:'
f'\n all_reader.add(a[0])'
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions test_manager/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@

FLASK_APP = None

def _compare_row_val(val_res: str, val_exp: str):
    """Assert that two stringified cell values share their first three characters.

    Only this three-character prefix is compared, so values that differ
    further to the right are accepted as equal.
    """
    assert val_res[:3] == val_exp[:3], \
        f'cell mismatch: {val_res!r} != {val_exp!r}'


def _compare_row(row_res, row_exp):
    """Assert that two rows have the same length and matching cells.

    :param row_res: row produced by the reader under test.
    :param row_exp: row from the stored expected result.
    """
    # Without this check a result row that is shorter than the expected row
    # would pass silently, and a longer one would die with an IndexError.
    assert len(row_res) == len(row_exp), \
        f'row length mismatch: {len(row_res)} != {len(row_exp)}'
    for val_res, val_exp in zip(row_res, row_exp):
        _compare_row_val(str(val_res), str(val_exp))


def compare_tables(tables_res, tables_exp):
    """Assert that the tables of a reader result match the expected tables.

    Header, columns and row count must match exactly; every metadata entry
    of the result must be present with the same value in the expected table;
    cells are compared through ``_compare_row``.

    :param tables_res: list of table dicts produced by the reader.
    :param tables_exp: list of table dicts from the stored expected result.
    """
    assert len(tables_res) == len(tables_exp)
    for table_res, table_exp in zip(tables_res, tables_exp):
        assert '\n'.join(table_res['header']) == '\n'.join(table_exp['header'])
        # The metadata check is one-directional: it walks the result keys only.
        for key, value_res in table_res['metadata'].items():
            assert key in table_exp['metadata'], \
                f'unexpected metadata key: {key!r}'
            assert value_res == table_exp['metadata'][key]
        assert table_res['columns'] == table_exp['columns']
        assert len(table_res['rows']) == len(table_exp['rows'])
        for row_res, row_exp in zip(table_res['rows'], table_exp['rows']):
            _compare_row(row_res, row_exp)


def compare_reader_result(src_path, res_path, file):
expected_result = {}
try:
Expand Down

0 comments on commit 6950a9e

Please sign in to comment.