Skip to content

Commit

Permalink
fix(pre): define reduced data sheet list
Browse files (browse the repository at this point in the history)
  • Loading branch information
JGrothoff committed Nov 20, 2024
1 parent f787a3c commit 832fec7
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/pdf2aas/preprocessor/pdf/pdf2html_ex.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -160,6 +160,7 @@ def reduce_datasheet( # noqa: C901
if reduced_datasheet_match is None:
return ""
reduced_datasheet = reduced_datasheet_match.group(1)
+ reduced_datasheet_list = []
if level >= ReductionLevel.PAGES:
logger.debug("Reducing datasheet to ReductionLevel.PAGES")
reduced_datasheet_list = re.findall(r'<div id="pf.*', reduced_datasheet)
Expand All @@ -175,9 +176,10 @@
logger.debug("Reducing datasheet to ReductionLevel.TEXT")
for idx, page in enumerate(reduced_datasheet_list):
reduced_datasheet_list[idx] = re.sub(r"<div.*?>|</div>", "", page)
+ result = reduced_datasheet_list if level >= ReductionLevel.DIVS else reduced_datasheet
logger.info("Reduced datasheet to ReductionLevel %s", level.name)
- logger.debug("reduced datasheet:\n%s", str(reduced_datasheet_list))
- return reduced_datasheet_list
+ logger.debug("Reduced datasheet text:\n%s", str(result))
+ return result

def clear_temp_dir(self) -> None:
"""Clear the temporary directory used for storing intermediate HTML files."""
Expand Down

0 comments on commit 832fec7

Please sign in to comment.