Skip to content

Commit

Permalink
Minor code hygiene
Browse files Browse the repository at this point in the history
  • Loading branch information
dgunning committed Nov 19, 2024
1 parent 00425bf commit cfbea9b
Showing 1 changed file with 0 additions and 38 deletions.
38 changes: 0 additions & 38 deletions edgar/html2markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,41 +348,6 @@ def _process_table_placeholder(self) -> str:
"""Return a placeholder for tables that will be replaced later"""
return f"[[TABLE_{id(self)}]]"

def _process_table(self, element: Tag, style: StyleInfo) -> DocumentNode:
    """Process table elements into structured data.

    Args:
        element: The table element. Its first ``thead`` and ``tbody``
            descendants (when present) are located with ``find``.
        style: Style information; stored both inside the node content
            and on the returned node itself.

    Returns:
        A ``DocumentNode`` of type ``'table'`` whose content dict holds
        ``headers``, ``rows``, ``alignments`` and ``style``.
    """
    headers = []
    rows = []
    col_alignments = []

    # Process table headers: only the first <tr> inside <thead> is used,
    # both for the header cells and for the column alignments.
    thead = element.find('thead')
    header_row = thead.find('tr') if thead else None
    if header_row:
        headers = self._process_table_row(header_row)
        col_alignments = self._get_column_alignments(header_row)

    # Process table body. Without a <tbody> we fall back to scanning the
    # whole table, which also yields the <thead> rows, hence the skip below.
    tbody = element.find('tbody') or element
    for row in tbody.find_all('tr'):
        # Identity check, not ``==``: tag equality compares markup, so a
        # different parent that merely looks like ``thead`` would otherwise
        # be treated as the header section and its rows dropped.
        # NOTE(review): assumes Tag is bs4.element.Tag — confirm.
        if row.parent is thead:
            continue
        processed_row = self._process_table_row(row)
        if processed_row:  # Only add non-empty rows
            rows.append(processed_row)

    # Create table node with structured data
    return DocumentNode(
        type='table',
        content={
            'headers': headers,
            'rows': rows,
            'alignments': col_alignments,
            'style': style,
        },
        style=style,
    )

def _process_table_row(self, row: Tag) -> List[str]:
"""Process a table row, handling both th and td elements"""
Expand Down Expand Up @@ -592,9 +557,6 @@ def _is_heading(self, element: Tag, style: StyleInfo) -> bool:
# Check font weight
is_bold = style.font_weight in ['bold', '700', '800', '900']

# Check text alignment
is_centered = style.text_align == 'center'

# Check content length
text = element.get_text(strip=True)
is_short = len(text) < 200 # Arbitrary threshold
Expand Down

0 comments on commit cfbea9b

Please sign in to comment.