Skip to content

Commit

Permalink
♻️ string flow based implementation of md2html
Browse files Browse the repository at this point in the history
- hide the temp file inside the pandoc conversion
- toc addition as doc -> doc conversion
- goal: simplify the logic as a filter flow
  • Loading branch information
vokimon committed Nov 8, 2024
1 parent 4248ce3 commit b5b5e78
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 51 deletions.
2 changes: 1 addition & 1 deletion TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

- [ ] El TOC del document del webforms no te titol (Taula de continguts)
- [ ] Target del TOC ha de ser també multi idioma (o insertem el target de toc a mà)
- [ ] Fix items in general-conditions
- [ ] Las listas del general conditions estan rotas la mayoria
- [ ] Provar el generat a webforms
- [ ] Links in a different window
- [ ] import general-conditions in different languages
Expand Down
68 changes: 38 additions & 30 deletions legaltexts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import itertools
from consolemsg import warn, step, error
import difflib
from .toc_generator import generate_toc, add_links_to_toc
from .toc_generator import add_markdown_toc, add_links_to_toc
from .translate import tr
from typing_extensions import Annotated

help="""\
This CLI tool automates legaltext workflow
Expand Down Expand Up @@ -155,13 +156,18 @@ def md_to_html_fragment(markdown: str)->str:
Generates html fragment from markdown file
"""
import subprocess
subprocess.run([
'pandoc',
str(markdown_file),
'-t', 'html',
'-o', output_html,
'--metadata', 'pagetitle="CHANGE ME"',
])
from somutils.testutils import temp_path
with temp_path() as tmp:
markdown_file = tmp/f"input.md"
output_html = tmp/'output.html'
markdown_file.write_text(markdown)
subprocess.run([
'pandoc',
str(markdown_file),
'-t', 'html',
'-o', output_html,
])
return output_html.read_text()

app = typer.Typer(
help=help,
Expand Down Expand Up @@ -228,7 +234,7 @@ def generate(target: Annotated[str, typer.Argument()]=''):
)

def generate_web_pdf(master_path: Path, output_prefix: str):
"""Generates a set of deployable files"""
"""Generates a pdf for the website"""
document = master_path.name
output_dir.mkdir(exist_ok=True)
for markdown_file in master_path.glob('??.md'):
Expand All @@ -239,8 +245,7 @@ def generate_web_pdf(master_path: Path, output_prefix: str):
generate_pdf(markdown_file, 'pagedlegaltext.css', target)

def generate_webforms_html(master_path: Path, output_prefix: str):
"""Generates a set of deployable files"""
from somutils.testutils import temp_path
"""Generates an html fragment to be included in webforms LegalText view"""
document = master_path.name
output_dir.mkdir(exist_ok=True)
for markdown_file in master_path.glob('??.md'):
Expand All @@ -249,28 +254,31 @@ def generate_webforms_html(master_path: Path, output_prefix: str):
target = output_dir / output_template
step(f"Generating {target}")

step(f" Reading {markdown_file}...")
markdown_content = markdown_file.read_text()

step(f" Generating TOC")
# Inserta la tabla de content al inicio del archivo
toc = generate_toc(markdown_content, top_level = 2)
markdown_with_toc = markdown_content.replace(
"[TABLE]",
f"# {tr(lang, 'TOC_TITLE')}\n\n{toc}\n\n"
markdown_with_toc = add_markdown_toc(
markdown_content,
place_holder='[TABLE]',
title=tr(lang, 'TOC_TITLE'),
top_level=2,
)
with temp_path() as temp_dir:
toc_markdown_file = temp_dir/f"{lang}.md"
toc_markdown_file.write_text(markdown_with_toc)

step(f" Generating html...")
toc_html_file = temp_dir/f'withtoc.html'
md_to_html_fragment(toc_markdown_file, toc_html_file)
html = toc_html_file.read_text()
final_content = add_links_to_toc(
html,
text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
target="#tabla-de-contenidos",
)
target.write_text(final_content)

step(f" Generating html...")
html = md_to_html_fragment(markdown_with_toc)

step(f" Adding up-links...")
top="<span id='top'></span>\n\n"
final_content = top+add_links_to_toc(
html,
text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
target="#top",
)

step(f" Writing output")
target.write_text(final_content)



if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion legaltexts/i18n/ca.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
TOC_TITLE: Taula de continguts
TOC_GO_TO_TOC: Tornar a dalt
TOC_GO_TO_TOC: Pujar a l'índex
2 changes: 1 addition & 1 deletion legaltexts/i18n/es.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@

TOC_TITLE: Tabla de contenidos
TOC_GO_TO_TOC: Volver a arriba
TOC_GO_TO_TOC: Subir al índice
48 changes: 30 additions & 18 deletions legaltexts/toc_generator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
from bs4 import BeautifulSoup


def add_links_to_toc(html, text, target="#toc"):
"""
>>> add_links_to_toc('<h2>Titol</h2>', text='Torna a dalt')
Expand All @@ -27,7 +26,7 @@ def add_links_to_toc(html, text, target="#toc"):
header.append(uplink)
return str(soup)

def generate_toc(markdown_text, top_level=None, bottom_level=None):
def generate_toc(markdown_text, top_level=None, bottom_level=None, title=None):
"""
>>> md = (
... "Ignored\\n"
Expand All @@ -45,8 +44,12 @@ def generate_toc(markdown_text, top_level=None, bottom_level=None):
>>> generate_toc(md, bottom_level=2)
'- [1. level 1](#level-1)\\n - [1.1. level 2](#level-2)'
>>> generate_toc(md, title="Index")
'# Index\\n\\n- [1. level 1](#level-1)\\n - [1.1. level 2](#level-2)\\n - [1.1.1. level 3](#level-3)'
"""
top_level = top_level or 1
toc_title = f"# {title}\n\n" if title else ''
toc = []
for linia in markdown_text.splitlines():
header = re.match(r"^(#{1,6})\s+((?:\d+[.])+)\s+(.*)", linia)
Expand All @@ -60,23 +63,32 @@ def generate_toc(markdown_text, top_level=None, bottom_level=None):
# Crea el link del titol
link = title.lower().replace(" ", "-").replace(".", "").replace(",", "")
toc.append(f"{' ' * (level - top_level)}- [{numbers} {title}](#{link})")
return "\n".join(toc)

def main():
# Lee el archivo Markdown
with open("es_tmp.md", "r", encoding="utf-8") as file:
content = file.read()

# Genera la tabla de content
toc = generate_toc(content)
return toc_title + "\n".join(toc)

def add_markdown_toc(
original_md: str,
title: str|None=None,
place_holder:str = '',
top_level: int = 0,
):
"""
>>> md = (
... "[TOC]\\n"
... "# 1. level 1\\n"
... )
# Inserta la tabla de content al inicio del archivo
content_toc = f"# TABLA DE CONTENIDOS\n\n{toc}\n\n"
mod_content = content.replace("[TABLE]", content_toc)
>>> add_markdown_toc(md)
'- [1. level 1](#level-1)\\n\\n[TOC]\\n# 1. level 1\\n'
>>> add_markdown_toc(md, place_holder='[TOC]')
'- [1. level 1](#level-1)\\n# 1. level 1\\n'
>>> add_markdown_toc(md, place_holder='[BAD]')
'- [1. level 1](#level-1)\\n\\n[TOC]\\n# 1. level 1\\n'
"""
toc = generate_toc(original_md, top_level = top_level, title=title)
if place_holder and place_holder in original_md:
return original_md.replace(place_holder, toc)
return '\n\n'.join([toc, original_md])

# Guarda el nuevo archivo con la TOC agregada
with open("es.md", "w", encoding="utf-8") as file:
file.write(mod_content)

if __name__ == "__main__":
main()
main()

0 comments on commit b5b5e78

Please sign in to comment.