♻️ string flow based implementation of md2html

- hide the temp file inside the pandoc conversion - TOC addition as a doc -> doc conversion - goal: simplify the logic as a filter flow
Som-Energia · Nov 8, 2024 · b5b5e78 · b5b5e78
1 parent 4248ce3
commit b5b5e78
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 51 deletions.
diff --git a/TODO.md b/TODO.md
@@ -2,7 +2,7 @@
 
 - [ ] El TOC del document del webforms no te titol (Taula de continguts)
 - [ ] Target del TOC ha de ser també multi idioma (o insertem el target de toc a mà)
-- [ ] Fix items in general-conditions
+- [ ] Las listas del general conditions estan rotas la mayoria
 - [ ] Provar el generat a webforms
 - [ ] Links in a different window
 - [ ] import general-conditions in different languages

diff --git a/legaltexts/cli.py b/legaltexts/cli.py
@@ -7,8 +7,9 @@
 import itertools
 from consolemsg import warn, step, error
 import difflib
-from .toc_generator import generate_toc, add_links_to_toc
+from .toc_generator import add_markdown_toc, add_links_to_toc
 from .translate import tr
+from typing_extensions import Annotated
 
 help="""\
 This CLI tool automates legaltext workflow
@@ -155,13 +156,18 @@ def md_to_html_fragment(markdown: str)->str:
     Generates html fragmentf from markdown file
     """
     import subprocess
-    subprocess.run([
-        'pandoc',
-        str(markdown_file),
-        '-t', 'html',
-        '-o', output_html,
-        '--metadata', 'pagetitle="CHANGE ME"',
-    ])
+    from somutils.testutils import temp_path
+    with temp_path() as tmp:
+        markdown_file = tmp/f"input.md"
+        output_html = tmp/'output.html'
+        markdown_file.write_text(markdown)
+        subprocess.run([
+            'pandoc',
+            str(markdown_file),
+            '-t', 'html',
+            '-o', output_html,
+        ])
+        return output_html.read_text()
 
 app = typer.Typer(
     help=help,
@@ -228,7 +234,7 @@ def generate(target: Annotated[str, typer.Argument()]=''):
         )
 
 def generate_web_pdf(master_path: Path, output_prefix: str):
-    """Generates a set of deployable files"""
+    """Generates a pdf for the website"""
     document = master_path.name
     output_dir.mkdir(exist_ok=True)
     for markdown_file in master_path.glob('??.md'):
@@ -239,8 +245,7 @@ def generate_web_pdf(master_path: Path, output_prefix: str):
         generate_pdf(markdown_file, 'pagedlegaltext.css', target)
 
 def generate_webforms_html(master_path: Path, output_prefix: str):
-    """Generates a set of deployable files"""
-    from somutils.testutils import temp_path
+    """Generates an html fragment to be included in webforms LegalText view"""
     document = master_path.name
     output_dir.mkdir(exist_ok=True)
     for markdown_file in master_path.glob('??.md'):
@@ -249,28 +254,31 @@ def generate_webforms_html(master_path: Path, output_prefix: str):
         target = output_dir / output_template
         step(f"Generating {target}")
 
+        step(f"  Reading {markdown_file}...")
         markdown_content = markdown_file.read_text()
+
         step(f"  Generating TOC")
-        # Inserta la tabla de content al inicio del archivo
-        toc = generate_toc(markdown_content, top_level = 2)
-        markdown_with_toc = markdown_content.replace(
-            "[TABLE]",
-            f"# {tr(lang, 'TOC_TITLE')}\n\n{toc}\n\n"
+        markdown_with_toc = add_markdown_toc(
+            markdown_content,
+            place_holder='[TABLE]',
+            title=tr(lang, 'TOC_TITLE'),
+            top_level=2,
         )
-        with temp_path() as temp_dir:
-            toc_markdown_file = temp_dir/f"{lang}.md"
-            toc_markdown_file.write_text(markdown_with_toc)
-
-            step(f"  Generating html...")
-            toc_html_file = temp_dir/f'withtoc.html'
-            md_to_html_fragment(toc_markdown_file, toc_html_file)
-            html = toc_html_file.read_text()
-            final_content = add_links_to_toc(
-                html,
-                text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
-                target="#tabla-de-contenidos",
-            )
-            target.write_text(final_content)
+
+        step(f"  Generating html...")
+        html = md_to_html_fragment(markdown_with_toc)
+
+        step(f"  Adding up-links...")
+        top="<span id='top'></span>\n\n"
+        final_content = top+add_links_to_toc(
+            html,
+            text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
+            target="#top",
+        )
+
+        step(f"  Writing output")
+        target.write_text(final_content)
+
 
 
 if __name__ == "__main__":

diff --git a/legaltexts/i18n/ca.yaml b/legaltexts/i18n/ca.yaml
@@ -1,2 +1,2 @@
 TOC_TITLE: Taula de continguts
-TOC_GO_TO_TOC: Tornar a dalt
+TOC_GO_TO_TOC: Pujar a l'índex
diff --git a/legaltexts/i18n/es.yaml b/legaltexts/i18n/es.yaml
@@ -1,3 +1,3 @@
 
 TOC_TITLE: Tabla de contenidos
-TOC_GO_TO_TOC: Volver a arriba
+TOC_GO_TO_TOC: Subir al índice
diff --git a/legaltexts/toc_generator.py b/legaltexts/toc_generator.py
@@ -1,7 +1,6 @@
 import re
 from bs4 import BeautifulSoup
 
-
 def add_links_to_toc(html, text, target="#toc"):
     """
     >>> add_links_to_toc('<h2>Titol</h2>', text='Torna a dalt')
@@ -27,7 +26,7 @@ def add_links_to_toc(html, text, target="#toc"):
         header.append(uplink)
     return str(soup)
 
-def generate_toc(markdown_text, top_level=None, bottom_level=None):
+def generate_toc(markdown_text, top_level=None, bottom_level=None, title=None):
     """
     >>> md = (
     ...     "Ignored\\n"
@@ -45,8 +44,12 @@ def generate_toc(markdown_text, top_level=None, bottom_level=None):
     >>> generate_toc(md, bottom_level=2)
     '- [1. level 1](#level-1)\\n  - [1.1. level 2](#level-2)'
 
+    >>> generate_toc(md, title="Index")
+    '# Index\\n\\n- [1. level 1](#level-1)\\n  - [1.1. level 2](#level-2)\\n    - [1.1.1. level 3](#level-3)'
+
     """
     top_level = top_level or 1
+    toc_title = f"# {title}\n\n" if title else ''
     toc = []
     for linia in markdown_text.splitlines():
         header = re.match(r"^(#{1,6})\s+((?:\d+[.])+)\s+(.*)", linia)
@@ -60,23 +63,32 @@ def generate_toc(markdown_text, top_level=None, bottom_level=None):
         # Crea el link del titol
         link = title.lower().replace(" ", "-").replace(".", "").replace(",", "")
         toc.append(f"{'  ' * (level - top_level)}- [{numbers} {title}](#{link})")
-    return "\n".join(toc)
-
-def main():
-    # Lee el archivo Markdown
-    with open("es_tmp.md", "r", encoding="utf-8") as file:
-        content = file.read()
-
-    # Genera la tabla de content
-    toc = generate_toc(content)
+    return toc_title + "\n".join(toc)
+
+def add_markdown_toc(
+    original_md: str,
+    title: str|None=None,
+    place_holder:str = '',
+    top_level: int = 0,
+):
+    """
+    >>> md = (
+    ...     "[TOC]\\n"
+    ...     "# 1. level 1\\n"
+    ...     )
 
-    # Inserta la tabla de content al inicio del archivo
-    content_toc = f"# TABLA DE CONTENIDOS\n\n{toc}\n\n"
-    mod_content = content.replace("[TABLE]", content_toc)
+    >>> add_markdown_toc(md)
+    '- [1. level 1](#level-1)\\n\\n[TOC]\\n# 1. level 1\\n'
+    >>> add_markdown_toc(md, place_holder='[TOC]')
+    '- [1. level 1](#level-1)\\n# 1. level 1\\n'
+    >>> add_markdown_toc(md, place_holder='[BAD]')
+    '- [1. level 1](#level-1)\\n\\n[TOC]\\n# 1. level 1\\n'
+    """
+    toc = generate_toc(original_md, top_level = top_level, title=title)
+    if place_holder and place_holder in original_md:
+        return original_md.replace(place_holder, toc)
+    return '\n\n'.join([toc, original_md])
 
-    # Guarda el nuevo archivo con la TOC agregada
-    with open("es.md", "w", encoding="utf-8") as file:
-        file.write(mod_content)
 
 if __name__ == "__main__":
-    main()
+    main()