Skip to content

Commit

Permalink
🐛 Fix spaces in \begin, \end and \operatorname (#393)
Browse files Browse the repository at this point in the history
* 🐛 Fix spaces in \begin, \end and \operatorname

* Bump version
  • Loading branch information
roniemartinez authored May 6, 2023
1 parent 55944d0 commit dbecc53
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 13 deletions.
28 changes: 16 additions & 12 deletions latex2mathml/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@

PATTERN = re.compile(
rf"""
(%[^\n]+) | # comment
([a-zA-Z]) | # letter
([_^])(\d) | # number succeeding an underscore or a caret
(-?\d+(?:\.\d+)?\s*(?:{'|'.join(UNITS)})) | # dimension
(\d+(?:\.\d+)?) | # integer/decimal
(\.\d*) | # dot (.) or decimal can start with just a dot
(\\[\\\[\]{{}}\s!,:>;|_%#$&]) | # escaped characters
(\\(?:begin|end|operatorname){{[a-zA-Z]+\*?}}) | # begin, end or operatorname
(%[^\n]+) | # comment
([a-zA-Z]) | # letter
([_^])(\d) | # number succeeding an underscore or a caret
(-?\d+(?:\.\d+)?\s*(?:{'|'.join(UNITS)})) | # dimension
(\d+(?:\.\d+)?) | # integer/decimal
(\.\d*) | # dot (.) or decimal can start with just a dot
(\\[\\\[\]{{}}\s!,:>;|_%#$&]) | # escaped characters
(\\(?:begin|end)\s*{{[a-zA-Z]+\*?}}) | # begin or end
(\\operatorname\s*{{[a-zA-Z\s*]+\*?\s*}}) | # operatorname
# color, fbox, href, hbox, mbox, style, text, textbf, textit, textrm, textsf, texttt
(\\(?:color|fbox|hbox|href|mbox|style|text|textbf|textit|textrm|textsf|texttt))\s*{{([^}}]*)}} |
(\\[cdt]?frac)\s*([.\d])\s*([.\d])? | # fractions
(\\math[a-z]+)({{)([a-zA-Z])(}}) | # commands starting with math
(\\[a-zA-Z]+) | # other commands
(\S) # non-space character
(\\[cdt]?frac)\s*([.\d])\s*([.\d])? | # fractions
(\\math[a-z]+)({{)([a-zA-Z])(}}) | # commands starting with math
(\\[a-zA-Z]+) | # other commands
(\S) # non-space character
""",
re.VERBOSE,
)
Expand All @@ -48,4 +49,7 @@ def tokenize(latex_string: str, skip_comments: bool = True) -> Iterator[str]:
if captured.endswith(UNITS):
yield captured.replace(" ", "")
continue
if captured.startswith((commands.BEGIN, commands.END, commands.OPERATORNAME)):
yield "".join(captured.split(" "))
continue
yield captured
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "latex2mathml"
version = "3.75.3"
version = "3.75.4"
repository = "https://github.com/roniemartinez/latex2mathml"
description = "Pure Python library for LaTeX to MathML conversion"
authors = ["Ronie Martinez <[email protected]>"]
Expand Down
2 changes: 2 additions & 0 deletions tests/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@
pytest.param(
r"\frac 1 2 3 + \frac 123", [r"\frac", "1", "2", "3", "+", r"\frac", "1", "2", "3"], id="issue-386"
),
pytest.param(r"\begin {cases} \end {cases}", [r"\begin{cases}", r"\end{cases}"], id="issue-391"),
pytest.param(r"\operatorname { s n } x", [r"\operatorname{sn}", "x"], id="issue-391-operatorname"),
],
)
def test_tokenize(latex: str, expected: list) -> None:
Expand Down

0 comments on commit dbecc53

Please sign in to comment.