diff --git a/CHANGES.rst b/CHANGES.rst index 21d71107..7447d068 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,8 +8,10 @@ New Features ^^^^^^^^^^^^ - Add Catalan localisation (`#966`_, welpo) +- Make <code class="language-$lang"> for syntax highlighting (`#998`_, pkvach) .. _#966: https://github.com/posativ/isso/pull/966 +.. _#998: https://github.com/isso-comments/isso/pull/998 Breaking Changes ^^^^^^^^^^^^^^^^ diff --git a/isso/tests/test_html.py b/isso/tests/test_html.py index 5b59b39a..5d5f87a3 100644 --- a/isso/tests/test_html.py +++ b/isso/tests/test_html.py @@ -70,6 +70,8 @@ def test_sanitizer(self): ('ld.so', 'ld.so'), ('/usr/lib/x86_64-linux-gnu/libc/memcpy-preload.so', '/usr/lib/x86_64-linux-gnu/libc/memcpy-preload.so'), ('

Test

', '

Test

'), + ('Test', 'Test'), + ('Test', 'Test'), ('', 'alert("Onoe")')] for (input, expected) in examples: @@ -122,7 +124,7 @@ def test_code_blocks(self): convert = html.Markdown(extensions=('fenced-code',)) examples = [ ("```\nThis is a code-fence. \n```", "

This is a code-fence. <hello>\n

"), - ("```c++\nThis is a code-fence. \n```", "

This is a code-fence. <hello>\n

"), + ("```cpp\nThis is a code-fence. \n```", "

This is a code-fence. <hello>\n

"), (" This is a four-character indent. ", "

This is a four-character indent. <hello>\n

")] for (input, expected) in examples: diff --git a/isso/utils/html.py b/isso/utils/html.py index b7b895d2..c1aafad1 100644 --- a/isso/utils/html.py +++ b/isso/utils/html.py @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- import html +import re import bleach import misaka @@ -8,6 +9,13 @@ class Sanitizer(object): + # pattern to match a valid class attribute for code tags + code_language_pattern = re.compile(r"^language-[a-zA-Z0-9]{1,20}$") + + @staticmethod + def allow_attribute_class(tag, name, value): + return name == "class" and bool(Sanitizer.code_language_pattern.match(value)) + def __init__(self, elements, attributes): # attributes found in Sundown's HTML serializer [1] # - except for tag, because images are not generated anyways. @@ -20,8 +28,13 @@ def __init__(self, elements, attributes): "h1", "h2", "h3", "h4", "h5", "h6", "sub", "sup", "table", "thead", "tbody", "th", "td"] + elements - # href for and align for - self.attributes = ["align", "href"] + attributes + # allowed attributes for tags + self.attributes = { + "table": ["align"], + "a": ["href"], + "code": Sanitizer.allow_attribute_class, + "*": attributes + } def sanitize(self, text): clean_html = bleach.clean(text, tags=self.elements, attributes=self.attributes, strip=True) @@ -73,11 +86,11 @@ class Unofficial(misaka.HtmlRenderer): For instance, fenced code blocks (~~~ or ```) are just wrapped in which does not preserve line breaks. If a language is given, it is added - to , compatible with Highlight.js. + to , compatible with Highlight.js. """ def blockcode(self, text, lang): - lang = ' class="{0}"'.format(html.escape(lang)) if lang else '' + lang = ' class="language-{0}"'.format(html.escape(lang)) if lang else '' return "
{0}
\n".format(html.escape(text, False), lang)