Skip to content

Commit

Permalink
utils/html: Provide full control of allowed HTML elements
Browse files Browse the repository at this point in the history
- Added new configuration option "strictly-allowed-html-elements" to specify only allowed HTML tags in the generated output.
- Allowed "mark" and "u" elements for "highlight" and "underline" Markup extensions.
- Updated "allowed-elements" in configuration files to include "tr".

Fixes #751
  • Loading branch information
pkvach committed May 26, 2024
1 parent 6f3874c commit 863c4b7
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 20 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Bugfixes & Improvements
- Make 'text' field in 'comments' table NOT NULL and handling data migration (`#1019`_, pkvach)
- Python 3.12 support (`#1015`_, ix5)
- Disable Postbox submit button on click, enable after response (`#993`_, pkvach)
- Provide full control of allowed HTML elements via the configuration file (`#1007`_, pkvach)

.. _#951: https://github.com/posativ/isso/pull/951
.. _#967: https://github.com/posativ/isso/pull/967
Expand All @@ -64,6 +65,7 @@ Bugfixes & Improvements
.. _#1019: https://github.com/isso-comments/isso/pull/1019
.. _#1015: https://github.com/isso-comments/isso/pull/1015
.. _#993: https://github.com/isso-comments/isso/pull/993
.. _#1007: https://github.com/isso-comments/isso/pull/1007

0.13.1.dev0 (2023-02-05)
------------------------
Expand Down
1 change: 1 addition & 0 deletions contrib/isso-dev.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ reply-to-self = true
options = autolink, fenced-code, no-intra-emphasis, strikethrough, superscript
flags =
allowed-elements =
strictly-allowed-html-elements =
allowed-attributes =

[hash]
Expand Down
15 changes: 12 additions & 3 deletions docs/docs/reference/server-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ allowed-elements

By default, only ``a``, ``blockquote``, ``br``, ``code``, ``del``, ``em``,
``h1``, ``h2``, ``h3``, ``h4``, ``h5``, ``h6``, ``hr``, ``ins``, ``li``,
``ol``, ``p``, ``pre``, ``strong``, ``table``, ``tbody``, ``td``, ``th``,
``ol``, ``p``, ``pre``, ``strong``, ``table``, ``tbody``, ``tr``, ``td``, ``th``,
``thead`` and ``ul`` are allowed.

For a more detailed explanation, see :doc:`/docs/reference/markdown-config`.
Expand All @@ -444,11 +444,20 @@ allowed-elements
mean that ``br, code, del, ...`` and all other default allowed tags are
still allowed. You can only add *additional* elements here.

It is planned to change this behavior, see
`this issue <https://github.com/isso-comments/isso/issues/751>`_.
To specify a list of *only* allowed elements, use the
``strictly-allowed-html-elements`` option instead.

Default: (empty)

strictly-allowed-html-elements

**Only** allow the specified HTML tags in the generated output, comma-separated.
If this option is set, the ``allowed-elements`` option is ignored and the
default elements listed under ``allowed-elements`` are not added either.

Default: (empty)

.. versionadded:: 0.13.1

allowed-attributes
**Additional** HTML attributes (independent from elements) to allow in the
generated output, comma-separated.
Expand Down
6 changes: 5 additions & 1 deletion isso/isso.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,13 @@ flags =

# Additional HTML tags to allow in the generated output, comma-separated. By
# default, only a, blockquote, br, code, del, em, h1, h2, h3, h4, h5, h6, hr,
# ins, li, ol, p, pre, strong, table, tbody, td, th, thead and ul are allowed.
# ins, li, ol, p, pre, strong, table, tbody, tr, td, th, thead and ul are allowed.
allowed-elements =

# Only allow the specified HTML tags in the generated output, comma-separated.
# If this option is set, the "allowed-elements" option is ignored and the
# default tags listed above are not added either.
strictly-allowed-html-elements =

# Additional HTML attributes (independent from elements) to allow in the
# generated output, comma-separated. By default, only align and href are
# allowed.
Expand Down
26 changes: 21 additions & 5 deletions isso/tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_github_flavoured_markdown(self):
</code></pre>""")

def test_sanitizer(self):
sanitizer = html.Sanitizer(elements=[], attributes=[])
sanitizer = html.Sanitizer(elements=["p", "a", "code"], attributes=["href"])
examples = [
('Look: <img src="..." />', 'Look: '),
('<a href="http://example.org/">Ha</a>',
Expand Down Expand Up @@ -94,23 +94,39 @@ def test_render(self):
"markup": {
"options": "autolink",
"flags": "",
"allowed-elements": "",
"allowed-attributes": ""
"allowed-elements": "a, p",
"allowed-attributes": "href",
"strictly-allowed-html-elements": ""
}
})
renderer = html.Markup(conf.section("markup")).render
self.assertIn(renderer("http://example.org/ and sms:+1234567890"),
['<p><a href="http://example.org/" rel="nofollow noopener">http://example.org/</a> and sms:+1234567890</p>',
'<p><a rel="nofollow noopener" href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>'])

def test_render_with_strictly_allowed_elements(self):
# "strictly-allowed-html-elements" is set to "p" while "allowed-elements"
# still lists "a, p"; the strict list is expected to take precedence.
conf = config.new({
"markup": {
"options": "autolink",
"flags": "",
"allowed-elements": "a, p",
"strictly-allowed-html-elements": "p",
"allowed-attributes": "href"
}
})
renderer = html.Markup(conf.section("markup")).render
# <a> is not in the strict list, so the URL must come out as plain text
# and only the surrounding <p> remains in the rendered output.
self.assertEqual(renderer("http://example.org/ and sms:+1234567890"),
'<p>http://example.org/ and sms:+1234567890</p>')

def test_sanitized_render_extensions(self):
"""Options should be normalized from either dashed-case or snake_case (legacy)"""
conf = config.new({
"markup": {
"options": "no_intra_emphasis", # Deliberately snake_case
"flags": "",
"allowed-elements": "",
"allowed-attributes": ""
"allowed-elements": "p",
"allowed-attributes": "",
"strictly-allowed-html-elements": ""
}
})
renderer = html.Markup(conf.section("markup")).render
Expand Down
39 changes: 28 additions & 11 deletions isso/utils/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,7 @@ def allow_attribute_class(tag, name, value):
return name == "class" and bool(Sanitizer.code_language_pattern.match(value))

def __init__(self, elements, attributes):
# attributes found in Sundown's HTML serializer [1]
# - except for <img> tag, because images are not generated anyways.
# - sub and sup added
#
# [1] https://github.com/vmg/sundown/blob/master/html/html.c
self.elements = ["a", "p", "hr", "br", "ol", "ul", "li",
"pre", "code", "blockquote",
"del", "ins", "strong", "em",
"h1", "h2", "h3", "h4", "h5", "h6", "sub", "sup",
"table", "thead", "tbody", "th", "td"] + elements
self.elements = elements

# allowed attributes for tags
self.attributes = {
Expand Down Expand Up @@ -108,12 +99,38 @@ def __init__(self, conf):
parser = Markdown(extensions=self.extensions,
flags=self.flags)
# Filter out empty strings:
allowed_elements = [x for x in conf.getlist("allowed-elements") if x]
strictly_allowed_html_elements = [x for x in conf.getlist("strictly-allowed-html-elements") if x]
allowed_attributes = [x for x in conf.getlist("allowed-attributes") if x]

# if "strictly-allowed-html-elements" option is set, use it instead of "allowed-elements"
if strictly_allowed_html_elements:
allowed_elements = strictly_allowed_html_elements
else:
allowed_elements = [x for x in conf.getlist("allowed-elements") if x]

# elements found in Sundown's HTML serializer [1]
# - except for <img> tag, because images are not generated anyway.
# - sub and sup added
#
# [1] https://github.com/vmg/sundown/blob/master/html/html.c
allowed_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
"pre", "code", "blockquote",
"del", "ins", "strong", "em",
"h1", "h2", "h3", "h4", "h5", "h6", "sub", "sup",
"table", "thead", "tbody", "tr", "th", "td"] + allowed_elements

# If images are allowed, source element should be allowed as well
if 'img' in allowed_elements and 'src' not in allowed_attributes:
allowed_attributes.append('src')

# If 'highlight' extension is enabled, allow 'mark' element
if 'highlight' in self.extensions and 'mark' not in allowed_elements:
allowed_elements.append('mark')

# If 'underline' extension is enabled, allow 'u' element
if 'underline' in self.extensions and 'u' not in allowed_elements:
allowed_elements.append('u')

sanitizer = Sanitizer(allowed_elements, allowed_attributes)

self._render = lambda text: sanitizer.sanitize(parser(text))
Expand Down

0 comments on commit 863c4b7

Please sign in to comment.