diff --git a/README.rst b/README.rst index f1d99d5e..04ec5827 100644 --- a/README.rst +++ b/README.rst @@ -90,6 +90,19 @@ to ``True``: KEEP_COMMENTS_ON_MINIFYING = True +Conservative whitespace minifying +--------------------------------- + +By default the minifier will try to intelligently remove whitespace and leave +spaces only as needed for inline text rendering. Sometimes it may be necessary +to not completely remove whitespace but only reduce spaces to a single space. +If you set ``CONSERVATIVE_WHITESPACE_ON_MINIFYING`` to ``True`` then +whitespace is always reduced to a single space and never completely removed. + +.. code-block:: python + + CONSERVATIVE_WHITESPACE_ON_MINIFYING = True + Using the decorator =================== diff --git a/htmlmin/minify.py b/htmlmin/minify.py index 63386e06..f3b0a7b0 100644 --- a/htmlmin/minify.py +++ b/htmlmin/minify.py @@ -22,6 +22,13 @@ except: pass # working in non Django projects +# if `True`, reduce whitespace to a single space only +CONSERVATIVE_WHITESPACE = False +try: + CONSERVATIVE_WHITESPACE = getattr(settings, "CONSERVATIVE_WHITESPACE_ON_MINIFYING", False) +except: + pass # working in non Django projects + # element list coming from # https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/HTML5/HTML5_element_list # combining text-level semantics & edits @@ -84,27 +91,28 @@ def space_minify(soup, ignore_comments=True): if not is_prestr(soup): # reduce multiple space characters new_string = re_multi_space.sub(' ', soup.string) - (prev_flow, next_flow) = is_inflow(soup) - # if the string is in a flow of text, don't remove lone - # spaces - if prev_flow and next_flow: - new_string = re_only_space.sub(' ', new_string) - # else, remove spaces, they are between grouping, section, - # metadata or other types of block - else: - new_string = re_only_space.sub('', new_string) - # if the previous element is not text then remove leading - # spaces - if prev_flow: - new_string = re_start_space.sub(' ', new_string) - else: -
new_string = re_start_space.sub('', new_string) - # if the previous element is not text then remove leading - # spaces - if next_flow: - new_string = re_end_space.sub(' ', new_string) - else: - new_string = re_end_space.sub('', new_string) + if not CONSERVATIVE_WHITESPACE: + (prev_flow, next_flow) = is_inflow(soup) + # if the string is in a flow of text, don't remove lone + # spaces + if prev_flow and next_flow: + new_string = re_only_space.sub(' ', new_string) + # else, remove spaces, they are between grouping, section, + # metadata or other types of block + else: + new_string = re_only_space.sub('', new_string) + # if the previous element is not text then remove leading + # spaces + if prev_flow: + new_string = re_start_space.sub(' ', new_string) + else: + new_string = re_start_space.sub('', new_string) + # if the next element is not text then remove trailing + # spaces + if next_flow: + new_string = re_end_space.sub(' ', new_string) + else: + new_string = re_end_space.sub('', new_string) # bs4 sometimes add a lone newline in the body new_string = re_single_nl.sub('', new_string) soup.string.replace_with(new_string) diff --git a/htmlmin/tests/resources/inline_whitespace.html b/htmlmin/tests/resources/inline_whitespace.html new file mode 100644 index 00000000..180d2667 --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace.html @@ -0,0 +1,26 @@ + + + Inline Whitespace + + +
+

Main Title TM

+
+

Abstract

+

+ a + b + c +

+
+
+

Premise

+

a space

+

no space

+

+ This is a link. +

+
+
+ + diff --git a/htmlmin/tests/resources/inline_whitespace_minified.html b/htmlmin/tests/resources/inline_whitespace_minified.html new file mode 100644 index 00000000..24a24e5b --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace_minified.html @@ -0,0 +1 @@ +Inline Whitespace

Main TitleTM

Abstract

a b c

Premise

a space

nospace

This isa link.

\ No newline at end of file diff --git a/htmlmin/tests/resources/inline_whitespace_minified_conservative.html b/htmlmin/tests/resources/inline_whitespace_minified_conservative.html new file mode 100644 index 00000000..1955a253 --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace_minified_conservative.html @@ -0,0 +1 @@ + Inline Whitespace

Main Title TM

Abstract

a b c

Premise

a space

no space

This is a link.

\ No newline at end of file diff --git a/htmlmin/tests/test_minify.py b/htmlmin/tests/test_minify.py index 3887a5b1..78b284df 100644 --- a/htmlmin/tests/test_minify.py +++ b/htmlmin/tests/test_minify.py @@ -21,9 +21,10 @@ def resources_path(*paths): class TestMinify(unittest.TestCase): - def _normal_and_minified(self, filename): + def _normal_and_minified(self, filename, variant=None): html_file = resources_path('%s.html' % filename) - html_file_minified = resources_path('%s_minified.html' % filename) + minified_postfix = '_%s' % variant if variant else '' + html_file_minified = resources_path('%s_minified%s.html' % (filename, minified_postfix)) html = open(html_file).read() f_minified = codecs.open(html_file_minified, encoding='utf-8') @@ -173,3 +174,14 @@ def test_non_ascii_in_excluded_element(self): 'non_ascii_in_excluded_element' ) self.assertEqual(minified, html_minify(html)) + + def test_non_conservative_whitespace(self): + self.maxDiff = None + html, minified = self._normal_and_minified('inline_whitespace') + self.assertEqual(minified, html_minify(html)) + + @unittest.mock.patch('htmlmin.minify.CONSERVATIVE_WHITESPACE', True) + def test_conservative_whitespace(self): + self.maxDiff = None + html, minified = self._normal_and_minified('inline_whitespace', variant='conservative') + self.assertEqual(minified, html_minify(html))