diff --git a/AUTHORS b/AUTHORS index f6f71bb1..3ced2e41 100644 --- a/AUTHORS +++ b/AUTHORS @@ -12,3 +12,4 @@ Matthias Bauer Mystic-Mirage Younes Zakaria Marcel Mulder +Tim Tisdall diff --git a/README.rst b/README.rst index f1d99d5e..04ec5827 100644 --- a/README.rst +++ b/README.rst @@ -90,6 +90,19 @@ to ``True``: KEEP_COMMENTS_ON_MINIFYING = True +Conservative whitespace minifying +--------------------------------- + +By default the minifier will try to intelligently remove whitespace and leave +spaces only as needed for inline text rendering. Sometimes it may be necessary +to not completely remove whitespace but only reduce spaces to a single space. +If you set ``CONSERVATIVE_WHITESPACE_ON_MINIFYING`` to ``True`` then +whitespace is always reduced to a single space and never completely removed. + +.. code-block:: python + + CONSERVATIVE_WHITESPACE_ON_MINIFYING = True + Using the decorator =================== diff --git a/htmlmin/minify.py b/htmlmin/minify.py index 63386e06..f3b0a7b0 100644 --- a/htmlmin/minify.py +++ b/htmlmin/minify.py @@ -22,6 +22,13 @@ except: pass # working in non Django projects +# if `True`, reduce whitespace to a single space only +CONSERVATIVE_WHITESPACE = False +try: + CONSERVATIVE_WHITESPACE = getattr(settings, "CONSERVATIVE_WHITESPACE_ON_MINIFYING", False) +except: + pass # working in non Django projects + # element list coming from # https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/HTML5/HTML5_element_list # combining text-level semantics & edits @@ -84,27 +91,28 @@ def space_minify(soup, ignore_comments=True): if not is_prestr(soup): # reduce multiple space characters new_string = re_multi_space.sub(' ', soup.string) - (prev_flow, next_flow) = is_inflow(soup) - # if the string is in a flow of text, don't remove lone - # spaces - if prev_flow and next_flow: - new_string = re_only_space.sub(' ', new_string) - # else, remove spaces, they are between grouping, section, - # metadata or other types of block - else: - new_string =
re_only_space.sub('', new_string) - # if the previous element is not text then remove leading - # spaces - if prev_flow: - new_string = re_start_space.sub(' ', new_string) - else: - new_string = re_start_space.sub('', new_string) - # if the previous element is not text then remove leading - # spaces - if next_flow: - new_string = re_end_space.sub(' ', new_string) - else: - new_string = re_end_space.sub('', new_string) + if not CONSERVATIVE_WHITESPACE: + (prev_flow, next_flow) = is_inflow(soup) + # if the string is in a flow of text, don't remove lone + # spaces + if prev_flow and next_flow: + new_string = re_only_space.sub(' ', new_string) + # else, remove spaces, they are between grouping, section, + # metadata or other types of block + else: + new_string = re_only_space.sub('', new_string) + # if the previous element is not text then remove leading + # spaces + if prev_flow: + new_string = re_start_space.sub(' ', new_string) + else: + new_string = re_start_space.sub('', new_string) + # if the next element is not text then remove trailing + # spaces + if next_flow: + new_string = re_end_space.sub(' ', new_string) + else: + new_string = re_end_space.sub('', new_string) # bs4 sometimes add a lone newline in the body new_string = re_single_nl.sub('', new_string) soup.string.replace_with(new_string) diff --git a/htmlmin/tests/resources/inline_whitespace.html b/htmlmin/tests/resources/inline_whitespace.html new file mode 100644 index 00000000..180d2667 --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace.html @@ -0,0 +1,26 @@ + + + Inline Whitespace + + +
+

Main Title TM

+
+

Abstract

+

+ a + b + c +

+
+
+

Premise

+

a space

+

no space

+

+ This is a link. +

+
+
+ + diff --git a/htmlmin/tests/resources/inline_whitespace_minified.html b/htmlmin/tests/resources/inline_whitespace_minified.html new file mode 100644 index 00000000..24a24e5b --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace_minified.html @@ -0,0 +1 @@ +Inline Whitespace

Main TitleTM

Abstract

a b c

Premise

a space

nospace

This isa link.

\ No newline at end of file diff --git a/htmlmin/tests/resources/inline_whitespace_minified_conservative.html b/htmlmin/tests/resources/inline_whitespace_minified_conservative.html new file mode 100644 index 00000000..1955a253 --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace_minified_conservative.html @@ -0,0 +1 @@ + Inline Whitespace

Main Title TM

Abstract

a b c

Premise

a space

no space

This is a link.

\ No newline at end of file diff --git a/htmlmin/tests/test_minify.py b/htmlmin/tests/test_minify.py index 3887a5b1..faf0dbe0 100644 --- a/htmlmin/tests/test_minify.py +++ b/htmlmin/tests/test_minify.py @@ -15,15 +15,22 @@ from htmlmin.minify import html_minify +try: + from unittest.mock import patch # python 3.3 and greater +except ImportError: + from mock import patch + + def resources_path(*paths): return abspath(join(dirname(__file__), 'resources', *paths)) class TestMinify(unittest.TestCase): - def _normal_and_minified(self, filename): + def _normal_and_minified(self, filename, variant=None): html_file = resources_path('%s.html' % filename) - html_file_minified = resources_path('%s_minified.html' % filename) + minified_postfix = '_%s' % variant if variant else '' + html_file_minified = resources_path('%s_minified%s.html' % (filename, minified_postfix)) html = open(html_file).read() f_minified = codecs.open(html_file_minified, encoding='utf-8') @@ -173,3 +180,14 @@ def test_non_ascii_in_excluded_element(self): 'non_ascii_in_excluded_element' ) self.assertEqual(minified, html_minify(html)) + + def test_non_conservative_whitespace(self): + self.maxDiff = None + html, minified = self._normal_and_minified('inline_whitespace') + self.assertEqual(minified, html_minify(html)) + + @patch('htmlmin.minify.CONSERVATIVE_WHITESPACE', True) + def test_conservative_whitespace(self): + self.maxDiff = None + html, minified = self._normal_and_minified('inline_whitespace', variant='conservative') + self.assertEqual(minified, html_minify(html)) diff --git a/requirements.txt b/requirements.txt index 9a14c5f9..2a402a32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ django==1.11.28 beautifulsoup4==4.7.1 html5lib==1.0.1 six==1.12.0 +mock diff --git a/setup.py b/setup.py index 35334802..a990f4b8 100644 --- a/setup.py +++ b/setup.py @@ -6,6 +6,7 @@ """ import os import re +import sys from setuptools import setup, find_packages @@ -23,6 +24,12 @@ def 
get_version(package): with open('README.rst', 'r') as fp: README = fp.read() +tests_require = ['django'] +if (sys.version_info[0] == 2 + or (sys.version_info[0] == 3 and sys.version_info[1] < 3)): + # 'mock' is a separate library prior to Python 3.3 + tests_require.append('mock') + setup( name='django-htmlmin', version=version, @@ -34,7 +41,7 @@ def get_version(package): packages=find_packages(), include_package_data=True, install_requires=['beautifulsoup4', 'html5lib'], - tests_require=['django'], + tests_require=tests_require, entry_points={ 'console_scripts': [ 'pyminify = htmlmin.commands:main',