Skip to content

Commit

Permalink
Add CONSERVATIVE_WHITESPACE_ON_MINIFYING to retain inline text spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
tisdall committed Jan 8, 2021
1 parent 0b908db commit 3db9295
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 24 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Matthias Bauer <[email protected]>
Mystic-Mirage <[email protected]>
Younes Zakaria <[email protected]>
Marcel Mulder <[email protected]>
Tim Tisdall
13 changes: 13 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,19 @@ to ``True``:
KEEP_COMMENTS_ON_MINIFYING = True
Conservative whitespace minifying
---------------------------------

By default the minifier will try to intelligently remove whitespace and leave
spaces only as needed for inline text rendering. Sometimes it may be necessary
to not completely remove whitespace but only reduce spaces to a single space.
If you set ``CONSERVATIVE_WHITESPACE_ON_MINIFYING`` to ``True`` then
whitespace is always reduced to a single space and never completely removed.

.. code-block:: python
CONSERVATIVE_WHITESPACE_ON_MINIFYING = True
Using the decorator
===================

Expand Down
50 changes: 29 additions & 21 deletions htmlmin/minify.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
except:
pass # working in non Django projects

# If `True`, whitespace is only ever reduced to a single space and is never
# removed completely.
CONSERVATIVE_WHITESPACE = False
try:
    CONSERVATIVE_WHITESPACE = getattr(
        settings, "CONSERVATIVE_WHITESPACE_ON_MINIFYING", False
    )
except Exception:
    # Working in non Django projects: the setting cannot be read, so keep the
    # default. `Exception` is caught instead of using a bare `except` so that
    # KeyboardInterrupt and SystemExit are not silently swallowed.
    pass

# element list coming from
# https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/HTML5/HTML5_element_list
# combining text-level semantics & edits
Expand Down Expand Up @@ -84,27 +91,28 @@ def space_minify(soup, ignore_comments=True):
if not is_prestr(soup):
# reduce multiple space characters
new_string = re_multi_space.sub(' ', soup.string)
(prev_flow, next_flow) = is_inflow(soup)
# if the string is in a flow of text, don't remove lone
# spaces
if prev_flow and next_flow:
new_string = re_only_space.sub(' ', new_string)
# else, remove spaces, they are between grouping, section,
# metadata or other types of block
else:
new_string = re_only_space.sub('', new_string)
# if the previous element is not text then remove leading
# spaces
if prev_flow:
new_string = re_start_space.sub(' ', new_string)
else:
new_string = re_start_space.sub('', new_string)
# if the previous element is not text then remove leading
# spaces
if next_flow:
new_string = re_end_space.sub(' ', new_string)
else:
new_string = re_end_space.sub('', new_string)
if not CONSERVATIVE_WHITESPACE:
(prev_flow, next_flow) = is_inflow(soup)
# if the string is in a flow of text, don't remove lone
# spaces
if prev_flow and next_flow:
new_string = re_only_space.sub(' ', new_string)
# else, remove spaces, they are between grouping, section,
# metadata or other types of block
else:
new_string = re_only_space.sub('', new_string)
# if the previous element is not text then remove leading
# spaces
if prev_flow:
new_string = re_start_space.sub(' ', new_string)
else:
new_string = re_start_space.sub('', new_string)
# if the next element is not text then remove trailing
# spaces
if next_flow:
new_string = re_end_space.sub(' ', new_string)
else:
new_string = re_end_space.sub('', new_string)
# bs4 sometimes add a lone newline in the body
new_string = re_single_nl.sub('', new_string)
soup.string.replace_with(new_string)
Expand Down
26 changes: 26 additions & 0 deletions htmlmin/tests/resources/inline_whitespace.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<html>
<head>
<title>Inline Whitespace</title>
</head>
<body>
<article>
<h1>Main Title<sup> TM</sup></h1>
<section>
<h2>Abstract</h2>
<p>
<span class="s1">a</span>
<span class="s2">b</span>
<span class="s1"> c </span>
</p>
</section>
<section>
<h2>Premise </h2>
<p><span>a</span> <span>space</span></p>
<p><span>no</span><span> space</span></p>
<p>
This is<a href="http://example.com"> a link</a>.
</p>
</section>
</article>
</body>
</html>
1 change: 1 addition & 0 deletions htmlmin/tests/resources/inline_whitespace_minified.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<html><head><title>Inline Whitespace</title></head><body><article><h1>Main Title<sup>TM</sup></h1><section><h2>Abstract</h2><p><span class="s1">a</span> <span class="s2">b</span> <span class="s1">c</span></p></section><section><h2>Premise</h2><p><span>a</span> <span>space</span></p><p><span>no</span><span>space</span></p><p>This is<a href="http://example.com">a link</a>.</p></section></article></body></html>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<html><head> <title>Inline Whitespace</title> </head> <body> <article> <h1>Main Title<sup> TM</sup></h1> <section> <h2>Abstract</h2> <p> <span class="s1">a</span> <span class="s2">b</span> <span class="s1"> c </span> </p> </section> <section> <h2>Premise </h2> <p><span>a</span> <span>space</span></p> <p><span>no</span><span> space</span></p> <p> This is<a href="http://example.com"> a link</a>. </p> </section> </article> </body></html>
22 changes: 20 additions & 2 deletions htmlmin/tests/test_minify.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,22 @@
from htmlmin.minify import html_minify


try:
from unittest.mock import patch # python 3.3 and greater
except ImportError:
from mock import patch


def resources_path(*paths):
    """Return the absolute path of an entry below the ``resources`` directory.

    The ``resources`` directory is the one located next to this file;
    ``paths`` are joined beneath it in the order given.
    """
    resources_dir = join(dirname(__file__), 'resources')
    return abspath(join(resources_dir, *paths))


class TestMinify(unittest.TestCase):

def _normal_and_minified(self, filename):
def _normal_and_minified(self, filename, variant=None):
html_file = resources_path('%s.html' % filename)
html_file_minified = resources_path('%s_minified.html' % filename)
minified_postfix = '_%s' % variant if variant else ''
html_file_minified = resources_path('%s_minified%s.html' % (filename, minified_postfix))

html = open(html_file).read()
f_minified = codecs.open(html_file_minified, encoding='utf-8')
Expand Down Expand Up @@ -173,3 +180,14 @@ def test_non_ascii_in_excluded_element(self):
'non_ascii_in_excluded_element'
)
self.assertEqual(minified, html_minify(html))

def test_non_conservative_whitespace(self):
    """Minify the ``inline_whitespace`` fixture with the default settings."""
    # Do not truncate the assertion diff on failure.
    self.maxDiff = None
    source, expected = self._normal_and_minified('inline_whitespace')
    actual = html_minify(source)
    self.assertEqual(expected, actual)

@patch('htmlmin.minify.CONSERVATIVE_WHITESPACE', True)
def test_conservative_whitespace(self):
    """Minify the ``inline_whitespace`` fixture with conservative mode on.

    The result is compared against the ``conservative`` variant of the
    minified fixture.
    """
    # Do not truncate the assertion diff on failure.
    self.maxDiff = None
    source, expected = self._normal_and_minified(
        'inline_whitespace',
        variant='conservative',
    )
    actual = html_minify(source)
    self.assertEqual(expected, actual)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ django==1.11.28
beautifulsoup4==4.7.1
html5lib==1.0.1
six==1.12.0
mock
9 changes: 8 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
import os
import re
import sys

from setuptools import setup, find_packages

Expand All @@ -23,6 +24,12 @@ def get_version(package):
with open('README.rst', 'r') as fp:
README = fp.read()

# Packages needed only to run the test suite.
tests_require = ['django']
if sys.version_info < (3, 3):
    # 'mock' is a separate library prior to Python 3.3; from 3.3 on it is
    # available as `unittest.mock`. Tuple comparison replaces the manual
    # major/minor checks.
    tests_require.append('mock')

setup(
name='django-htmlmin',
version=version,
Expand All @@ -34,7 +41,7 @@ def get_version(package):
packages=find_packages(),
include_package_data=True,
install_requires=['beautifulsoup4', 'html5lib'],
tests_require=['django'],
tests_require=tests_require,
entry_points={
'console_scripts': [
'pyminify = htmlmin.commands:main',
Expand Down

0 comments on commit 3db9295

Please sign in to comment.