Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CONSERVATIVE_WHITESPACE_ON_MINIFYING to retain inline text spaces #148

Merged
merged 1 commit into from
Jan 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Matthias Bauer <[email protected]>
Mystic-Mirage <[email protected]>
Younes Zakaria <[email protected]>
Marcel Mulder <[email protected]>
Tim Tisdall
13 changes: 13 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,19 @@ to ``True``:

KEEP_COMMENTS_ON_MINIFYING = True

Conservative whitespace minifying
---------------------------------

By default the minifier will try to intelligently remove whitespace and leave
spaces only as needed for inline text rendering. Sometimes it may be necessary
to not completely remove whitespace but only reduce spaces to a single space.
If you set ``CONSERVATIVE_WHITESPACE_ON_MINIFYING`` to ``True`` then
whitespace is always reduced to a single space and never completely removed.

.. code-block:: python

CONSERVATIVE_WHITESPACE_ON_MINIFYING = True

Using the decorator
===================

Expand Down
50 changes: 29 additions & 21 deletions htmlmin/minify.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
except:
pass # working in non Django projects

# if `True`, whitespace is only reduced to a single space and never removed
# entirely
CONSERVATIVE_WHITESPACE = False
try:
    CONSERVATIVE_WHITESPACE = getattr(
        settings, "CONSERVATIVE_WHITESPACE_ON_MINIFYING", False
    )
except Exception:
    # working in non Django projects: keep the default. `Exception` (rather
    # than a bare `except`) lets KeyboardInterrupt / SystemExit propagate.
    pass

# element list coming from
# https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/HTML5/HTML5_element_list
# combining text-level semantics & edits
Expand Down Expand Up @@ -84,27 +91,28 @@ def space_minify(soup, ignore_comments=True):
if not is_prestr(soup):
# reduce multiple space characters
new_string = re_multi_space.sub(' ', soup.string)
(prev_flow, next_flow) = is_inflow(soup)
# if the string is in a flow of text, don't remove lone
# spaces
if prev_flow and next_flow:
new_string = re_only_space.sub(' ', new_string)
# else, remove spaces, they are between grouping, section,
# metadata or other types of block
else:
new_string = re_only_space.sub('', new_string)
# if the previous element is not text then remove leading
# spaces
if prev_flow:
new_string = re_start_space.sub(' ', new_string)
else:
new_string = re_start_space.sub('', new_string)
# if the next element is not text then remove trailing
# spaces
if next_flow:
new_string = re_end_space.sub(' ', new_string)
else:
new_string = re_end_space.sub('', new_string)
if not CONSERVATIVE_WHITESPACE:
(prev_flow, next_flow) = is_inflow(soup)
# if the string is in a flow of text, don't remove lone
# spaces
if prev_flow and next_flow:
new_string = re_only_space.sub(' ', new_string)
# else, remove spaces, they are between grouping, section,
# metadata or other types of block
else:
new_string = re_only_space.sub('', new_string)
# if the previous element is not text then remove leading
# spaces
if prev_flow:
new_string = re_start_space.sub(' ', new_string)
else:
new_string = re_start_space.sub('', new_string)
# if the next element is not text then remove trailing
# spaces
if next_flow:
new_string = re_end_space.sub(' ', new_string)
else:
new_string = re_end_space.sub('', new_string)
# bs4 sometimes add a lone newline in the body
new_string = re_single_nl.sub('', new_string)
soup.string.replace_with(new_string)
Expand Down
26 changes: 26 additions & 0 deletions htmlmin/tests/resources/inline_whitespace.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<html>
<head>
<title>Inline Whitespace</title>
</head>
<body>
<article>
<h1>Main Title<sup> TM</sup></h1>
<section>
<h2>Abstract</h2>
<p>
<span class="s1">a</span>
<span class="s2">b</span>
<span class="s1"> c </span>
</p>
</section>
<section>
<h2>Premise </h2>
<p><span>a</span> <span>space</span></p>
<p><span>no</span><span> space</span></p>
<p>
This is<a href="http://example.com"> a link</a>.
</p>
</section>
</article>
</body>
</html>
1 change: 1 addition & 0 deletions htmlmin/tests/resources/inline_whitespace_minified.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<html><head><title>Inline Whitespace</title></head><body><article><h1>Main Title<sup>TM</sup></h1><section><h2>Abstract</h2><p><span class="s1">a</span> <span class="s2">b</span> <span class="s1">c</span></p></section><section><h2>Premise</h2><p><span>a</span> <span>space</span></p><p><span>no</span><span>space</span></p><p>This is<a href="http://example.com">a link</a>.</p></section></article></body></html>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<html><head> <title>Inline Whitespace</title> </head> <body> <article> <h1>Main Title<sup> TM</sup></h1> <section> <h2>Abstract</h2> <p> <span class="s1">a</span> <span class="s2">b</span> <span class="s1"> c </span> </p> </section> <section> <h2>Premise </h2> <p><span>a</span> <span>space</span></p> <p><span>no</span><span> space</span></p> <p> This is<a href="http://example.com"> a link</a>. </p> </section> </article> </body></html>
22 changes: 20 additions & 2 deletions htmlmin/tests/test_minify.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,22 @@
from htmlmin.minify import html_minify


try:
from unittest.mock import patch # python 3.3 and greater
except ImportError:
from mock import patch


def resources_path(*paths):
    """Return the absolute path of an entry under the ``resources`` directory.

    The ``resources`` directory is resolved relative to the directory that
    contains this module.

    :param paths: path components appended after ``resources``.
    :returns: the resulting absolute path.
    """
    return abspath(join(dirname(__file__), 'resources', *paths))


class TestMinify(unittest.TestCase):

def _normal_and_minified(self, filename):
def _normal_and_minified(self, filename, variant=None):
html_file = resources_path('%s.html' % filename)
html_file_minified = resources_path('%s_minified.html' % filename)
minified_postfix = '_%s' % variant if variant else ''
html_file_minified = resources_path('%s_minified%s.html' % (filename, minified_postfix))

html = open(html_file).read()
f_minified = codecs.open(html_file_minified, encoding='utf-8')
Expand Down Expand Up @@ -173,3 +180,14 @@ def test_non_ascii_in_excluded_element(self):
'non_ascii_in_excluded_element'
)
self.assertEqual(minified, html_minify(html))

def test_non_conservative_whitespace(self):
    """Minify the ``inline_whitespace`` fixture with default settings.

    The result is compared with the ``inline_whitespace_minified`` fixture.
    """
    # show the full diff on failure; the minified output is one long line
    self.maxDiff = None
    html, minified = self._normal_and_minified('inline_whitespace')
    self.assertEqual(minified, html_minify(html))

@patch('htmlmin.minify.CONSERVATIVE_WHITESPACE', True)
def test_conservative_whitespace(self):
    """Minify the ``inline_whitespace`` fixture in conservative mode.

    ``htmlmin.minify.CONSERVATIVE_WHITESPACE`` is patched to ``True`` for the
    duration of the test and the result is compared with the
    ``conservative`` variant of the minified fixture.
    """
    # show the full diff on failure; the minified output is one long line
    self.maxDiff = None
    html, minified = self._normal_and_minified(
        'inline_whitespace', variant='conservative'
    )
    self.assertEqual(minified, html_minify(html))
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ django==1.11.28
beautifulsoup4==4.7.1
html5lib==1.0.1
six==1.12.0
mock
9 changes: 8 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
import os
import re
import sys

from setuptools import setup, find_packages

Expand All @@ -23,6 +24,12 @@ def get_version(package):
with open('README.rst', 'r') as fp:
README = fp.read()

# packages needed only to run the test suite
tests_require = ['django']
if sys.version_info < (3, 3):
    # 'mock' is a separate library prior to Python 3.3
    tests_require.append('mock')

setup(
name='django-htmlmin',
version=version,
Expand All @@ -34,7 +41,7 @@ def get_version(package):
packages=find_packages(),
include_package_data=True,
install_requires=['beautifulsoup4', 'html5lib'],
tests_require=['django'],
tests_require=tests_require,
entry_points={
'console_scripts': [
'pyminify = htmlmin.commands:main',
Expand Down