Main Title TM
+Abstract
++ a + b + c +
+Premise
+a space
+no space
++ This is a link. +
+diff --git a/README.rst b/README.rst index f1d99d5e..04ec5827 100644 --- a/README.rst +++ b/README.rst @@ -90,6 +90,19 @@ to ``True``: KEEP_COMMENTS_ON_MINIFYING = True +Conservative whitespace minifying +--------------------------------- + +By default the minifier will try to intelligently remove whitespace and leave +spaces only as needed for inline text rendering. Sometimes it may be necessary +to not completely remove whitespace but only reduce spaces to a single space. +If you set ``CONSERVATIVE_WHITESPACE_ON_MINIFYING`` to ``True`` then +whitespace is always reduced to a single space and never completely removed. + +.. code-block:: python + + CONSERVATIVE_WHITESPACE_ON_MINIFYING = True + Using the decorator =================== diff --git a/htmlmin/minify.py b/htmlmin/minify.py index 63386e06..f3b0a7b0 100644 --- a/htmlmin/minify.py +++ b/htmlmin/minify.py @@ -22,6 +22,13 @@ except: pass # working in non Django projects +# if `True`, reduce whitespace to a single space only +CONSERVATIVE_WHITESPACE = False +try: + CONSERVATIVE_WHITESPACE = getattr(settings, "CONSERVATIVE_WHITESPACE_ON_MINIFYING", False) +except: + pass # working in non Django projects + # element list coming from # https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/HTML5/HTML5_element_list # combining text-level semantics & edits @@ -84,27 +91,28 @@ def space_minify(soup, ignore_comments=True): if not is_prestr(soup): # reduce multiple space characters new_string = re_multi_space.sub(' ', soup.string) - (prev_flow, next_flow) = is_inflow(soup) - # if the string is in a flow of text, don't remove lone - # spaces - if prev_flow and next_flow: - new_string = re_only_space.sub(' ', new_string) - # else, remove spaces, they are between grouping, section, - # metadata or other types of block - else: - new_string = re_only_space.sub('', new_string) - # if the previous element is not text then remove leading - # spaces - if prev_flow: - new_string = re_start_space.sub(' ', new_string) - else: - 
new_string = re_start_space.sub('', new_string) - # if the previous element is not text then remove leading - # spaces - if next_flow: - new_string = re_end_space.sub(' ', new_string) - else: - new_string = re_end_space.sub('', new_string) + if not CONSERVATIVE_WHITESPACE: + (prev_flow, next_flow) = is_inflow(soup) + # if the string is in a flow of text, don't remove lone + # spaces + if prev_flow and next_flow: + new_string = re_only_space.sub(' ', new_string) + # else, remove spaces, they are between grouping, section, + # metadata or other types of block + else: + new_string = re_only_space.sub('', new_string) + # if the previous element is not text then remove leading + # spaces + if prev_flow: + new_string = re_start_space.sub(' ', new_string) + else: + new_string = re_start_space.sub('', new_string) + # if the next element is not text then remove trailing + # spaces + if next_flow: + new_string = re_end_space.sub(' ', new_string) + else: + new_string = re_end_space.sub('', new_string) # bs4 sometimes add a lone newline in the body new_string = re_single_nl.sub('', new_string) soup.string.replace_with(new_string) diff --git a/htmlmin/tests/resources/inline_whitespace.html b/htmlmin/tests/resources/inline_whitespace.html new file mode 100644 index 00000000..180d2667 --- /dev/null +++ b/htmlmin/tests/resources/inline_whitespace.html @@ -0,0 +1,26 @@ + +
++ a + b + c +
+a space
+no space
++ This is a link. +
+a b c
a space
nospace
This isa link.
a b c
a space
no space
This is a link.