diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py index dfab6695..f3d11c02 100644 --- a/se/se_epub_lint.py +++ b/se/se_epub_lint.py @@ -3645,7 +3645,11 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N headings.append((header_text, str(filename))) # Check for double spacing - matches = regex.search(fr"[{se.NO_BREAK_SPACE}{se.HAIR_SPACE} ]{{2,}}", file_contents) + # First, remove any table cells which contain quotation marks followed by multiple spaces, as those are probably ditto marks. + dom_copy = deepcopy(dom) + for td_node in dom_copy.xpath(f"//td[re:test(., '”[{se.NO_BREAK_SPACE}{se.HAIR_SPACE} ]+”')]"): + td_node.remove() + matches = regex.search(fr"[{se.NO_BREAK_SPACE}{se.HAIR_SPACE} ]{{2,}}", dom_copy.to_string()) if matches: double_spaced_files.append(filename) diff --git a/tests/lint/typography/t-001/in/src/epub/text/chapter-1.xhtml b/tests/lint/typography/t-001/in/src/epub/text/chapter-1.xhtml index e85ae9b7..4f2830fe 100644 --- a/tests/lint/typography/t-001/in/src/epub/text/chapter-1.xhtml +++ b/tests/lint/typography/t-001/in/src/epub/text/chapter-1.xhtml @@ -26,6 +26,29 @@
The first kaput cuticle is, in its own way, a tree. A sunflower is an underwear from the right perspective. A flashy sprout's hydrant comes with it the thought that the deictic freon is a cheque. … Some sonless elements are thought of simply as caravans.
Far from the truth, an innocent sees a glue as an unposed thumb. A Thursday of the lier is assumed to be an honied donna. Nowhere is it disputed that the widest rutabaga comes from a flamy kendo.
++ | . | ++ | + | + | + |
---|---|---|---|---|---|
14 | ++ + | + +” ” | +Southeast face | +11,200? | +Guides—Michel Croz, Christian Almer, Franz Biener; porter—Luc Meynet. See Chapter XV. | +