diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 693f078f321..128538471cf 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -61,7 +61,6 @@ "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|" "\\b[\\w.%+-]+@[\\w.-]+\\b)" ) -inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?") USAGE = """ \t%prog [OPTIONS] [file1 file2 ... fileN] """ @@ -885,20 +884,10 @@ def parse_file( if not line or line in exclude_lines: continue - extra_words_to_ignore = set() - match = inline_ignore_regex.search(line) - if match: - extra_words_to_ignore = set( - filter(None, (match.group("words") or "").split(",")) - ) - if not extra_words_to_ignore: - continue - fixed_words = set() asked_for = set() - issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore) - for issue in issues: + for issue in spellchecker.spellcheck_line(line, line_tokenizer): misspelling = issue.misspelling word = issue.word lword = issue.lword diff --git a/codespell_lib/spellchecker.py b/codespell_lib/spellchecker.py index bd4eb0db56d..69006e850a0 100644 --- a/codespell_lib/spellchecker.py +++ b/codespell_lib/spellchecker.py @@ -16,16 +16,17 @@ Copyright (C) 2011 ProFUSION embedded systems """ -import re import os +import re from typing import ( - Dict, - Sequence, Container, - Optional, + Dict, + FrozenSet, + Generic, Iterable, + Optional, Protocol, - Generic, + Sequence, TypeVar, ) @@ -108,6 +109,8 @@ _builtin_default_as_tuple = tuple(_builtin_default.split(",")) +_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?") + class UnknownBuiltinDictionaryError(ValueError): def __init__(self, name: str) -> None: @@ -206,12 +209,21 @@ def __init__( if builtin_dictionaries: self.load_builtin_dictionaries(builtin_dictionaries) + def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]: + inline_ignore_match = _inline_ignore_regex.search(line) + if 
inline_ignore_match: + words = frozenset( + filter(None, (inline_ignore_match.group("words") or "").split(",")) + ) + return words if words else None + return frozenset() + def spellcheck_line( self, line: str, tokenizer: LineTokenizer[T_co], *, - extra_words_to_ignore: Container[str] = frozenset() + respect_inline_ignore: bool = True, ) -> Iterable[DetectedMisspelling[T_co]]: """Tokenize and spellcheck a line @@ -220,12 +232,19 @@ def spellcheck_line( :param line: The line to spellcheck. :param tokenizer: A callable that will tokenize the line - :param extra_words_to_ignore: Extra words to ignore for this particular line - (such as content from a `codespell:ignore` comment) + :param respect_inline_ignore: Whether to check the line for + `codespell:ignore` instructions + :returns: An iterable of discovered typos. """ misspellings = self._misspellings ignore_words_cased = self.ignore_words_cased + extra_words_to_ignore = ( + self._parse_inline_ignore(line) if respect_inline_ignore else frozenset() + ) + if extra_words_to_ignore is None: + return + for token in tokenizer(line): word = token.group() if word in ignore_words_cased: