Move codespell:ignore check into Spellchecker

This makes the API honor inline `codespell:ignore` instructions by default, so it automatically skips the same declared false positives that the command-line tool already filters out.
codespell-project · May 17, 2024 · 4ae56b4 · 4ae56b4
1 parent 700b861
commit 4ae56b4
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 20 deletions.
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -61,7 +61,6 @@
     "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
     "\\b[\\w.%+-]+@[\\w.-]+\\b)"
 )
-inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
 USAGE = """
 \t%prog [OPTIONS] [file1 file2 ... fileN]
 """
@@ -885,20 +884,10 @@ def parse_file(
         if not line or line in exclude_lines:
             continue
 
-        extra_words_to_ignore = set()
-        match = inline_ignore_regex.search(line)
-        if match:
-            extra_words_to_ignore = set(
-                filter(None, (match.group("words") or "").split(","))
-            )
-            if not extra_words_to_ignore:
-                continue
-
         fixed_words = set()
         asked_for = set()
 
-        issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore)
-        for issue in issues:
+        for issue in spellchecker.spellcheck_line(line, line_tokenizer):
             misspelling = issue.misspelling
             word = issue.word
             lword = issue.lword

diff --git a/codespell_lib/spellchecker.py b/codespell_lib/spellchecker.py
@@ -16,16 +16,17 @@
 Copyright (C) 2011  ProFUSION embedded systems
 """
 
-import re
 import os
+import re
 from typing import (
-    Dict,
-    Sequence,
     Container,
-    Optional,
+    Dict,
+    FrozenSet,
+    Generic,
     Iterable,
+    Optional,
     Protocol,
-    Generic,
+    Sequence,
     TypeVar,
 )
 
@@ -108,6 +109,8 @@
 
 _builtin_default_as_tuple = tuple(_builtin_default.split(","))
 
+_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
+
 
 class UnknownBuiltinDictionaryError(ValueError):
     def __init__(self, name: str) -> None:
@@ -206,12 +209,21 @@ def __init__(
         if builtin_dictionaries:
             self.load_builtin_dictionaries(builtin_dictionaries)
 
+    def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]:
+        inline_ignore_match = _inline_ignore_regex.search(line)
+        if inline_ignore_match:
+            words = frozenset(
+                filter(None, (inline_ignore_match.group("words") or "").split(","))
+            )
+            return words if words else None
+        return frozenset()
+
     def spellcheck_line(
         self,
         line: str,
         tokenizer: LineTokenizer[T_co],
         *,
-        extra_words_to_ignore: Container[str] = frozenset()
+        respect_inline_ignore: bool = True,
     ) -> Iterable[DetectedMisspelling[T_co]]:
         """Tokenize and spellcheck a line
 
@@ -220,12 +232,19 @@ def spellcheck_line(
 
         :param line: The line to spellcheck.
         :param tokenizer: A callable that will tokenize the line
-        :param extra_words_to_ignore: Extra words to ignore for this particular line
-          (such as content from a `codespell:ignore` comment)
+        :param respect_inline_ignore: Whether to check the line for
+           `codespell:ignore` instructions
+        :returns: An iterable of discovered typos.
         """
         misspellings = self._misspellings
         ignore_words_cased = self.ignore_words_cased
 
+        extra_words_to_ignore = (
+            self._parse_inline_ignore(line) if respect_inline_ignore else frozenset()
+        )
+        if extra_words_to_ignore is None:
+            return
+
         for token in tokenizer(line):
             word = token.group()
             if word in ignore_words_cased: