Skip to content

Commit

Permalink
Move codespell:ignore check into Spellchecker
Browse files Browse the repository at this point in the history
This makes the API automatically avoid some declared false-positives
that the command line tool would also filter.
  • Loading branch information
nthykier committed May 17, 2024
1 parent 700b861 commit 4ae56b4
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 20 deletions.
13 changes: 1 addition & 12 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
"\\b[\\w.%+-]+@[\\w.-]+\\b)"
)
inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
"""
Expand Down Expand Up @@ -885,20 +884,10 @@ def parse_file(
if not line or line in exclude_lines:
continue

extra_words_to_ignore = set()
match = inline_ignore_regex.search(line)
if match:
extra_words_to_ignore = set(
filter(None, (match.group("words") or "").split(","))
)
if not extra_words_to_ignore:
continue

fixed_words = set()
asked_for = set()

issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore)
for issue in issues:
for issue in spellchecker.spellcheck_line(line, line_tokenizer):
misspelling = issue.misspelling
word = issue.word
lword = issue.lword
Expand Down
35 changes: 27 additions & 8 deletions codespell_lib/spellchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,17 @@
Copyright (C) 2011 ProFUSION embedded systems
"""

import re
import os
import re
from typing import (
Dict,
Sequence,
Container,
Optional,
Dict,
FrozenSet,
Generic,
Iterable,
Optional,
Protocol,
Generic,
Sequence,
TypeVar,
)

Expand Down Expand Up @@ -108,6 +109,8 @@

_builtin_default_as_tuple = tuple(_builtin_default.split(","))

_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")


class UnknownBuiltinDictionaryError(ValueError):
def __init__(self, name: str) -> None:
Expand Down Expand Up @@ -206,12 +209,21 @@ def __init__(
if builtin_dictionaries:
self.load_builtin_dictionaries(builtin_dictionaries)

def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]:
    """Extract the word list of an inline ``codespell:ignore`` directive.

    :param line: The line to inspect for a ``codespell:ignore`` directive.
    :returns: An empty frozenset when the line carries no directive; the
       frozenset of comma-separated words when the directive lists any;
       ``None`` when a directive is present but lists no words.
    """
    directive = _inline_ignore_regex.search(line)
    if not directive:
        # No directive on this line: nothing extra to ignore.
        return frozenset()

    # The "words" group is optional in the pattern, so it may be None.
    listed = directive.group("words") or ""
    ignored = frozenset(word for word in listed.split(",") if word)

    # A directive without any words is reported as None, which is
    # distinct from the "no directive" empty set returned above.
    return ignored or None

def spellcheck_line(
self,
line: str,
tokenizer: LineTokenizer[T_co],
*,
extra_words_to_ignore: Container[str] = frozenset()
respect_inline_ignore: bool = True,
) -> Iterable[DetectedMisspelling[T_co]]:
"""Tokenize and spellcheck a line
Expand All @@ -220,12 +232,19 @@ def spellcheck_line(
:param line: The line to spellcheck.
:param tokenizer: A callable that will tokenize the line
:param extra_words_to_ignore: Extra words to ignore for this particular line
(such as content from a `codespell:ignore` comment)
:param respect_inline_ignore: Whether to check the line for
`codespell:ignore` instructions
:returns: An iterable of discovered typos.
"""
misspellings = self._misspellings
ignore_words_cased = self.ignore_words_cased

extra_words_to_ignore = (
self._parse_inline_ignore(line) if respect_inline_ignore else frozenset()
)
if extra_words_to_ignore is None:
return

for token in tokenizer(line):
word = token.group()
if word in ignore_words_cased:
Expand Down

0 comments on commit 4ae56b4

Please sign in to comment.