From b0febf48c294530b5ac0fbae13fcbeaab5365337 Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Fri, 17 May 2024 07:40:18 +0000 Subject: [PATCH] Refactor: Move some code to new files for reuse No new code is introduced; only existing code is shuffled around and the functions moved are unchanged as well. --- codespell_lib/_codespell.py | 64 +------------------------------ codespell_lib/_text_util.py | 27 +++++++++++++ codespell_lib/spellchecker.py | 72 +++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 62 deletions(-) create mode 100644 codespell_lib/_text_util.py create mode 100644 codespell_lib/spellchecker.py diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 62a51b75b34..c36a83567d5 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -43,6 +43,8 @@ from ._version import ( # type: ignore[import-not-found] __version__ as VERSION, # noqa: N812 ) +from .spellchecker import Misspelling, build_dict +from ._text_util import fix_case word_regex_def = r"[\w\-'’]+" # noqa: RUF001 # While we want to treat characters like ( or " as okay for a starting break, @@ -52,9 +54,6 @@ "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|" "\\b[\\w.%+-]+@[\\w.-]+\\b)" ) -# Pass all misspellings through this translation table to generate -# alternative misspellings and fixes. -alt_chars = (("'", "’"),) # noqa: RUF001 inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P[\w,]*))?") USAGE = """ \t%prog [OPTIONS] [file1 file2 ... fileN] @@ -167,13 +166,6 @@ def match(self, filename: str) -> bool: return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list) -class Misspelling: - def __init__(self, data: str, fix: bool, reason: str) -> None: - self.data = data - self.fix = fix - self.reason = reason - - class TermColors: def __init__(self) -> None: self.FILE = "\033[33m" @@ -703,48 +695,6 @@ def build_ignore_words( ) -def add_misspelling( - key: str, - data: str, - misspellings: Dict[str, Misspelling], -) -> None: - data = data.strip() - - if "," in data: - fix = False - data, reason = data.rsplit(",", 1) - reason = reason.lstrip() - else: - fix = True - reason = "" - - misspellings[key] = Misspelling(data, fix, reason) - - -def build_dict( - filename: str, - misspellings: Dict[str, Misspelling], - ignore_words: Set[str], -) -> None: - with open(filename, encoding="utf-8") as f: - translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] - for line in f: - [key, data] = line.split("->") - # TODO: For now, convert both to lower. - # Someday we can maybe add support for fixing caps. - key = key.lower() - data = data.lower() - if key not in ignore_words: - add_misspelling(key, data, misspellings) - # generate alternative misspellings/fixes - for x, table in translate_tables: - if x in key: - alt_key = key.translate(table) - alt_data = data.translate(table) - if alt_key not in ignore_words: - add_misspelling(alt_key, alt_data, misspellings) - - def is_hidden(filename: str, check_hidden: bool) -> bool: bfilename = os.path.basename(filename) @@ -759,16 +709,6 @@ def is_text_file(filename: str) -> bool: return b"\x00" not in s -def fix_case(word: str, fixword: str) -> str: - if word == word.capitalize(): - return ", ".join(w.strip().capitalize() for w in fixword.split(",")) - if word == word.upper(): - return fixword.upper() - # they are both lower case - # or we don't have any idea - return fixword - - def ask_for_word_fix( line: str, match: Match[str], diff --git a/codespell_lib/_text_util.py b/codespell_lib/_text_util.py new file mode 100644 index 00000000000..18a2ec89b40 --- /dev/null +++ b/codespell_lib/_text_util.py @@ -0,0 +1,27 @@ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html. +""" +Copyright (C) 2010-2011 Lucas De Marchi +Copyright (C) 2011 ProFUSION embedded systems +""" + + +def fix_case(word: str, fixword: str) -> str: + if word == word.capitalize(): + return ", ".join(w.strip().capitalize() for w in fixword.split(",")) + if word == word.upper(): + return fixword.upper() + # they are both lower case + # or we don't have any idea + return fixword diff --git a/codespell_lib/spellchecker.py b/codespell_lib/spellchecker.py new file mode 100644 index 00000000000..fea439cd66a --- /dev/null +++ b/codespell_lib/spellchecker.py @@ -0,0 +1,72 @@ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html. +""" +Copyright (C) 2010-2011 Lucas De Marchi +Copyright (C) 2011 ProFUSION embedded systems +""" + +from typing import Dict, Set + +# Pass all misspellings through this translation table to generate +# alternative misspellings and fixes. +alt_chars = (("'", "’"),) # noqa: RUF001 + + +class Misspelling: + def __init__(self, data: str, fix: bool, reason: str) -> None: + self.data = data + self.fix = fix + self.reason = reason + + +def add_misspelling( + key: str, + data: str, + misspellings: Dict[str, Misspelling], +) -> None: + data = data.strip() + + if "," in data: + fix = False + data, reason = data.rsplit(",", 1) + reason = reason.lstrip() + else: + fix = True + reason = "" + + misspellings[key] = Misspelling(data, fix, reason) + + +def build_dict( + filename: str, + misspellings: Dict[str, Misspelling], + ignore_words: Set[str], +) -> None: + with open(filename, encoding="utf-8") as f: + translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] + for line in f: + [key, data] = line.split("->") + # TODO: For now, convert both to lower. + # Someday we can maybe add support for fixing caps. + key = key.lower() + data = data.lower() + if key not in ignore_words: + add_misspelling(key, data, misspellings) + # generate alternative misspellings/fixes + for x, table in translate_tables: + if x in key: + alt_key = key.translate(table) + alt_data = data.translate(table) + if alt_key not in ignore_words: + add_misspelling(alt_key, alt_data, misspellings)