From b0febf48c294530b5ac0fbae13fcbeaab5365337 Mon Sep 17 00:00:00 2001
From: Niels Thykier <niels@thykier.net>
Date: Fri, 17 May 2024 07:40:18 +0000
Subject: [PATCH] Refactor: Move some code to new files for reuse

No new code is introduced: existing code is only moved into the new
modules, and the moved functions themselves are unchanged.
---
 codespell_lib/_codespell.py   | 64 +------------------------------
 codespell_lib/_text_util.py   | 27 +++++++++++++
 codespell_lib/spellchecker.py | 72 +++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 62 deletions(-)
 create mode 100644 codespell_lib/_text_util.py
 create mode 100644 codespell_lib/spellchecker.py
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 62a51b75b34..c36a83567d5 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -43,6 +43,8 @@
 from ._version import (  # type: ignore[import-not-found]
     __version__ as VERSION,  # noqa: N812
 )
+from .spellchecker import Misspelling, build_dict
+from ._text_util import fix_case
 
 word_regex_def = r"[\w\-'’]+"  # noqa: RUF001
 # While we want to treat characters like ( or " as okay for a starting break,
@@ -52,9 +54,6 @@
     "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
     "\\b[\\w.%+-]+@[\\w.-]+\\b)"
 )
-# Pass all misspellings through this translation table to generate
-# alternative misspellings and fixes.
-alt_chars = (("'", "’"),)  # noqa: RUF001
 inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
 USAGE = """
 \t%prog [OPTIONS] [file1 file2 ... fileN]
@@ -167,13 +166,6 @@ def match(self, filename: str) -> bool:
         return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)
 
 
-class Misspelling:
-    def __init__(self, data: str, fix: bool, reason: str) -> None:
-        self.data = data
-        self.fix = fix
-        self.reason = reason
-
-
 class TermColors:
     def __init__(self) -> None:
         self.FILE = "\033[33m"
@@ -703,48 +695,6 @@ def build_ignore_words(
         )
 
 
-def add_misspelling(
-    key: str,
-    data: str,
-    misspellings: Dict[str, Misspelling],
-) -> None:
-    data = data.strip()
-
-    if "," in data:
-        fix = False
-        data, reason = data.rsplit(",", 1)
-        reason = reason.lstrip()
-    else:
-        fix = True
-        reason = ""
-
-    misspellings[key] = Misspelling(data, fix, reason)
-
-
-def build_dict(
-    filename: str,
-    misspellings: Dict[str, Misspelling],
-    ignore_words: Set[str],
-) -> None:
-    with open(filename, encoding="utf-8") as f:
-        translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
-        for line in f:
-            [key, data] = line.split("->")
-            # TODO: For now, convert both to lower.
-            #       Someday we can maybe add support for fixing caps.
-            key = key.lower()
-            data = data.lower()
-            if key not in ignore_words:
-                add_misspelling(key, data, misspellings)
-            # generate alternative misspellings/fixes
-            for x, table in translate_tables:
-                if x in key:
-                    alt_key = key.translate(table)
-                    alt_data = data.translate(table)
-                    if alt_key not in ignore_words:
-                        add_misspelling(alt_key, alt_data, misspellings)
-
-
 def is_hidden(filename: str, check_hidden: bool) -> bool:
     bfilename = os.path.basename(filename)
 
@@ -759,16 +709,6 @@ def is_text_file(filename: str) -> bool:
     return b"\x00" not in s
 
 
-def fix_case(word: str, fixword: str) -> str:
-    if word == word.capitalize():
-        return ", ".join(w.strip().capitalize() for w in fixword.split(","))
-    if word == word.upper():
-        return fixword.upper()
-    # they are both lower case
-    # or we don't have any idea
-    return fixword
-
-
 def ask_for_word_fix(
     line: str,
     match: Match[str],
diff --git a/codespell_lib/_text_util.py b/codespell_lib/_text_util.py
new file mode 100644
index 00000000000..18a2ec89b40
--- /dev/null
+++ b/codespell_lib/_text_util.py
@@ -0,0 +1,27 @@
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see
+# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+"""
+Copyright (C) 2010-2011  Lucas De Marchi <lucas.de.marchi@gmail.com>
+Copyright (C) 2011  ProFUSION embedded systems
+"""
+
+
+def fix_case(word: str, fixword: str) -> str:  # re-case fixword to match word
+    if word == word.capitalize():  # "Word": capitalize each comma-separated fix
+        return ", ".join(w.strip().capitalize() for w in fixword.split(","))
+    if word == word.upper():  # "WORD": upper-case the whole fix string
+        return fixword.upper()
+    # word is neither capitalized nor all upper case (e.g. lower or mixed
+    # case), so there is no casing to transfer: return fixword unchanged
+    return fixword
diff --git a/codespell_lib/spellchecker.py b/codespell_lib/spellchecker.py
new file mode 100644
index 00000000000..fea439cd66a
--- /dev/null
+++ b/codespell_lib/spellchecker.py
@@ -0,0 +1,72 @@
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see
+# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+"""
+Copyright (C) 2010-2011  Lucas De Marchi <lucas.de.marchi@gmail.com>
+Copyright (C) 2011  ProFUSION embedded systems
+"""
+
+from typing import Dict, Set
+
+# Pairs of (character, alternative character): build_dict() also registers
+# each misspelling containing the first with it replaced by the second.
+alt_chars = (("'", "’"),)  # noqa: RUF001
+
+
+class Misspelling:  # one parsed dictionary entry; see add_misspelling()
+    def __init__(self, data: str, fix: bool, reason: str) -> None:
+        self.data = data  # fix text for the misspelled key
+        self.fix = fix  # add_misspelling() passes False if the entry had a comma
+        self.reason = reason  # text after the entry's last comma, else ""
+
+
+def add_misspelling(  # parse one entry's data and store it under key
+    key: str,
+    data: str,
+    misspellings: Dict[str, Misspelling],  # updated in place
+) -> None:
+    data = data.strip()
+
+    if "," in data:
+        fix = False
+        data, reason = data.rsplit(",", 1)  # split at the LAST comma only
+        reason = reason.lstrip()  # may be "" when data ends with a comma
+    else:
+        fix = True
+        reason = ""
+
+    misspellings[key] = Misspelling(data, fix, reason)  # replaces any old entry
+
+
+def build_dict(  # load "key->data" lines from filename into misspellings
+    filename: str,
+    misspellings: Dict[str, Misspelling],  # updated in place
+    ignore_words: Set[str],  # keys found here are not added
+) -> None:
+    with open(filename, encoding="utf-8") as f:
+        translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
+        for line in f:
+            [key, data] = line.split("->")  # ValueError unless exactly one "->"
+            # TODO: Both sides are lower-cased for now; preserving or fixing
+            #       capitalization is not supported yet.
+            key = key.lower()
+            data = data.lower()
+            if key not in ignore_words:
+                add_misspelling(key, data, misspellings)
+            # register alt_chars variants too, each checked against ignore_words
+            for x, table in translate_tables:
+                if x in key:
+                    alt_key = key.translate(table)
+                    alt_data = data.translate(table)
+                    if alt_key not in ignore_words:
+                        add_misspelling(alt_key, alt_data, misspellings)