From d767d76312c9979398a27f0a51459d7c70d54de9 Mon Sep 17 00:00:00 2001
From: insolor <insolor@gmail.com>
Date: Tue, 5 Sep 2023 20:39:24 +0300
Subject: [PATCH] Reformat code with black

---
 bench/__init__.py          |   2 +-
 bench/speed.py             | 210 ++++++++++++++++++++-----------------
 docs/conf.py               | 136 ++++++++++++------------
 pyproject.toml             |   3 +
 setup.py                   |  44 ++++----
 tests/__init__.py          |   2 +-
 tests/test_dawg.py         |  98 +++++++++--------
 tests/test_payload_dawg.py |  70 +++++++------
 tests/test_prediction.py   | 116 +++++++++-----------
 9 files changed, 342 insertions(+), 339 deletions(-)

diff --git a/bench/__init__.py b/bench/__init__.py
index 7a77a86..139759b 100644
--- a/bench/__init__.py
+++ b/bench/__init__.py
@@ -1,2 +1,2 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import
\ No newline at end of file
+from __future__ import absolute_import
diff --git a/bench/speed.py b/bench/speed.py
index c795b58..886d950 100755
--- a/bench/speed.py
+++ b/bench/speed.py
@@ -7,37 +7,37 @@
 import os
 import zipfile
 import struct
-#import pstats
-#import cProfile
+
+# import pstats
+# import cProfile
 
 import dawg
 
+
 def words100k():
-    zip_name = os.path.join(
-        os.path.abspath(os.path.dirname(__file__)),
-        'words100k.txt.zip'
-    )
+    zip_name = os.path.join(os.path.abspath(os.path.dirname(__file__)), "words100k.txt.zip")
     zf = zipfile.ZipFile(zip_name)
-    txt = zf.open(zf.namelist()[0]).read().decode('utf8')
+    txt = zf.open(zf.namelist()[0]).read().decode("utf8")
     return txt.splitlines()
 
+
 def random_words(num):
-    russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
-    alphabet = '%s%s' % (russian, string.ascii_letters)
-    return [
-        "".join([random.choice(alphabet) for x in range(random.randint(1,15))])
-        for y in range(num)
-    ]
+    russian = "абвгдеёжзиклмнопрстуфхцчъыьэюя"
+    alphabet = "%s%s" % (russian, string.ascii_letters)
+    return ["".join([random.choice(alphabet) for x in range(random.randint(1, 15))]) for y in range(num)]
+
 
 def truncated_words(words):
     return [word[:3] for word in words]
 
+
 def prefixes1k(words, prefix_len):
     words = [w for w in words if len(w) >= prefix_len]
-    every_nth = int(len(words)/1000)
+    every_nth = int(len(words) / 1000)
     _words = [w[:prefix_len] for w in words[::every_nth]]
     return _words[:1000]
 
+
 def leet_words(words, replaces):
     for key, value in replaces.items():
         words = [w.replace(key, value) for w in words]
@@ -53,75 +53,104 @@ def leet_words(words, replaces):
 PREFIXES_15_1k = prefixes1k(WORDS100k, 15)
 
 LEET_REPLACES = {
-    'o': '0',
-    'O': '0',
-    'u': '0',
-    'l': '1',
-    'i': '1',
-    'e': '3',
-    'E': '3',
-    'A': '4',
-    'a': '4',
-    'h': '4',
-    's': 'z',
+    "o": "0",
+    "O": "0",
+    "u": "0",
+    "l": "1",
+    "i": "1",
+    "e": "3",
+    "E": "3",
+    "A": "4",
+    "a": "4",
+    "h": "4",
+    "s": "z",
 }
 LEET_50k = leet_words(WORDS100k[:50000], LEET_REPLACES)
 
+
 def format_result(key, value, text_width):
     key = key.ljust(text_width)
     print("    %s %s" % (key, value))
 
 
-def bench(name, timer, descr='M ops/sec', op_count=0.1, repeats=3, runs=5,
-          text_width=33):
+def bench(name, timer, descr="M ops/sec", op_count=0.1, repeats=3, runs=5, text_width=33):
     try:
         times = []
         for x in range(runs):
             times.append(timer.timeit(repeats))
 
         def op_time(time):
-            return op_count*repeats / time
+            return op_count * repeats / time
 
         val = "%0.3f%s" % (op_time(min(times)), descr)
         format_result(name, val, text_width)
     except (AttributeError, TypeError) as e:
         format_result(name, "not supported", text_width)
 
+
 def create_dawg():
     words = words100k()
     return dawg.DAWG(words)
 
+
 def create_bytes_dawg():
     words = words100k()
-    values = [struct.pack(str('<H'), len(word)) for word in words]
+    values = [struct.pack(str("<H"), len(word)) for word in words]
     return dawg.BytesDAWG(zip(words, values))
 
+
 def create_record_dawg():
     words = words100k()
-    values = [ [len(word)] for word in words]
-    return dawg.RecordDAWG(str('<H'), zip(words, values))
+    values = [[len(word)] for word in words]
+    return dawg.RecordDAWG(str("<H"), zip(words, values))
+
 
 def create_int_dawg():
     words = words100k()
     values = [len(word) for word in words]
     return dawg.IntDAWG(zip(words, values))
 
+
 def create_leet_dawg():
     return dawg.DAWG(LEET_50k)
 
 
 def benchmark():
-    print('\n====== Benchmarks (100k unique unicode words) =======\n')
+    print("\n====== Benchmarks (100k unique unicode words) =======\n")
 
     tests = [
-        ('__getitem__ (hits)', "for word in WORDS100k: data[word]", 'M ops/sec', 0.1, 3),
-        ('get() (hits)', "for word in WORDS100k: data.get(word)", 'M ops/sec', 0.1, 3),
-        ('get() (misses)', "for word in NON_WORDS_10k: data.get(word)", 'M ops/sec', 0.01, 5),
-        ('__contains__ (hits)', "for word in WORDS100k: word in data", 'M ops/sec', 0.1, 3),
-        ('__contains__ (misses)', "for word in NON_WORDS100k: word in data", 'M ops/sec', 0.1, 3),
-        ('items()', 'list(data.items())', ' ops/sec', 1, 1),
-        ('keys()', 'list(data.keys())', ' ops/sec', 1, 1),
-#        ('values()', 'list(data.values())', ' ops/sec', 1, 1),
+        (
+            "__getitem__ (hits)",
+            "for word in WORDS100k: data[word]",
+            "M ops/sec",
+            0.1,
+            3,
+        ),
+        ("get() (hits)", "for word in WORDS100k: data.get(word)", "M ops/sec", 0.1, 3),
+        (
+            "get() (misses)",
+            "for word in NON_WORDS_10k: data.get(word)",
+            "M ops/sec",
+            0.01,
+            5,
+        ),
+        (
+            "__contains__ (hits)",
+            "for word in WORDS100k: word in data",
+            "M ops/sec",
+            0.1,
+            3,
+        ),
+        (
+            "__contains__ (misses)",
+            "for word in NON_WORDS100k: word in data",
+            "M ops/sec",
+            0.1,
+            3,
+        ),
+        ("items()", "list(data.items())", " ops/sec", 1, 1),
+        ("keys()", "list(data.keys())", " ops/sec", 1, 1),
+        #        ('values()', 'list(data.values())', ' ops/sec', 1, 1),
     ]
 
     common_setup = """
@@ -132,19 +161,19 @@ def benchmark():
 NON_WORDS_10k = NON_WORDS100k[:10000]
 NON_WORDS_1k = ['ыва', 'xyz', 'соы', 'Axx', 'avы']*200
 """
-    dict_setup = common_setup + 'data = dict((word, len(word)) for word in WORDS100k);'
-    dawg_setup = common_setup + 'data = create_dawg(); repl = data.compile_replaces(LEET_REPLACES);'
-    bytes_dawg_setup = common_setup + 'data = create_bytes_dawg();'
-    record_dawg_setup = common_setup + 'data = create_record_dawg();'
-    int_dawg_setup = common_setup + 'data = create_int_dawg();'
-    leet_dawg_setup = common_setup + 'data = create_leet_dawg(); repl = data.compile_replaces(LEET_REPLACES);'
+    dict_setup = common_setup + "data = dict((word, len(word)) for word in WORDS100k);"
+    dawg_setup = common_setup + "data = create_dawg(); repl = data.compile_replaces(LEET_REPLACES);"
+    bytes_dawg_setup = common_setup + "data = create_bytes_dawg();"
+    record_dawg_setup = common_setup + "data = create_record_dawg();"
+    int_dawg_setup = common_setup + "data = create_int_dawg();"
+    leet_dawg_setup = common_setup + "data = create_leet_dawg(); repl = data.compile_replaces(LEET_REPLACES);"
 
     structures = [
-        ('dict', dict_setup),
-        ('DAWG', dawg_setup),
-        ('BytesDAWG', bytes_dawg_setup),
-        ('RecordDAWG', record_dawg_setup),
-        ('IntDAWG', int_dawg_setup),
+        ("dict", dict_setup),
+        ("DAWG", dawg_setup),
+        ("BytesDAWG", bytes_dawg_setup),
+        ("RecordDAWG", record_dawg_setup),
+        ("IntDAWG", int_dawg_setup),
     ]
     for test_name, test, descr, op_count, repeats in tests:
         for name, setup in structures:
@@ -161,7 +190,7 @@ def benchmark():
             "for word in WORDS100k[:50000]: data.similar_keys(word, repl)",
             setup=dawg_setup,
         ),
-        op_count=0.05
+        op_count=0.05,
     )
     bench(
         "DAWG.similar_keys  (l33t)",
@@ -169,71 +198,60 @@ def benchmark():
             "for word in WORDS100k[:50000]: data.similar_keys(word, repl)",
             setup=leet_dawg_setup,
         ),
-        op_count=0.05
+        op_count=0.05,
     )
 
     for struct_name, setup in structures[1:]:
 
         # prefixes of a given key
         _bench_data = [
-            ('hits', 'WORDS100k'),
-            ('mixed', 'MIXED_WORDS100k'),
-            ('misses', 'NON_WORDS100k'),
+            ("hits", "WORDS100k"),
+            ("mixed", "MIXED_WORDS100k"),
+            ("misses", "NON_WORDS100k"),
         ]
 
-        for meth in ['prefixes']:
+        for meth in ["prefixes"]:
             for name, data in _bench_data:
                 bench(
-                    '%s.%s (%s)' % (struct_name, meth, name),
-                    timeit.Timer(
-                        "for word in %s:\n"
-                        "   data.%s(word)" % (data, meth),
-                        setup
-                    ),
+                    "%s.%s (%s)" % (struct_name, meth, name),
+                    timeit.Timer("for word in %s:\n" "   data.%s(word)" % (data, meth), setup),
                     runs=3,
                 )
 
-        for meth in ['iterprefixes']:
+        for meth in ["iterprefixes"]:
             for name, data in _bench_data:
                 bench(
-                    '%s.%s (%s)' % (struct_name, meth, name),
+                    "%s.%s (%s)" % (struct_name, meth, name),
                     timeit.Timer(
-                        "for word in %s:\n"
-                        "   list(data.%s(word))" % (data, meth),
-                        setup
+                        "for word in %s:\n" "   list(data.%s(word))" % (data, meth),
+                        setup,
                     ),
                     runs=3,
                 )
 
         # keys with a given prefix
         _bench_data = [
-            ('xxx', 'avg_len(res)==415', 'PREFIXES_3_1k'),
-            ('xxxxx', 'avg_len(res)==17', 'PREFIXES_5_1k'),
-            ('xxxxxxxx', 'avg_len(res)==3', 'PREFIXES_8_1k'),
-            ('xxxxx..xx', 'avg_len(res)==1.4', 'PREFIXES_15_1k'),
-            ('xxx', 'NON_EXISTING', 'NON_WORDS_1k'),
+            ("xxx", "avg_len(res)==415", "PREFIXES_3_1k"),
+            ("xxxxx", "avg_len(res)==17", "PREFIXES_5_1k"),
+            ("xxxxxxxx", "avg_len(res)==3", "PREFIXES_8_1k"),
+            ("xxxxx..xx", "avg_len(res)==1.4", "PREFIXES_15_1k"),
+            ("xxx", "NON_EXISTING", "NON_WORDS_1k"),
         ]
         for xxx, avg, data in _bench_data:
-            for meth in ['keys', 'items']:
+            for meth in ["keys", "items"]:
                 bench(
                     '%s.%s(prefix="%s"), %s' % (struct_name, meth, xxx, avg),
-                    timeit.Timer(
-                        "for word in %s: data.%s(word)" % (data, meth),
-                        setup
-                    ),
-                    'K ops/sec',
+                    timeit.Timer("for word in %s: data.%s(word)" % (data, meth), setup),
+                    "K ops/sec",
                     op_count=1,
                     runs=3,
                     text_width=60,
                 )
-            for meth in ['iterkeys', 'iteritems']:
+            for meth in ["iterkeys", "iteritems"]:
                 bench(
                     '%s.%s(prefix="%s"), %s' % (struct_name, meth, xxx, avg),
-                    timeit.Timer(
-                        "for word in %s: list(data.%s(word))" % (data, meth),
-                        setup
-                    ),
-                    'K ops/sec',
+                    timeit.Timer("for word in %s: list(data.%s(word))" % (data, meth), setup),
+                    "K ops/sec",
                     op_count=1,
                     runs=3,
                     text_width=60,
@@ -247,10 +265,12 @@ def check_dawg(trie, words):
     if value != len(words):
         raise Exception()
 
+
 def profiling():
     import pstats
     import cProfile
-    print('\n====== Profiling =======\n')
+
+    print("\n====== Profiling =======\n")
     d = create_bytes_dawg()
     WORDS = words100k()
 
@@ -260,19 +280,19 @@ def check_getitem(trie, words):
 
     cProfile.runctx("check_getitem(d, WORDS)", globals(), locals(), "Profile.prof")
 
-#    def check_prefixes(trie, words):
-#        for word in words:
-#            trie.keys(word)
-#    cProfile.runctx("check_prefixes(d, NON_WORDS_1k)", globals(), locals(), "Profile.prof")
-#
-    #cProfile.runctx("check_trie(d, WORDS)", globals(), locals(), "Profile.prof")
+    #    def check_prefixes(trie, words):
+    #        for word in words:
+    #            trie.keys(word)
+    #    cProfile.runctx("check_prefixes(d, NON_WORDS_1k)", globals(), locals(), "Profile.prof")
+    #
+    # cProfile.runctx("check_trie(d, WORDS)", globals(), locals(), "Profile.prof")
 
     s = pstats.Stats("Profile.prof")
     s.strip_dirs().sort_stats("time").print_stats(20)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     benchmark()
-    #profiling()
-    print('\n~~~~~~~~~~~~~~\n')
\ No newline at end of file
+    # profiling()
+    print("\n~~~~~~~~~~~~~~\n")
diff --git a/docs/conf.py b/docs/conf.py
index 8d74020..e63f3f6 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -17,209 +17,203 @@
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+# sys.path.insert(0, os.path.abspath('.'))
 
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = []
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = 'DAWG'
-copyright = '2015, Mikhail Korobov'
+project = "DAWG"
+copyright = "2015, Mikhail Korobov"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '0.6'
+version = "0.6"
 # The full version, including alpha/beta/rc tags.
-release = '0.6'
+release = "0.6"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-#language = None
+# language = None
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
 
 # The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# default_role = None
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 
 # -- Options for HTML output ---------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'default'
+html_theme = "default"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
+# html_theme_path = []
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+# html_logo = None
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+# html_favicon = None
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
 
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'DAWGdoc'
+htmlhelp_basename = "DAWGdoc"
 
 
 # -- Options for LaTeX output --------------------------------------------------
 
 latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
-
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
+    # The paper size ('letterpaper' or 'a4paper').
+    #'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #'preamble': '',
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-  ('index', 'DAWG.tex', 'DAWG Documentation',
-   'Mikhail Korobov', 'manual'),
+    ("index", "DAWG.tex", "DAWG Documentation", "Mikhail Korobov", "manual"),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
-#latex_logo = None
+# latex_logo = None
 
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
 
 # If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
 
 # If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
 
 # Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
 
 # If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
 
 
 # -- Options for manual page output --------------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    ('index', 'dawg', 'DAWG Documentation',
-     ['Mikhail Korobov'], 1)
-]
+man_pages = [("index", "dawg", "DAWG Documentation", ["Mikhail Korobov"], 1)]
 
 # If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
 
 
 # -- Options for Texinfo output ------------------------------------------------
@@ -228,16 +222,22 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-  ('index', 'DAWG', 'DAWG Documentation',
-   'Mikhail Korobov', 'DAWG', 'One line description of project.',
-   'Miscellaneous'),
+    (
+        "index",
+        "DAWG",
+        "DAWG Documentation",
+        "Mikhail Korobov",
+        "DAWG",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]
 
 # Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
 
 # If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
 
 # How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
diff --git a/pyproject.toml b/pyproject.toml
index cbc600f..76ed89e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,3 +20,6 @@ exclude_lines = [
     "@(abc\\.)?abstractmethod",
 ]
 include = ["src/*"]
+
+[tool.black]
+line-length = 120
diff --git a/setup.py b/setup.py
index 23acf60..dc8de71 100755
--- a/setup.py
+++ b/setup.py
@@ -17,8 +17,8 @@
 extensions = [
     Extension(
         "dawg",
-        sources=glob.glob('src/*.pyx') + glob.glob('lib/b64/*.c'),
-        include_dirs=['lib'],
+        sources=glob.glob("src/*.pyx") + glob.glob("lib/b64/*.c"),
+        include_dirs=["lib"],
         language="c++",
         define_macros=define_macros,
     )
@@ -35,28 +35,26 @@
     name="DAWG2",
     version="0.9.1",
     description="Fast and memory efficient DAWG (DAFSA) for Python",
-    long_description=open('README.rst').read() + '\n\n' + open('CHANGES.rst').read(),
-    author='Mikhail Korobov',
-    author_email='kmike84@gmail.com',
-    url='https://github.com/pymorphy2-fork/DAWG/',
-
+    long_description=open("README.rst").read() + "\n\n" + open("CHANGES.rst").read(),
+    author="Mikhail Korobov",
+    author_email="kmike84@gmail.com",
+    url="https://github.com/pymorphy2-fork/DAWG/",
     ext_modules=ext_modules,
-
     classifiers=[
-        'Development Status :: 4 - Beta',
-        'Intended Audience :: Developers',
-        'Intended Audience :: Science/Research',
-        'License :: OSI Approved :: MIT License',
-        'Programming Language :: Cython',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-        'Programming Language :: Python :: 3.10',
-        'Programming Language :: Python :: 3.11',
-        'Programming Language :: Python :: Implementation :: CPython',
-        'Topic :: Software Development :: Libraries :: Python Modules',
-        'Topic :: Scientific/Engineering :: Information Analysis',
-        'Topic :: Text Processing :: Linguistic',
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Cython",
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: Implementation :: CPython",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Topic :: Scientific/Engineering :: Information Analysis",
+        "Topic :: Text Processing :: Linguistic",
     ],
 )
diff --git a/tests/__init__.py b/tests/__init__.py
index 7a77a86..139759b 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,2 +1,2 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import
\ No newline at end of file
+from __future__ import absolute_import
diff --git a/tests/test_dawg.py b/tests/test_dawg.py
index 26f6627..3249925 100644
--- a/tests/test_dawg.py
+++ b/tests/test_dawg.py
@@ -7,25 +7,25 @@
 import pytest
 import dawg
 
+
 def test_contains():
-    d = dawg.IntDAWG({'foo': 1, 'bar': 2, 'foobar': 3})
+    d = dawg.IntDAWG({"foo": 1, "bar": 2, "foobar": 3})
 
-    assert 'foo' in d
-    assert 'bar' in d
-    assert 'foobar' in d
-    assert 'fo' not in d
-    assert 'x' not in d
+    assert "foo" in d
+    assert "bar" in d
+    assert "foobar" in d
+    assert "fo" not in d
+    assert "x" not in d
 
-    assert b'foo' in d
-    assert b'x' not in d
+    assert b"foo" in d
+    assert b"x" not in d
 
 
 class TestDAWG(object):
-
     def test_sorted_iterable(self):
 
-        sorted_data = ['bar', 'foo', 'foobar']
-        contents = "\n".join(sorted_data).encode('utf8')
+        sorted_data = ["bar", "foo", "foobar"]
+        contents = "\n".join(sorted_data).encode("utf8")
         with tempfile.NamedTemporaryFile() as f:
             f.write(contents)
             f.seek(0)
@@ -33,23 +33,23 @@ def test_sorted_iterable(self):
             words = (line.strip() for line in f)
             d = dawg.DAWG(words, input_is_sorted=True)
 
-        assert 'bar' in d
-        assert 'foo' in d
+        assert "bar" in d
+        assert "foo" in d
 
     def test_no_segfaults_on_invalid_file(self):
         d = dawg.DAWG()
         fd, path = tempfile.mkstemp()
-        with open(path, 'w') as f:
-            f.write('foo')
+        with open(path, "w") as f:
+            f.write("foo")
 
         with pytest.raises(IOError) as e:
             d.load(path)
-            assert 'Invalid' in e.args[0]
+            assert "Invalid" in e.args[0]
 
-        with open(path, 'rb') as f:
+        with open(path, "rb") as f:
             with pytest.raises(IOError) as e:
                 d.read(f)
-                assert 'Invalid' in e.args[0]
+                assert "Invalid" in e.args[0]
 
     def test_no_segfaults_after_wrong_stream(self):
         d = dawg.DAWG()
@@ -58,21 +58,21 @@ def test_no_segfaults_after_wrong_stream(self):
         with pytest.raises(IOError):
             d.load(wrong_path)
 
-        assert 'random-key' not in d # there is possible segfault
+        assert "random-key" not in d  # there is possible segfault
 
     def test_build_errors(self):
         with pytest.raises(dawg.Error):
-            data = [b'foo\x00bar', b'bar']
+            data = [b"foo\x00bar", b"bar"]
             dawg.DAWG(data)
 
     def test_contains_with_null_bytes(self):
-        d = dawg.DAWG(['foo'])
-        assert b'foo' in d
-        assert b'foo\x00bar' not in d
+        d = dawg.DAWG(["foo"])
+        assert b"foo" in d
+        assert b"foo\x00bar" not in d
 
     def test_unicode_sorting(self):
-        key1 = '\U00010345\U0001033f\U00010337\U00010330\U0001033d'
-        key2 = '\uff72\uff9c\uff90\uff7b\uff9e\uff9c'
+        key1 = "\U00010345\U0001033f\U00010337\U00010330\U0001033d"
+        key2 = "\uff72\uff9c\uff90\uff7b\uff9e\uff9c"
 
         # This apparently depends on Python version:
         # assert key1 < key2
@@ -84,13 +84,12 @@ def test_unicode_sorting(self):
         dawg.DAWG([key1, key2])
 
 
-
 class TestIntDAWG(object):
 
     IntDAWG = dawg.IntDAWG
 
     def dawg(self):
-        payload = {'foo': 1, 'bar': 5, 'foobar': 3}
+        payload = {"foo": 1, "bar": 5, "foobar": 3}
         d = self.IntDAWG(payload)
         return payload, d
 
@@ -100,8 +99,7 @@ def test_getitem(self):
             assert d[key] == payload[key]
 
         with pytest.raises(KeyError):
-            d['fo']
-
+            d["fo"]
 
     def test_dumps_loads(self):
         payload, d = self.dawg()
@@ -138,15 +136,15 @@ def test_pickling(self):
             assert d[key] == value
 
     def test_int_value_ranges(self):
-        for val in [0, 5, 2**16-1, 2**31-1]:
-            d = self.IntDAWG({'f': val})
-            assert d['f'] == val
+        for val in [0, 5, 2**16 - 1, 2**31 - 1]:
+            d = self.IntDAWG({"f": val})
+            assert d["f"] == val
 
         with pytest.raises(ValueError):
-            self.IntDAWG({'f': -1})
+            self.IntDAWG({"f": -1})
 
         with pytest.raises(OverflowError):
-            self.IntDAWG({'f': 2**32-1})
+            self.IntDAWG({"f": 2**32 - 1})
 
 
 class TestIntCompletionDAWG(TestIntDAWG):
@@ -154,7 +152,7 @@ class TestIntCompletionDAWG(TestIntDAWG):
 
 
 class TestCompletionDAWG(object):
-    keys = ['f', 'bar', 'foo', 'foobar']
+    keys = ["f", "bar", "foo", "foobar"]
 
     def dawg(self):
         return dawg.CompletionDAWG(self.keys)
@@ -197,18 +195,18 @@ def test_iterprefixes(self):
     def test_completion(self):
         d = self.dawg()
 
-        assert d.keys('z') == []
-        assert d.keys('b') == ['bar']
-        assert d.keys('foo') == ['foo', 'foobar']
+        assert d.keys("z") == []
+        assert d.keys("b") == ["bar"]
+        assert d.keys("foo") == ["foo", "foobar"]
 
     def test_has_keys_with_prefix(self):
-        assert self.empty_dawg().has_keys_with_prefix('') == False
+        assert self.empty_dawg().has_keys_with_prefix("") == False
 
         d = self.dawg()
-        assert d.has_keys_with_prefix('') == True
-        assert d.has_keys_with_prefix('b') == True
-        assert d.has_keys_with_prefix('fo') == True
-        assert d.has_keys_with_prefix('bo') == False
+        assert d.has_keys_with_prefix("") == True
+        assert d.has_keys_with_prefix("b") == True
+        assert d.has_keys_with_prefix("fo") == True
+        assert d.has_keys_with_prefix("bo") == False
 
     def test_completion_dawg_saveload(self):
         buf = BytesIO()
@@ -221,15 +219,15 @@ def test_completion_dawg_saveload(self):
         for key in self.keys:
             assert key in d
 
-        assert d.keys('foo') == ['foo', 'foobar']
-        assert d.keys('b') == ['bar']
-        assert d.keys('z') == []
+        assert d.keys("foo") == ["foo", "foobar"]
+        assert d.keys("b") == ["bar"]
+        assert d.keys("z") == []
 
     def test_no_segfaults_on_invalid_file(self):
         d = self.dawg()
         fd, path = tempfile.mkstemp()
-        with open(path, 'w') as f:
-            f.write('foo')
+        with open(path, "w") as f:
+            f.write("foo")
 
         with pytest.raises(IOError) as e:
             d.load(path)
@@ -241,7 +239,7 @@ def test_no_segfaults_on_empty_dawg(self):
 
 
 class TestIntCompletionDAWGComplete(TestCompletionDAWG):
-    keys = ['f', 'bar', 'foo', 'foobar']
+    keys = ["f", "bar", "foo", "foobar"]
 
     def dawg(self):
         return dawg.IntCompletionDAWG((k, len(k)) for k in self.keys)
@@ -267,4 +265,4 @@ def test_iteritems(self):
 
     def test_items_prefix(self):
         d = self.dawg()
-        assert d.items('fo') == [('foo', 3), ('foobar', 6)]
+        assert d.items("fo") == [("foo", 3), ("foobar", 6)]
diff --git a/tests/test_payload_dawg.py b/tests/test_payload_dawg.py
index 305ac3f..d243a22 100644
--- a/tests/test_payload_dawg.py
+++ b/tests/test_payload_dawg.py
@@ -4,13 +4,14 @@
 import pytest
 import dawg
 
+
 class TestBytesDAWG(object):
 
     DATA = (
-        ('foo', b'data3'),
-        ('bar', b'data2'),
-        ('foo', b'data1'),
-        ('foobar', b'data4')
+        ("foo", b"data3"),
+        ("bar", b"data2"),
+        ("foo", b"data1"),
+        ("foobar", b"data4"),
     )
 
     DATA_KEYS = list(zip(*DATA))[0]
@@ -23,29 +24,28 @@ def test_contains(self):
         for key, val in self.DATA:
             assert key in d
 
-        assert 'food' not in d
-        assert 'x' not in d
-        assert 'fo' not in d
-
+        assert "food" not in d
+        assert "x" not in d
+        assert "fo" not in d
 
     def test_getitem(self):
         d = self.dawg()
 
-        assert d['foo'] == [b'data1', b'data3']
-        assert d['bar'] == [b'data2']
-        assert d['foobar'] == [b'data4']
+        assert d["foo"] == [b"data1", b"data3"]
+        assert d["bar"] == [b"data2"]
+        assert d["foobar"] == [b"data4"]
 
         with pytest.raises(KeyError):
-            d['f']
+            d["f"]
 
         with pytest.raises(KeyError):
-            d['food']
+            d["food"]
 
         with pytest.raises(KeyError):
-            d['foobarz']
+            d["foobarz"]
 
         with pytest.raises(KeyError):
-            d['x']
+            d["x"]
 
     def test_prefixes(self):
         d = self.dawg()
@@ -58,13 +58,13 @@ def test_keys(self):
         assert d.keys() == sorted(self.DATA_KEYS)
 
     def test_keys_ordering(self):
-        data = [('foo', b'v1'), ('foobar', b'v2'), ('bar', b'v3')]
+        data = [("foo", b"v1"), ("foobar", b"v2"), ("bar", b"v3")]
 
-        d = dawg.BytesDAWG(data, payload_separator=b'\xff')
-        assert d.keys() == ['bar', 'foobar', 'foo']
+        d = dawg.BytesDAWG(data, payload_separator=b"\xff")
+        assert d.keys() == ["bar", "foobar", "foo"]
 
-        d2 = dawg.BytesDAWG(data, payload_separator=b'\x01')
-        assert d2.keys() == ['bar', 'foo', 'foobar']
+        d2 = dawg.BytesDAWG(data, payload_separator=b"\x01")
+        assert d2.keys() == ["bar", "foo", "foobar"]
 
     def test_iterkeys(self):
         d = self.dawg()
@@ -81,17 +81,16 @@ def test_iteritems(self):
 
     def test_build_error(self):
         with pytest.raises(dawg.Error):
-            self.dawg(payload_separator=b'f')
-
+            self.dawg(payload_separator=b"f")
 
 
 class TestRecordDAWG(object):
 
     STRUCTURED_DATA = (
-        ('foo',     (3, 2, 256)),
-        ('bar',     (3, 1, 0)),
-        ('foo',     (3, 2, 1)),
-        ('foobar',  (6, 3, 0))
+        ("foo", (3, 2, 256)),
+        ("bar", (3, 1, 0)),
+        ("foo", (3, 2, 1)),
+        ("foobar", (6, 3, 0)),
     )
 
     def dawg(self):
@@ -99,9 +98,9 @@ def dawg(self):
 
     def test_record_getitem(self):
         d = self.dawg()
-        assert d['foo'] == [(3, 2, 1), (3, 2, 256)]
-        assert d['bar'] == [(3, 1, 0)]
-        assert d['foobar'] == [(6, 3, 0)]
+        assert d["foo"] == [(3, 2, 1), (3, 2, 256)]
+        assert d["bar"] == [(3, 1, 0)]
+        assert d["foobar"] == [(6, 3, 0)]
 
     def test_record_items(self):
         d = self.dawg()
@@ -109,7 +108,12 @@ def test_record_items(self):
 
     def test_record_keys(self):
         d = self.dawg()
-        assert d.keys() == ['bar', 'foo', 'foo', 'foobar',]
+        assert d.keys() == [
+            "bar",
+            "foo",
+            "foo",
+            "foobar",
+        ]
 
     def test_record_iterkeys(self):
         d = self.dawg()
@@ -121,9 +125,9 @@ def test_record_iteritems(self):
 
     def test_record_keys_prefix(self):
         d = self.dawg()
-        assert d.keys('fo') == ['foo', 'foo', 'foobar']
-        assert d.keys('bar') == ['bar']
-        assert d.keys('barz') == []
+        assert d.keys("fo") == ["foo", "foo", "foobar"]
+        assert d.keys("bar") == ["bar"]
+        assert d.keys("barz") == []
 
     def test_prefixes(self):
         d = self.dawg()
diff --git a/tests/test_prediction.py b/tests/test_prediction.py
index cea8551..b2c69c5 100644
--- a/tests/test_prediction.py
+++ b/tests/test_prediction.py
@@ -3,45 +3,40 @@
 import pytest
 import dawg
 
+
 class TestPrediction(object):
-    DATA = ['ЁЖИК', 'ЁЖИКЕ', 'ЁЖ', 'ДЕРЕВНЯ', 'ДЕРЁВНЯ', 'ЕМ', 'ОЗЕРА', 'ОЗЁРА', 'ОЗЕРО']
+    DATA = [
+        "ЁЖИК",
+        "ЁЖИКЕ",
+        "ЁЖ",
+        "ДЕРЕВНЯ",
+        "ДЕРЁВНЯ",
+        "ЕМ",
+        "ОЗЕРА",
+        "ОЗЁРА",
+        "ОЗЕРО",
+    ]
     LENGTH_DATA = list(zip(DATA, ((len(w),) for w in DATA)))
 
-    REPLACES = dawg.DAWG.compile_replaces({'Е': 'Ё'})
+    REPLACES = dawg.DAWG.compile_replaces({"Е": "Ё"})
 
     SUITE = [
-        ('УЖ', []),
-        ('ЕМ', ['ЕМ']),
-        ('ЁМ', []),
-        ('ЁЖ', ['ЁЖ']),
-        ('ЕЖ', ['ЁЖ']),
-        ('ЁЖИК', ['ЁЖИК']),
-        ('ЕЖИКЕ', ['ЁЖИКЕ']),
-        ('ДЕРЕВНЯ', ['ДЕРЕВНЯ', 'ДЕРЁВНЯ']),
-        ('ДЕРЁВНЯ', ['ДЕРЁВНЯ']),
-        ('ОЗЕРА', ['ОЗЕРА', 'ОЗЁРА']),
-        ('ОЗЕРО', ['ОЗЕРО']),
+        ("УЖ", []),
+        ("ЕМ", ["ЕМ"]),
+        ("ЁМ", []),
+        ("ЁЖ", ["ЁЖ"]),
+        ("ЕЖ", ["ЁЖ"]),
+        ("ЁЖИК", ["ЁЖИК"]),
+        ("ЕЖИКЕ", ["ЁЖИКЕ"]),
+        ("ДЕРЕВНЯ", ["ДЕРЕВНЯ", "ДЕРЁВНЯ"]),
+        ("ДЕРЁВНЯ", ["ДЕРЁВНЯ"]),
+        ("ОЗЕРА", ["ОЗЕРА", "ОЗЁРА"]),
+        ("ОЗЕРО", ["ОЗЕРО"]),
     ]
 
-    SUITE_ITEMS = [
-        (
-            it[0], # key
-            [
-                (w, [(len(w),)]) # item, value pair
-                for w in it[1]
-            ]
-        )
-        for it in SUITE
-    ]
-
-    SUITE_VALUES = [
-        (
-            it[0], # key
-            [[(len(w),)] for w in it[1]]
-        )
-        for it in SUITE
-    ]
+    SUITE_ITEMS = [(it[0], [(w, [(len(w),)]) for w in it[1]]) for it in SUITE]  # key  # item, value pair
 
+    SUITE_VALUES = [(it[0], [[(len(w),)] for w in it[1]]) for it in SUITE]  # key
 
     @pytest.mark.parametrize(("word", "prediction"), SUITE)
     def test_dawg_prediction(self, word, prediction):
@@ -63,52 +58,37 @@ def test_record_dawg_items_values(self, word, prediction):
         d = dawg.RecordDAWG(str("=H"), self.LENGTH_DATA)
         assert d.similar_item_values(word, self.REPLACES) == prediction
 
+
 class TestMultiValuedPrediction(object):
     DATA = "хлѣб ёлка ель лѣс лѣсное всё всѣ бѣлёная изобрѣтён лев лёв лѣв вѣнскій".split(" ")
     LENGTH_DATA = list(zip(DATA, ((len(w),) for w in DATA)))
 
-    REPLACES = dawg.DAWG.compile_replaces({'е': ['ё', 'ѣ'], 'и': 'і'})
+    REPLACES = dawg.DAWG.compile_replaces({"е": ["ё", "ѣ"], "и": "і"})
 
     SUITE = [
-        ('осел', []),
-        ('ель', ['ель']),
-        ('ёль', []),
-        ('хлеб', ['хлѣб']),
-        ('елка', ['ёлка']),
-        ('лесное', ['лѣсное']),
-        ('лесноё', []),
-        ('лёсное', []),
-        ('изобретен', ['изобрѣтён']),
-        ('беленая', ['бѣлёная']),
-        ('белёная', ['бѣлёная']),
-        ('бѣленая', ['бѣлёная']),
-        ('бѣлёная', ['бѣлёная']),
-        ('белѣная', []),
-        ('бѣлѣная', []),
-        ('все', ['всё', 'всѣ']),
-        ('лев', ['лев', 'лёв', 'лѣв']),
-        ('венский', ['вѣнскій']),
-    ]
-
-    SUITE_ITEMS = [
-        (
-            it[0], # key
-            [
-                (w, [(len(w),)]) # item, value pair
-                for w in it[1]
-            ]
-        )
-        for it in SUITE
+        ("осел", []),
+        ("ель", ["ель"]),
+        ("ёль", []),
+        ("хлеб", ["хлѣб"]),
+        ("елка", ["ёлка"]),
+        ("лесное", ["лѣсное"]),
+        ("лесноё", []),
+        ("лёсное", []),
+        ("изобретен", ["изобрѣтён"]),
+        ("беленая", ["бѣлёная"]),
+        ("белёная", ["бѣлёная"]),
+        ("бѣленая", ["бѣлёная"]),
+        ("бѣлёная", ["бѣлёная"]),
+        ("белѣная", []),
+        ("бѣлѣная", []),
+        ("все", ["всё", "всѣ"]),
+        ("лев", ["лев", "лёв", "лѣв"]),
+        ("венский", ["вѣнскій"]),
     ]
 
-    SUITE_VALUES = [
-        (
-            it[0], # key
-            [[(len(w),)] for w in it[1]]
-        )
-        for it in SUITE
-    ]
+    SUITE_ITEMS = [(it[0], [(w, [(len(w),)]) for w in it[1]]) for it in SUITE]  # key  # item, value pair
 
+    SUITE_VALUES = [(it[0], [[(len(w),)] for w in it[1]]) for it in SUITE]  # key
 
     @pytest.mark.parametrize(("word", "prediction"), SUITE)
     def test_dawg_prediction(self, word, prediction):