fix: update tests. multiple tweaks.

dreulavelle · Jul 19, 2024 · 807287e
1 parent 157c972
commit 807287e
Show file tree

Hide file tree

Showing 11 changed files with 506 additions and 445 deletions.
diff --git a/Makefile b/Makefile
@@ -1,11 +1,17 @@
-.PHONY: install lint sort test coverage pr-ready publish
+.PHONY: install lint sort test coverage pr-ready publish clean
 
 SRC_DIR := ./PTT
 
 # Install dependencies (with dev deps for development)
 install:
 	@poetry install --with dev
 
+clean:
+	@find . -type f -name '*.pyc' -exec rm -f {} +
+	@find . -type d -name '__pycache__' -exec rm -rf {} +
+	@find . -type d -name '.pytest_cache' -exec rm -rf {} +
+	@find . -type d -name '.ruff_cache' -exec rm -rf {} +
+
 # Run linters
 lint:
 	@poetry run ruff check $(SRC_DIR)

diff --git a/PTT/handlers.py b/PTT/handlers.py
diff --git a/PTT/parse.py b/PTT/parse.py
@@ -5,6 +5,7 @@
 
 from .transformers import none
 
+# Non-English characters range
 NON_ENGLISH_CHARS = (
     "\u3040-\u30ff"  # Japanese characters
     "\u3400-\u4dbf"  # Chinese characters
@@ -17,43 +18,11 @@
     "\u0c80-\u0cff"  # Kannada characters
     "\u0d00-\u0d7f"  # Malayalam characters
     "\u0e00-\u0e7f"  # Thai characters
-    "\u1000-\u109f"  # Myanmar characters
-    "\u10a0-\u10ff"  # Georgian characters
-    "\u1100-\u11ff"  # Georgian characters
-    "\u1200-\u137f"  # Ethiopic characters
-    "\u1380-\u167f"  # Ethiopic characters
-    "\u1680-\u169f"  # Ethiopic characters
-    "\u16a0-\u16ff"  # Ethiopic characters
-    "\u1700-\u171f"  # Ethiopic characters
-    "\u1720-\u173f"  # Ethiopic characters
-    "\u1740-\u175f"  # Ethiopic characters
-    "\u1760-\u177f"  # Ethiopic characters
-    "\u1780-\u17bf"  # Ethiopic characters
-    "\u17c0-\u17df"  # Ethiopic characters
-    "\u17e0-\u17ff"  # Ethiopic characters
-    "\u1800-\u180f"  # Ethiopic characters
-    "\u1810-\u181f"  # Ethiopic characters
-    "\u1820-\u184f"  # Ethiopic characters
-    "\u1850-\u187f"  # Ethiopic characters
-    "\u1880-\u18af"  # Ethiopic characters
-    "\u18b0-\u18df"  # Ethiopic characters
-    "\U0001F600-\U0001F64F"  # Emoticons
-    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
-    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
-    "\U0001F700-\U0001F77F"  # Alchemical Symbols
-    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
-    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
-    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
-    "\U0001FA00-\U0001FA6F"  # Chess Symbols
-    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
-    "\U00002702-\U000027B0"  # Dingbats
-    "\U000024C2-\U0001F251"  # Enclosed Characters
 )
 
 RUSSIAN_CAST_REGEX = regex.compile(r"\([^)]*[\u0400-\u04ff][^)]*\)$|(?<=\/.*)\(.*\)$")
 ALT_TITLES_REGEX = regex.compile(rf"[^/|(]*[{NON_ENGLISH_CHARS}][^/|]*[/|]|[/|][^/|(]*[{NON_ENGLISH_CHARS}][^/|]*")
-NOT_ONLY_NON_ENGLISH_REGEX = regex.compile(
-    rf"(?<=[a-zA-Z][^{NON_ENGLISH_CHARS}]+)[{NON_ENGLISH_CHARS}].*[{NON_ENGLISH_CHARS}]|[{NON_ENGLISH_CHARS}].*[{NON_ENGLISH_CHARS}](?=[^{NON_ENGLISH_CHARS}]+[a-zA-Z])")
+NOT_ONLY_NON_ENGLISH_REGEX = regex.compile(rf"(?<=[a-zA-Z][^{NON_ENGLISH_CHARS}]+)[{NON_ENGLISH_CHARS}].*[{NON_ENGLISH_CHARS}]|[{NON_ENGLISH_CHARS}].*[{NON_ENGLISH_CHARS}](?=[^{NON_ENGLISH_CHARS}]+[a-zA-Z])")
 NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#[【★]+|[ \-:/\\[|{{(#$&^]+$")
 REMAINING_NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#]+|]$")
 
@@ -63,6 +32,7 @@
 def extend_options(options: Dict[str, Any] = None) -> Dict[str, Any]:
     """
     Extend the options dictionary with default values.
+
     :param options: The original options dictionary.
     :return: The extended options dictionary.
     """
@@ -106,24 +76,16 @@ def handler(context: Dict[str, Any]) -> Union[Dict[str, Any], None]:
             param_count = len(sig.parameters)
             transformed = transformer(clean_match or raw_match, *([result.get(name)] if param_count > 1 else []))
 
-            before_title_match = regex.match(r"^\[([^[\]]+)]", title) # or "^\[([^\[\]]+)]"
+            before_title_match = regex.match(r"^\[([^[\]]+)]", title)
             is_before_title = before_title_match is not None and raw_match in before_title_match.group(1)
 
             other_matches = {k: v for k, v in matched.items() if k != name}
-
-            is_skip_if_first = options.get("skipIfFirst", False) and other_matches and all(
-                match.start() < other_matches[k]["match_index"] for k in other_matches
-            )
+            is_skip_if_first = options.get("skipIfFirst", False) and other_matches and all(match.start() < other_matches[k]["match_index"] for k in other_matches)
 
             if transformed is not None and not is_skip_if_first:
                 matched[name] = matched.get(name, {"raw_match": raw_match, "match_index": match.start()})
                 result[name] = options.get("value", transformed)
-                return {
-                    "raw_match": raw_match,
-                    "match_index": match.start(),
-                    "remove": options.get("remove", False),
-                    "skip_from_title": is_before_title or options.get("skipFromTitle", False)
-                }
+                return {"raw_match": raw_match, "match_index": match.start(), "remove": options.get("remove", False), "skip_from_title": is_before_title or options.get("skipFromTitle", False)}
         return None
 
     handler.__name__ = name
@@ -171,7 +133,7 @@ class Parser:
     The parser can be extended with new handlers using the add_handler method. The handler can be a function or a
     regular expression pattern. If a regular expression pattern is used, the parser will use the first group as the
     match to be transformed by the transformer function.
-    
+
     Example:
         >>> parser = Parser()
         >>> parser.add_handler("seasons", r"Season (\\d+)", int)

diff --git a/tests/test_codec.py b/tests/test_codec.py
@@ -12,16 +12,15 @@ def parser():
 
 
 @pytest.mark.parametrize("release_name, expected_codec, expected_bit_depth", [
-    ("Nocturnal Animals 2016 VFF 1080p BluRay DTS HEVC-HD2", "hevc", None),
+    ("Nocturnal Animals 2016 VFF 1080p BluRay DTS HEVC-HD2", "x265", None),
     ("doctor_who_2005.8x12.death_in_heaven.720p_hdtv_x264-fov", "x264", None),
-    ("The Vet Life S02E01 Dunk-A-Doctor 1080p ANPL WEB-DL AAC2 0 H 264-RTN", "h264", None), # TODO: h 264 is returned (space) WHY?
-    ("Gotham S03E17 XviD-AFG", "xvid", None), # TODO: it's getting remove when searching for source (WHY?)
+    ("The Vet Life S02E01 Dunk-A-Doctor 1080p ANPL WEB-DL AAC2 0 H 264-RTN", "h264", None),
+    ("Gotham S03E17 XviD-AFG", "xvid", None),
     ("Jimmy Kimmel 2017 05 03 720p HDTV DD5 1 MPEG2-CTL", "mpeg2", None),
-    ("[Anime Time] Re Zero kara Hajimeru Isekai Seikatsu (Season 2 Part 1) [1080p][HEVC10bit x265][Multi Sub]", "hevc",
-     "10bit"),
-    ("[naiyas] Fate Stay Night - Unlimited Blade Works Movie [BD 1080P HEVC10 QAACx2 Dual Audio]", "hevc", "10bit"),
-    ("[DB]_Bleach_264_[012073FE].avi", None, None),  # Test case to ensure "264" isn't wrongly detected as a codec
-    ("[DB]_Bleach_265_[B4A04EC9].avi", None, None),  # Test case to ensure "265" isn't wrongly detected as a codec
+    ("[Anime Time] Re Zero kara Hajimeru Isekai Seikatsu (Season 2 Part 1) [1080p][HEVC10bit x265][Multi Sub]", "x265", "10bit"),
+    ("[naiyas] Fate Stay Night - Unlimited Blade Works Movie [BD 1080P HEVC10 QAACx2 Dual Audio]", "x265", "10bit"),
+    ("[DB]_Bleach_264_[012073FE].avi", None, None),
+    ("[DB]_Bleach_265_[B4A04EC9].avi", None, None),
 ])
 def test_codec_detection(parser, release_name, expected_codec, expected_bit_depth):
     result = parser.parse(release_name)
@@ -33,4 +32,4 @@ def test_codec_detection(parser, release_name, expected_codec, expected_bit_dept
             assert "bit_depth" in result, f"bit_depth key missing in result for {release_name}"
             assert result["bit_depth"] == expected_bit_depth, f"Incorrect bitDepth detected for {release_name}"
     else:
-        assert "codec" not in result, f"Unexpected codec detection for {release_name}"
+        assert "codec" not in result, f"Unexpected codec found: {result['codec']} in {release_name}"
diff --git a/tests/test_episodes.py b/tests/test_episodes.py
@@ -187,6 +187,8 @@ def parser():
     ("Проклятие острова ОУК_ 5-й сезон 09-я серия_ Прорыв Дэна.avi", [9]),
     ("Разрушители легенд. MythBusters. Сезон 15. Эпизод 09. Скрытая угроза (2015).avi", [9]),
     ("Серия 11.mkv", [11]),
+    ("Anatomia De Grey - Temporada 19 [HDTV][Cap.1905][Castellano][www.AtomoHD.nu].avi", [19]),
+    ("[SubsPlease] Fairy Tail - 100 Years Quest - 05 (1080p) [1107F3A9].mkv", [5]),
 ])
 def test_episode_parser(release_name, expected_episode, parser):
     result = parser.parse(release_name)

diff --git a/tests/test_group.py b/tests/test_group.py
@@ -28,7 +28,7 @@ def parser():
     ("[H3] Hunter x Hunter - 38 [1280x720] [x264]", "H3"),
     ("[KNK E MMS Fansubs] Nisekoi - 20 Final [PT-BR].mkv", "KNK E MMS Fansubs"),
     ("[ToonsHub] JUJUTSU KAISEN - S02E01 (Japanese 2160p x264 AAC) [Multi-Subs].mkv", "ToonsHub"),
-    ("[HD-ELITE.NET] -  The.Art.Of.The.Steal.2014.DVDRip.XviD.Dual.Aud", None), # Should not be detected as group
+    ("[HD-ELITE.NET] -  The.Art.Of.The.Steal.2014.DVDRip.XviD.Dual.Aud", "HD-ELITE.NET"), # Bracketed site tag is now expected to be detected as the group
     ("[Russ]Lords.Of.London.2014.XviD.H264.AC3-BladeBDP", "BladeBDP"),
     ("Jujutsu Kaisen S02E01 2160p WEB H.265 AAC -Tsundere-Raws (B-Global).mkv", "B-Global"),
     ("[DVD-RIP] Kaavalan (2011) Sruthi XVID [700Mb] [TCHellRaiser]", None),