From 588ecb1b7bdda91e9fedb5783d3f80c23eb0c453 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 12:05:03 +0200 Subject: [PATCH 01/10] Add handler to remove spaces in codec names --- PTT/handlers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/PTT/handlers.py b/PTT/handlers.py index 5aa7edc..1ea747a 100644 --- a/PTT/handlers.py +++ b/PTT/handlers.py @@ -118,6 +118,10 @@ def handle_bit_depth(context): parser.add_handler("codec", regex.compile(r"\b[xh][-. ]?26[45]", regex.IGNORECASE), lowercase, {"remove": True}) parser.add_handler("codec", regex.compile(r"\bhevc(?:\s?10)?\b", regex.IGNORECASE), value("hevc"), {"remove": True, "skipIfAlreadyFound": False}) parser.add_handler("codec", regex.compile(r"\b(?:dvix|mpeg2|divx|xvid|avc)\b", regex.IGNORECASE), lowercase, {"remove": True, "skipIfAlreadyFound": False}) + def handle_space_in_codec(context): + if context["result"].get("codec"): + context["result"]["codec"] = regex.sub("[ .-]", "", context["result"]["codec"]) + parser.add_handler("codec", handle_space_in_codec) # Audio parser.add_handler("audio", regex.compile(r"7\.1[. ]?Atmos\b", regex.IGNORECASE), value("7.1 Atmos"), {"remove": True}) From 02d00354db5b40adeeaf9a023c8bb16a9715e5b1 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 13:40:59 +0200 Subject: [PATCH 02/10] Fix end of title index --- PTT/handlers.py | 2 +- PTT/parse.py | 20 +++++++++++++++++++- tests/test_main.py | 12 ------------ tests/test_parser.py | 4 ++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/PTT/handlers.py b/PTT/handlers.py index 1ea747a..f9b1907 100644 --- a/PTT/handlers.py +++ b/PTT/handlers.py @@ -95,7 +95,7 @@ def add_defaults(parser: Parser): parser.add_handler("source", regex.compile(r"\bWEB[ .-]*DL(?:Rip)?\b", regex.IGNORECASE), value("WEB-DL"), {"remove": True}) parser.add_handler("source", regex.compile(r"\bWEB[ .-]*Rip\b", regex.IGNORECASE), value("WEBRip"), {"remove": True}) parser.add_handler("source", regex.compile(r"\b(?:DL|WEB|BD|BR)MUX\b", regex.IGNORECASE), none, {"remove": True}) - parser.add_handler("source", regex.compile(r"\b(DivX|XviD)\b"), none, {"remove": True}) + parser.add_handler("source", regex.compile(r"\b(DivX|XviD)\b"), none, {"remove": True}) # TODO: In the js implementation it's true. But then a test case fails in our implementation and i'm not sure why # Video depth parser.add_handler("bit_depth", regex.compile(r"\bhevc\s?10\b", regex.IGNORECASE), value("10bit")) diff --git a/PTT/parse.py b/PTT/parse.py index 52fa177..a521354 100644 --- a/PTT/parse.py +++ b/PTT/parse.py @@ -51,7 +51,7 @@ def handler(context): param_count = len(sig.parameters) transformed = transformer(clean_match or raw_match, *([result.get(name)] if param_count > 1 else [])) - before_title_match = regex.match(r'^\[([^\[\]]+)]', title) + before_title_match = regex.match(r'^\[([^[\]]+)]', title) # or '^\[([^\[\]]+)]' is_before_title = before_title_match is not None and raw_match in before_title_match.group(1) other_matches = {k: v for k, v in matched.items() if k != name} @@ -85,14 +85,24 @@ def clean_title(raw_title): cleaned_title = regex.sub(r"\.", " ", cleaned_title) cleaned_title = regex.sub(r"_", " ", cleaned_title) + print(cleaned_title) cleaned_title = regex.sub(r"[[(]movie[)\]]", "", cleaned_title, flags=regex.IGNORECASE) + print(cleaned_title) cleaned_title = NOT_ALLOWED_SYMBOLS_AT_START_AND_END.sub("", cleaned_title) + print(cleaned_title) cleaned_title = RUSSIAN_CAST_REGEX.sub("", cleaned_title) + print(cleaned_title) + # maybe [\[\[【★].*[\]】★][ .]?(.+) cleaned_title = regex.sub(r"^[[【★].*[\]】★][ .]?(.+)", r"\1", cleaned_title) + print(cleaned_title) cleaned_title = regex.sub(r"(.+)[ .]?[[【★].*[\]】★]$", r"\1", cleaned_title) + print(cleaned_title) cleaned_title = ALT_TITLES_REGEX.sub("", cleaned_title) + print(cleaned_title) cleaned_title = NOT_ONLY_NON_ENGLISH_REGEX.sub("", cleaned_title) + print(cleaned_title) cleaned_title = REMAINING_NOT_ALLOWED_SYMBOLS_AT_START_AND_END.sub("", cleaned_title) + print(cleaned_title) # Trim the resulting title cleaned_title = cleaned_title.strip() @@ -138,7 +148,12 @@ def parse(self, title): if handler.handler_name == DEBUG_HANDLER: print(f"Result: {match_result}") + print(handler.handler_name) + print("Title before: " + title) + if match_result is None: + print("Title after: " + title) + print(end_of_title) continue if match_result.get('remove', False): @@ -152,6 +167,9 @@ def parse(self, title): # adjust title index in case part of it should be removed and skipped end_of_title -= match_result.raw_match.length + print("Title after: " + title) + print(end_of_title) + # if match_result: # raw_match = match_result.group(0) # clean_match = match_result.group(1) if len(match_result.groups()) >= 1 else None diff --git a/tests/test_main.py b/tests/test_main.py index 0ef6347..e410bdd 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -16,9 +16,7 @@ def parser(): 'title': 'sons of anarchy', 'resolution': '480p', 'seasons': [5], - 'season': 5, 'episodes': [10], - 'episode': 10, 'source': 'BluRay', 'codec': 'x264', 'group': 'GAnGSteR' @@ -97,7 +95,6 @@ def parser(): ("Game of Thrones - The Complete Season 3 [HDTV]", { 'title': 'Game of Thrones', 'seasons': [3], - 'season': 3, 'source': 'HDTV' }), ("The Sopranos: The Complete Series (Season 1,2,3,4,5&6) + Extras", { @@ -122,7 +119,6 @@ def parser(): ("You.[Uncut].S01.SweSub.1080p.x264-Justiso", { 'title': 'You', 'seasons': [1], - 'season': 1, 'languages': ['swedish'], 'resolution': '1080p', 'codec': 'x264', @@ -136,7 +132,6 @@ def parser(): }), ("House MD Season 7 Complete MKV", { 'title': 'House MD', - 'season': 7, 'seasons': [7], 'container': 'mkv' }), @@ -149,7 +144,6 @@ def parser(): ("【4月/悠哈璃羽字幕社】[UHA-WINGS][不要输!恶之军团][Makeruna!! Aku no Gundan!][04][1080p AVC_AAC][简繁外挂][sc_tc]", { 'title': 'Makeruna!! Aku no Gundan!', 'episodes': [4], - 'episode': 4, 'resolution': '1080p', 'codec': 'avc', 'audio': 'aac', @@ -158,7 +152,6 @@ def parser(): 'title': 'The Westward Ⅱ', 'year': 2019, 'episodes': [17], - 'episode': 17, 'resolution': '1080p', 'codec': 'avc', 'group': 'GM-Team' @@ -167,7 +160,6 @@ def parser(): 'title': 'Black Mirror', 'year': 2017, 'seasons': [4], - 'season': 4, 'episodes': [1, 2, 3, 4, 5, 6], 'languages': ['english'], 'resolution': '1080p', @@ -176,7 +168,6 @@ def parser(): ("[neoHEVC] Student Council's Discretion / Seitokai no Ichizon [Season 1] [BD 1080p x265 HEVC AAC]", { 'title': "Student Council's Discretion / Seitokai no Ichizon", 'seasons': [1], - 'season': 1, 'resolution': '1080p', 'source': 'BDRip', 'audio': 'aac', @@ -186,7 +177,6 @@ def parser(): ("[Commie] Chihayafuru 3 - 21 [BD 720p AAC] [5F1911ED].mkv", { 'title': "Chihayafuru 3", 'episodes': [21], - 'episode': 21, 'resolution': '720p', 'source': 'BDRip', 'audio': 'aac', @@ -219,13 +209,11 @@ def parser(): ("[720p] The God of Highschool Season 1", { 'title': "The God of Highschool", 'seasons': [1], - 'season': 1, 'resolution': '720p' }), ("Heidi Audio Latino DVDRip [cap. 3 Al 18]", { 'title': "Heidi", 'episodes': [3], - 'episode': 3, 'source': 'DVDRip', 'languages': ['latino'] }) diff --git a/tests/test_parser.py b/tests/test_parser.py index 6365fec..726c76b 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -15,7 +15,7 @@ def test_parsed_output(parser): result = parser.parse(test_case) assert isinstance(result, dict) assert "title" in result - assert "episodeCode" in result + assert "episode_code" in result assert "resolution" in result assert "codec" in result assert "audio" in result @@ -53,7 +53,7 @@ def test_season_parser(parser): def test_episode_code(parser): test_case = "[Golumpa] Fairy Tail - 214 [FuniDub 720p x264 AAC] [5E46AC39]" result = parser.parse(test_case) - assert result["episodeCode"] == "5E46AC39" + assert result["episode_code"] == "5E46AC39" # def test_languages_parser(parser): # test_cases = [ From a59dfa8d2fb4c82f92cd77798ceda8240bd18781 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 13:45:40 +0200 Subject: [PATCH 03/10] Fix semantic mistake --- PTT/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PTT/parse.py b/PTT/parse.py index a521354..6cb6b10 100644 --- a/PTT/parse.py +++ b/PTT/parse.py @@ -159,7 +159,7 @@ def parse(self, title): if match_result.get('remove', False): title = title[:match_result['match_index']] + title[match_result['match_index'] + len( match_result['raw_match']):] - if match_result.get('skip_from_title') and match_result.get('match_index') and match_result[ + if not match_result.get('skip_from_title') and match_result.get('match_index') and match_result[ 'match_index'] < end_of_title: end_of_title = match_result['match_index'] if match_result.get('remove') and match_result.get('skip_from_title') and match_result[ From ba27ab234c6bce9ed6423b39913c1d2acb307a5c Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 13:54:59 +0200 Subject: [PATCH 04/10] Fix AttributeError when trying to get match length Fix main tests --- PTT/parse.py | 2 +- tests/test_main.py | 105 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 82 insertions(+), 25 deletions(-) diff --git a/PTT/parse.py b/PTT/parse.py index 6cb6b10..45d0785 100644 --- a/PTT/parse.py +++ b/PTT/parse.py @@ -165,7 +165,7 @@ def parse(self, title): if match_result.get('remove') and match_result.get('skip_from_title') and match_result[ 'match_index'] < end_of_title: # adjust title index in case part of it should be removed and skipped - end_of_title -= match_result.raw_match.length + end_of_title -= len(match_result.get("raw_match", "")) print("Title after: " + title) print(end_of_title) diff --git a/tests/test_main.py b/tests/test_main.py index e410bdd..8e66982 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -19,23 +19,32 @@ def parser(): 'episodes': [10], 'source': 'BluRay', 'codec': 'x264', - 'group': 'GAnGSteR' + 'group': 'GAnGSteR', + 'languages': [] }), ("Color.Of.Night.Unrated.DC.VostFR.BRrip.x264", { 'title': 'Color Of Night', 'unrated': True, 'languages': ['french'], 'source': 'BRRip', - 'codec': 'x264' + 'codec': 'x264', + 'seasons': [], + 'episodes': [] }), ("Da Vinci Code DVDRip", { 'title': 'Da Vinci Code', - 'source': 'DVDRip' + 'source': 'DVDRip', + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("Some.girls.1998.DVDRip", { 'title': 'Some girls', 'source': 'DVDRip', - 'year': 1998 + 'year': 1998, + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("Ecrit.Dans.Le.Ciel.1954.MULTI.DVDRIP.x264.AC3-gismo65", { 'title': 'Ecrit Dans Le Ciel', @@ -45,7 +54,9 @@ def parser(): 'dubbed': True, 'codec': 'x264', 'audio': 'ac3', - 'group': 'gismo65' + 'group': 'gismo65', + 'seasons': [], + 'episodes': [] }), ("2019 After The Fall Of New York 1983 REMASTERED BDRip x264-GHOULS", { 'title': '2019 After The Fall Of New York', @@ -53,7 +64,10 @@ def parser(): 'remastered': True, 'year': 1983, 'codec': 'x264', - 'group': 'GHOULS' + 'group': 'GHOULS', + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("Ghost In The Shell 2017 720p HC HDRip X264 AC3-EVO", { 'title': 'Ghost In The Shell', @@ -63,7 +77,10 @@ def parser(): 'resolution': '720p', 'codec': 'x264', 'audio': 'ac3', - 'group': 'EVO' + 'group': 'EVO', + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("Rogue One 2016 1080p BluRay x264-SPARKS", { 'title': 'Rogue One', @@ -71,7 +88,10 @@ def parser(): 'year': 2016, 'resolution': '1080p', 'codec': 'x264', - 'group': 'SPARKS' + 'group': 'SPARKS', + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("Desperation 2006 Multi Pal DvdR9-TBW1973", { 'title': 'Desperation', @@ -80,7 +100,9 @@ def parser(): 'languages': ['multi audio'], 'dubbed': True, 'region': 'R9', - 'group': 'TBW1973' + 'group': 'TBW1973', + 'seasons': [], + 'episodes': [] }), ("Maman, j'ai raté l'avion 1990 VFI 1080p BluRay DTS x265-HTG", { 'title': "Maman, j'ai raté l'avion", @@ -90,23 +112,31 @@ def parser(): 'resolution': '1080p', 'languages': ['french'], 'codec': 'x265', - 'group': 'HTG' + 'group': 'HTG', + 'seasons': [], + 'episodes': [] }), ("Game of Thrones - The Complete Season 3 [HDTV]", { 'title': 'Game of Thrones', 'seasons': [3], - 'source': 'HDTV' + 'source': 'HDTV', + 'languages': [], + 'episodes': [] }), ("The Sopranos: The Complete Series (Season 1,2,3,4,5&6) + Extras", { 'title': 'The Sopranos', 'seasons': [1, 2, 3, 4, 5, 6], - 'complete': True + 'complete': True, + 'languages': [], + 'episodes': [] }), ("Skins Season S01-S07 COMPLETE UK Soundtrack 720p WEB-DL", { 'seasons': [1, 2, 3, 4, 5, 6, 7], 'title': 'Skins', 'resolution': '720p', - 'source': 'WEB-DL' + 'source': 'WEB-DL', + 'languages': [], + 'episodes': [] }), ("Futurama.COMPLETE.S01-S07.720p.BluRay.x265-HETeam", { 'title': 'Futurama', @@ -114,7 +144,9 @@ def parser(): 'resolution': '720p', 'source': 'BluRay', 'codec': 'x265', - 'group': 'HETeam' + 'group': 'HETeam', + 'languages': [], + 'episodes': [] }), ("You.[Uncut].S01.SweSub.1080p.x264-Justiso", { 'title': 'You', @@ -123,23 +155,32 @@ def parser(): 'resolution': '1080p', 'codec': 'x264', 'group': 'Justiso', + 'episodes': [] }), ("Stephen Colbert 2019 10 25 Eddie Murphy 480p x264-mSD [eztv]", { 'title': 'Stephen Colbert', 'date': '2019-10-25', 'resolution': '480p', 'codec': 'x264', + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("House MD Season 7 Complete MKV", { 'title': 'House MD', 'seasons': [7], - 'container': 'mkv' + 'container': 'mkv', + 'languages': [], + 'episodes': [] }), ("2008 The Incredible Hulk Feature Film.mp4", { 'title': 'The Incredible Hulk Feature Film', 'year': 2008, 'container': 'mp4', - 'extension': 'mp4' + 'extension': 'mp4', + 'languages': [], + 'seasons': [], + 'episodes': [] }), ("【4月/悠哈璃羽字幕社】[UHA-WINGS][不要输!恶之军团][Makeruna!! Aku no Gundan!][04][1080p AVC_AAC][简繁外挂][sc_tc]", { 'title': 'Makeruna!! Aku no Gundan!', @@ -147,6 +188,8 @@ def parser(): 'resolution': '1080p', 'codec': 'avc', 'audio': 'aac', + 'languages': [], + 'seasons': [], }), ("[GM-Team][国漫][西行纪之集结篇][The Westward Ⅱ][2019][17][AVC][GB][1080P]", { 'title': 'The Westward Ⅱ', @@ -154,7 +197,9 @@ def parser(): 'episodes': [17], 'resolution': '1080p', 'codec': 'avc', - 'group': 'GM-Team' + 'group': 'GM-Team', + 'languages': [], + 'seasons': [], }), ("Черное зеркало / Black Mirror / Сезон 4 / Серии 1-6 (6) [2017, США, WEBRip 1080p] MVO + Eng Sub", { 'title': 'Black Mirror', @@ -172,7 +217,9 @@ def parser(): 'source': 'BDRip', 'audio': 'aac', 'codec': 'hevc', - 'group': 'neoHEVC' + 'group': 'neoHEVC', + 'languages': [], + 'episodes': [] }), ("[Commie] Chihayafuru 3 - 21 [BD 720p AAC] [5F1911ED].mkv", { 'title': "Chihayafuru 3", @@ -183,12 +230,16 @@ def parser(): 'container': 'mkv', 'extension': 'mkv', 'episodeCode': "5F1911ED", - 'group': "Commie" + 'group': "Commie", + 'languages': [], + 'seasons': [], }), ("[DVDRip-ITA]The Fast and the Furious: Tokyo Drift [CR-Bt]", { 'title': "The Fast and the Furious: Tokyo Drift", 'source': 'DVDRip', - 'languages': ['italian'] + 'languages': ['italian'], + 'seasons': [], + 'episodes': [] }), ("[BluRay Rip 720p ITA AC3 - ENG AC3 SUB] Hostel[2005]-LIFE[ultimafrontiera]", { 'title': "Hostel", @@ -197,25 +248,31 @@ def parser(): 'source': 'BRRip', 'audio': 'ac3', 'languages': ['english', 'italian'], - 'group': "LIFE" + 'group': "LIFE", + 'seasons': [], + 'episodes': [] }), ("[OFFICIAL ENG SUB] Soul Land Episode 121-125 [1080p][Soft Sub][Web-DL][Douluo Dalu][斗罗大陆]", { 'title': "Soul Land", 'episodes': [121, 122, 123, 124, 125], 'resolution': '1080p', 'source': 'WEB-DL', - 'languages': ['english'] + 'languages': ['english'], + 'seasons': [], }), ("[720p] The God of Highschool Season 1", { 'title': "The God of Highschool", 'seasons': [1], - 'resolution': '720p' + 'resolution': '720p', + 'languages': [], + 'episodes': [] }), ("Heidi Audio Latino DVDRip [cap. 3 Al 18]", { 'title': "Heidi", 'episodes': [3], 'source': 'DVDRip', - 'languages': ['latino'] + 'languages': ['latino'], + 'seasons': [], }) ]) def test_random_releases_parse(parser, release_name, expected): From 7b910ff481795a176f1b19af9b07af2a58e99f48 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 14:10:18 +0200 Subject: [PATCH 05/10] Refactor episodeCode to episode_code in test_main.py --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index 8e66982..da1001f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -229,7 +229,7 @@ def parser(): 'audio': 'aac', 'container': 'mkv', 'extension': 'mkv', - 'episodeCode': "5F1911ED", + 'episode_code': "5F1911ED", 'group': "Commie", 'languages': [], 'seasons': [], From 7418be1e0950925537c8be97bac27167915c0ce1 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 14:10:56 +0200 Subject: [PATCH 06/10] Update date handlers in regex and add month shortening function in transformers --- PTT/handlers.py | 2 +- PTT/transformers.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/PTT/handlers.py b/PTT/handlers.py index f9b1907..30163b0 100644 --- a/PTT/handlers.py +++ b/PTT/handlers.py @@ -29,7 +29,7 @@ def add_defaults(parser: Parser): parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?(?:0[1-9]|[12][0-9]|3[01])([. \-/\\])(?:0[1-9]|1[012])\2(?:19[6-9]|20[01])[0-9][\])]?)(?:\W|$)"), date("DD MM YYYY"), {"remove": True}) parser.add_handler("date", regex.compile(r"(?:\W)(\[?\]?(?:0[1-9]|1[012])([. \-/\\])(?:0[1-9]|[12][0-9]|3[01])\2(?:[0][1-9]|[0126789][0-9])[\])]?)(?:\W|$)"), date("MM DD YY"), {"remove": True}) parser.add_handler("date", regex.compile(r"(?:\W)(\[?\]?(?:0[1-9]|[12][0-9]|3[01])([. \-/\\])(?:0[1-9]|1[012])\2(?:[0][1-9]|[0126789][0-9])[\])]?)(?:\W|$)"), date("DD MM YY"), {"remove": True}) - parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?(?:0?[1-9]|[12][0-9]|3[01])[. ]?(?:st|nd|rd|th)?([. \-\/\\])(?:feb(?:ruary)?|jan(?:uary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sept?(?:ember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\2(?:19[7-9]|20[012])[0-9][\])]?)(?:\W|$)", regex.IGNORECASE), date(["DD MMM YYYY", "Do MMM YYYY", "Do MMMM YYYY"]), {"remove": True}) + parser.add_handler("date", regex.compile(r"(?:\W|^)([([]?(?:0?[1-9]|[12][0-9]|3[01])[. ]?(?:st|nd|rd|th)?([. \-/\\])(?:feb(?:ruary)?|jan(?:uary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sept?(?:ember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\2(?:19[7-9]|20[012])[0-9][)\]]?)(?=\W|$)", regex.IGNORECASE), date(["DD MMM YYYY", "Do MMM YYYY", "Do MMMM YYYY"]), {"remove": True}) parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?(?:0?[1-9]|[12][0-9]|3[01])[. ]?(?:st|nd|rd|th)?([. \-\/\\])(?:feb(?:ruary)?|jan(?:uary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sept?(?:ember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\2(?:0[1-9]|[0126789][0-9])[\])]?)(?:\W|$)", regex.IGNORECASE), date("DD MMM YY"), {"remove": True}) parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?20[012][0-9](?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01])[\])]?)(?:\W|$)"), date("YYYYMMDD"), {"remove": True}) diff --git a/PTT/transformers.py b/PTT/transformers.py index eca8208..8f8096d 100644 --- a/PTT/transformers.py +++ b/PTT/transformers.py @@ -1,3 +1,4 @@ +import re import arrow import regex @@ -34,10 +35,31 @@ def lowercase(input_value): def uppercase(input_value): return input_value.upper() +month_mapping = { + r"\bJanu\b": "Jan", + r"\bFebr\b": "Feb", + r"\bMarc\b": "Mar", + r"\bApri\b": "Apr", + r"\bMay\b": "May", + r"\bJune\b": "Jun", + r"\bJuly\b": "Jul", + r"\bAugu\b": "Aug", + r"\bSept\b": "Sep", + r"\bOcto\b": "Oct", + r"\bNove\b": "Nov", + r"\bDece\b": "Dec", +} + + +def convert_months(date_str): + for month, shortened in month_mapping.items(): + date_str = re.sub(month, shortened, date_str) + return date_str def date(date_format): def inner(input_value): sanitized = regex.sub(r"\W+", " ", input_value).strip() + sanitized = convert_months(sanitized) print(f"Attempting to parse date: {sanitized}") formats = [date_format] if not isinstance(date_format, list) else date_format for fmt in formats: From 8ffdee601330688e7a3826222e866a8d77259429 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 15:58:41 +0200 Subject: [PATCH 07/10] Replace 're' module with 'regex' in date conversion function --- PTT/transformers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/PTT/transformers.py b/PTT/transformers.py index 8f8096d..770f3a4 100644 --- a/PTT/transformers.py +++ b/PTT/transformers.py @@ -1,5 +1,3 @@ -import re - import arrow import regex @@ -53,7 +51,7 @@ def uppercase(input_value): def convert_months(date_str): for month, shortened in month_mapping.items(): - date_str = re.sub(month, shortened, date_str) + date_str = regex.sub(month, shortened, date_str, flags=regex.IGNORECASE) return date_str def date(date_format): From e9de80d49b356ba3bb2dcb2ba13550fa8ef121af Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 16:42:09 +0200 Subject: [PATCH 08/10] Fix wrong episode --- PTT/handlers.py | 24 ++++++++---------------- PTT/parse.py | 24 +++++++++--------------- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/PTT/handlers.py b/PTT/handlers.py index 30163b0..a898f54 100644 --- a/PTT/handlers.py +++ b/PTT/handlers.py @@ -136,7 +136,7 @@ def handle_space_in_codec(context): parser.add_handler("audio", regex.compile(r"\bQ?AAC(?:[. ]?2[. ]0|x2)?\b", regex.IGNORECASE), value("aac"), {"remove": True}) # Group - parser.add_handler("group", regex.compile(r"- ?(?!\d+$|S\d+|\d+x|ep?\d+|[^[]+]$)([^\-. []+[^\-. [)\]\d][^\-. [)\]]*)(?:\[[\w.-]+])?(?=\.\w{2,4}$|$)", regex.IGNORECASE), none, {"remove": True}) + parser.add_handler("group", regex.compile(r"- ?(?!\d+$|S\d+|\d+x|ep?\d+|[^[]+]$)([^\-. []+[^\-. [)\]\d][^\-. [)\]]*)(?:\[[\w.-]+])?(?=\.\w{2,4}$|$)", regex.IGNORECASE), none, {"remove": False}) # TODO: I js implementation, it's True, but doesn't get removed?!.... # Container parser.add_handler("container", regex.compile(r"\.?[\[(]?\b(MKV|AVI|MP4|WMV|MPG|MPEG)\b[\])]?", regex.IGNORECASE), lowercase) @@ -241,21 +241,13 @@ def handle_episodes(context): beginning_title = title[:end_index] middle_title = title[start_index:end_index] - # Regex patterns to capture episode information, avoiding common prefixes like "movie" or "film" - regex_patterns = [ - r'(? Date: Mon, 29 Apr 2024 16:43:23 +0200 Subject: [PATCH 09/10] Update DivX/XviD handler to not remove the match --- PTT/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PTT/handlers.py b/PTT/handlers.py index a898f54..fda6061 100644 --- a/PTT/handlers.py +++ b/PTT/handlers.py @@ -95,7 +95,7 @@ def add_defaults(parser: Parser): parser.add_handler("source", regex.compile(r"\bWEB[ .-]*DL(?:Rip)?\b", regex.IGNORECASE), value("WEB-DL"), {"remove": True}) parser.add_handler("source", regex.compile(r"\bWEB[ .-]*Rip\b", regex.IGNORECASE), value("WEBRip"), {"remove": True}) parser.add_handler("source", regex.compile(r"\b(?:DL|WEB|BD|BR)MUX\b", regex.IGNORECASE), none, {"remove": True}) - parser.add_handler("source", regex.compile(r"\b(DivX|XviD)\b"), none, {"remove": True}) # TODO: In the js implementation it's true. But then a test case fails in our implementation and i'm not sure why + parser.add_handler("source", regex.compile(r"\b(DivX|XviD)\b"), none, {"remove": False}) # TODO: In the js implementation it's true. But then a test case fails in our implementation and i'm not sure why # Video depth parser.add_handler("bit_depth", regex.compile(r"\bhevc\s?10\b", regex.IGNORECASE), value("10bit")) From e63334b28b597816392ec66bba83684aaba0d402 Mon Sep 17 00:00:00 2001 From: davidemarcoli Date: Mon, 29 Apr 2024 17:27:48 +0200 Subject: [PATCH 10/10] Update regex patterns and test cases for episodes and season parsing --- PTT/handlers.py | 4 +-- PTT/parse.py | 63 ++++++++++++++---------------------------- PTT/transformers.py | 1 - tests/test_episodes.py | 7 +++-- tests/test_season.py | 16 +++++++++++ 5 files changed, 42 insertions(+), 49 deletions(-) diff --git a/PTT/handlers.py b/PTT/handlers.py index fda6061..a7c51a4 100644 --- a/PTT/handlers.py +++ b/PTT/handlers.py @@ -174,9 +174,9 @@ def handle_volumes(context): # Seasons parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)((?:s\d{1,2}[., +/\\&-]+)+s\d{1,2}\b)", regex.IGNORECASE), range_func, { "remove": True }) parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s\d{2,}-\d{2,}\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True }) - parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s[1-9]-[2-9]\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True }) + parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s[1-9]-[2-9])[)\]]?", regex.IGNORECASE), range_func, { "remove": True }) parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons?|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[., /\\&]+)+\d{1,2}\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True }) - parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[. -]+)+[1-9]\d?\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True }) + parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons?|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[.-]+)+[1-9]\d?\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True }) parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?season[. ]?[([]?((?:\d{1,2}[. -]+)+[1-9]\d?\b)[)\]]?(?!.*\.\w{2,4}$)", regex.IGNORECASE), range_func, { "remove": True }) parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?\bseasons?\b[. -]?(\d{1,2}[. -]?(?:to|thru|and|\+|:)[. -]?\d{1,2})\b", regex.IGNORECASE), range_func, { "remove": True }) parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:saison|seizoen|season|series|temp(?:orada)?):?[. ]?(\d{1,2})", regex.IGNORECASE), array(integer)) diff --git a/PTT/parse.py b/PTT/parse.py index 133744e..ba6c900 100644 --- a/PTT/parse.py +++ b/PTT/parse.py @@ -11,7 +11,7 @@ NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#[【★]+|[ \-:/\\[|{{(#$&^]+$") REMAINING_NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#]+|]$") -DEBUG_HANDLER = None +DEBUG_HANDLER = "seasons" def extend_options(options=None): @@ -37,13 +37,13 @@ def handler(context): if name in result and options.get('skipIfAlreadyFound', False): return None - if name == DEBUG_HANDLER: - print(f"Regexp Pattern: {reg_exp.pattern}") - print(f"Title: {title}") + # if name == DEBUG_HANDLER: + # print(f"Regexp Pattern: {reg_exp.pattern}") + # print(f"Title: {title}") match = reg_exp.search(title) - if name == DEBUG_HANDLER: - print(f"Match: {match}") + # if name == DEBUG_HANDLER: + # print(f"Match: {match}") if match: raw_match = match.group(0) clean_match = match.group(1) if len(match.groups()) >= 1 else raw_match @@ -55,8 +55,8 @@ def handler(context): is_before_title = before_title_match is not None and raw_match in before_title_match.group(1) other_matches = {k: v for k, v in matched.items() if k != name} - if name == DEBUG_HANDLER: - print(f"Other Matches: {other_matches}") + # if name == DEBUG_HANDLER: + # print(f"Other Matches: {other_matches}") is_skip_if_first = options.get('skipIfFirst', False) and other_matches and all( match.start() < other_matches[k]['match_index'] for k in other_matches ) @@ -136,17 +136,17 @@ def parse(self, title): } ) - if handler.handler_name == DEBUG_HANDLER: - print(f"Result: {match_result}") - - if DEBUG_HANDLER: - print(handler.handler_name) - print("Title before: " + title) + # if handler.handler_name == DEBUG_HANDLER: + # print(f"Result: {match_result}") + # + # if DEBUG_HANDLER: + # print(handler.handler_name) + # print("Title before: " + title) if match_result is None: - if DEBUG_HANDLER: - print("Title after: " + title) - print(end_of_title) + # if DEBUG_HANDLER: + # print("Title after: " + title) + # print(end_of_title) continue if match_result.get('remove', False): @@ -160,32 +160,9 @@ def parse(self, title): # adjust title index in case part of it should be removed and skipped end_of_title -= len(match_result.get("raw_match", "")) - if DEBUG_HANDLER: - print("Title after: " + title) - print(end_of_title) - - # if match_result: - # raw_match = match_result.group(0) - # clean_match = match_result.group(1) if len(match_result.groups()) >= 1 else None - # transformed_match = raw_match if clean_match is None else clean_match - # if handler["transformer"]: - # transformed = handler["transformer"](transformed_match) - # else: - # transformed = transformed_match - # - # # If the handler demands removal, adjust the title and end_of_title accordingly. - # if options.get("remove", False) and match_result.start() < end_of_title: - # title = title[:match_result.start()] + title[match_result.end():] - # end_of_title -= len(raw_match) - # - # # Save matched data and result. - # matched[handler["name"]] = {"raw_match": raw_match, "match_index": match_result.start()} - # result[handler["name"]] = transformed - # - # # If skipping from title, adjust the title and potentially end_of_title. - # if options.get("skipFromTitle", False) and match_result.start() < end_of_title: - # title = title.replace(raw_match, "", 1) - # end_of_title = min(end_of_title, match_result.start()) + # if DEBUG_HANDLER: + # print("Title after: " + title) + # print(end_of_title) if not result.get("episodes"): result["episodes"] = [] diff --git a/PTT/transformers.py b/PTT/transformers.py index 770f3a4..237fdae 100644 --- a/PTT/transformers.py +++ b/PTT/transformers.py @@ -58,7 +58,6 @@ def date(date_format): def inner(input_value): sanitized = regex.sub(r"\W+", " ", input_value).strip() sanitized = convert_months(sanitized) - print(f"Attempting to parse date: {sanitized}") formats = [date_format] if not isinstance(date_format, list) else date_format for fmt in formats: try: diff --git a/tests/test_episodes.py b/tests/test_episodes.py index b3b86dc..a2519e0 100644 --- a/tests/test_episodes.py +++ b/tests/test_episodes.py @@ -40,13 +40,14 @@ def parser(): ("Orange Is The New Black Season 5 Episodes 1-10 INCOMPLETE (LEAKED)", list(range(1, 11))), ("Vikings.Season.05.Ep(01-10).720p.WebRip.2Ch.x265.PSA", list(range(1, 11))), ("Naruto Shippuden Ep 107 - Strange Bedfellows.mkv", [107]), - - ("Friends - [7x23-24] - The One with Monica and Chandler's Wedding + Audio Commentary.mkv", [23, 24]), + ("Friends - [7x23-24] - The One with Monica and Chandler's Wedding + Audio Commentary.mkv", list(range(23, 25))), ("Yu-Gi-Oh 3x089 - Awakening of Evil (Part 4).avi", [89]), ("611-612 - Desperate Measures, Means & Ends.mp4", [611, 612]), ("[TBox] Dragon Ball Z Full 1-291(Subbed Jap Vers)", list(range(1, 292))), ("Naruto Shippuden - 107 - Strange Bedfellows.mkv", [107]), - ("[AnimeRG] Naruto Shippuden - 107 [720p] [x265] [pseudo].mkv", [107]) + ("[AnimeRG] Naruto Shippuden - 107 [720p] [x265] [pseudo].mkv", [107]), + ("Game.of.Thrones.S01.e01-02.2160p.UHD.BluRay.x265-Morpheus", list(range(1, 3))), + ("Breaking Bad S03e01-13 (1080p Ita Eng Spa h265 SubS) 2ndREPACK byMe7alh", list(range(1, 14))), ]) def test_episode_parser(release_name, expected_episode, parser): result = parser.parse(release_name) diff --git a/tests/test_season.py b/tests/test_season.py index 3ca7fdf..adcc20d 100644 --- a/tests/test_season.py +++ b/tests/test_season.py @@ -40,6 +40,22 @@ def parser(): ("Stargate Atlantis ALL Seasons - S01 / S02 / S03 / S04 / S05", list(range(1, 6))), ("Stargate Atlantis Complete (Season 1 2 3 4 5) 720p HEVC x265", list(range(1, 6))), ("Skam.S01-S02-S03.SweSub.720p.WEB-DL.H264", list(range(1, 4))), + ("Игра престолов / Game of Thrones [S1-8] (2011-2019) Blu-Ray Remux 2160p | 4K | HEVC | HDR | Dolby Vision | D, P", list(range(1, 9))), + ("Игра престолов / Game of Thrones (2011-2019) BDRemux [H.264/1080p] (S1-8E1-73 of 73) AlexFilm, LostFilm, FoxLife, Ren-TV, Kravec, Jaskier, FocusStudio, NewStudio, FOX, AMS, OmskBird", list(range(1, 9))), + ("Game.of.Thrones.S01-07.LostFilm.AlexFilm.Dub.Eng.BDRip.1080p", list(range(1, 8))), + ("Game.of.Thrones.S01.2160p.UHD.BluRay.x265-SpaceHD13", [1]), + ("Game_Of_Thrones_Season 01", [1]), + ("Game.of.Thrones.S01.e01-02.2160p.UHD.BluRay.x265-Morpheus", [1]), + ("Spongebob Squarepants Season 1-8 DvdRip", list(range(1, 9))), + ("[DBDRaws][海绵宝宝 第一季spongebob squarepants Season 1][0120TV][1080P][WebRip][HEVC10bit][AC3][MKV]", [1]), + ("Supernatural_s15_LostFilm_1080p", [15]), + ("Supernatural (2005) Season 15 S15 (1080p BluRay x265 HEVC 10bit AAC 5.1 Silence)", [15]), + ("Supernatural Season 1-10 1 2 3 4 5 6 7 8 9 10 Complete HD {KURD}", list(range(1, 11))), + ("Breaking Bad - Complete Season 1,2,3,4,5 + Extras - BluRay 1080p", list(range(1, 6))), + ("Breaking Bad Season 1, 2 & 3 + Extras Blu-ray BDRip TSV", list(range(1, 4))), + ("Silo.Season.1.2160p.DV.H265-MIXED", [1]), + ("Stranger.Things.S01-S04.1080p.WEB-DL.DDP5.1.x264-AOC", list(range(1, 5))), + ("Stranger.Things.Netflix.S01 S04.Complete.1080p 720p.MKV.MP4.AAC.WEB-DL [62.1GB]", list(range(1, 5))), ]) def test_season_detection(parser, release_name, expected_seasons): result = parser.parse(release_name)