Skip to content

Commit

Permalink
Update regex patterns and test cases for episodes and season parsing
Browse files (browse the repository at this point in the history)
  • Loading branch information
davidemarcoli committed Apr 29, 2024
1 parent 2265598 commit e63334b
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 49 deletions.
4 changes: 2 additions & 2 deletions PTT/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,9 @@ def handle_volumes(context):
# Seasons
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)((?:s\d{1,2}[., +/\\&-]+)+s\d{1,2}\b)", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s\d{2,}-\d{2,}\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s[1-9]-[2-9]\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s[1-9]-[2-9])[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons?|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[., /\\&]+)+\d{1,2}\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[. -]+)+[1-9]\d?\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons?|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[.-]+)+[1-9]\d?\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?season[. ]?[([]?((?:\d{1,2}[. -]+)+[1-9]\d?\b)[)\]]?(?!.*\.\w{2,4}$)", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?\bseasons?\b[. -]?(\d{1,2}[. -]?(?:to|thru|and|\+|:)[. -]?\d{1,2})\b", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:saison|seizoen|season|series|temp(?:orada)?):?[. ]?(\d{1,2})", regex.IGNORECASE), array(integer))
Expand Down
63 changes: 20 additions & 43 deletions PTT/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#[【★]+|[ \-:/\\[|{{(#$&^]+$")
REMAINING_NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#]+|]$")

DEBUG_HANDLER = None
DEBUG_HANDLER = "seasons"


def extend_options(options=None):
Expand All @@ -37,13 +37,13 @@ def handler(context):
if name in result and options.get('skipIfAlreadyFound', False):
return None

if name == DEBUG_HANDLER:
print(f"Regexp Pattern: {reg_exp.pattern}")
print(f"Title: {title}")
# if name == DEBUG_HANDLER:
# print(f"Regexp Pattern: {reg_exp.pattern}")
# print(f"Title: {title}")

match = reg_exp.search(title)
if name == DEBUG_HANDLER:
print(f"Match: {match}")
# if name == DEBUG_HANDLER:
# print(f"Match: {match}")
if match:
raw_match = match.group(0)
clean_match = match.group(1) if len(match.groups()) >= 1 else raw_match
Expand All @@ -55,8 +55,8 @@ def handler(context):
is_before_title = before_title_match is not None and raw_match in before_title_match.group(1)

other_matches = {k: v for k, v in matched.items() if k != name}
if name == DEBUG_HANDLER:
print(f"Other Matches: {other_matches}")
# if name == DEBUG_HANDLER:
# print(f"Other Matches: {other_matches}")
is_skip_if_first = options.get('skipIfFirst', False) and other_matches and all(
match.start() < other_matches[k]['match_index'] for k in other_matches
)
Expand Down Expand Up @@ -136,17 +136,17 @@ def parse(self, title):
}
)

if handler.handler_name == DEBUG_HANDLER:
print(f"Result: {match_result}")

if DEBUG_HANDLER:
print(handler.handler_name)
print("Title before: " + title)
# if handler.handler_name == DEBUG_HANDLER:
# print(f"Result: {match_result}")
#
# if DEBUG_HANDLER:
# print(handler.handler_name)
# print("Title before: " + title)

if match_result is None:
if DEBUG_HANDLER:
print("Title after: " + title)
print(end_of_title)
# if DEBUG_HANDLER:
# print("Title after: " + title)
# print(end_of_title)
continue

if match_result.get('remove', False):
Expand All @@ -160,32 +160,9 @@ def parse(self, title):
# adjust title index in case part of it should be removed and skipped
end_of_title -= len(match_result.get("raw_match", ""))

if DEBUG_HANDLER:
print("Title after: " + title)
print(end_of_title)

# if match_result:
# raw_match = match_result.group(0)
# clean_match = match_result.group(1) if len(match_result.groups()) >= 1 else None
# transformed_match = raw_match if clean_match is None else clean_match
# if handler["transformer"]:
# transformed = handler["transformer"](transformed_match)
# else:
# transformed = transformed_match
#
# # If the handler demands removal, adjust the title and end_of_title accordingly.
# if options.get("remove", False) and match_result.start() < end_of_title:
# title = title[:match_result.start()] + title[match_result.end():]
# end_of_title -= len(raw_match)
#
# # Save matched data and result.
# matched[handler["name"]] = {"raw_match": raw_match, "match_index": match_result.start()}
# result[handler["name"]] = transformed
#
# # If skipping from title, adjust the title and potentially end_of_title.
# if options.get("skipFromTitle", False) and match_result.start() < end_of_title:
# title = title.replace(raw_match, "", 1)
# end_of_title = min(end_of_title, match_result.start())
# if DEBUG_HANDLER:
# print("Title after: " + title)
# print(end_of_title)

if not result.get("episodes"):
result["episodes"] = []
Expand Down
1 change: 0 additions & 1 deletion PTT/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def date(date_format):
def inner(input_value):
sanitized = regex.sub(r"\W+", " ", input_value).strip()
sanitized = convert_months(sanitized)
print(f"Attempting to parse date: {sanitized}")
formats = [date_format] if not isinstance(date_format, list) else date_format
for fmt in formats:
try:
Expand Down
7 changes: 4 additions & 3 deletions tests/test_episodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ def parser():
("Orange Is The New Black Season 5 Episodes 1-10 INCOMPLETE (LEAKED)", list(range(1, 11))),
("Vikings.Season.05.Ep(01-10).720p.WebRip.2Ch.x265.PSA", list(range(1, 11))),
("Naruto Shippuden Ep 107 - Strange Bedfellows.mkv", [107]),
("Friends - [7x23-24] - The One with Monica and Chandler's Wedding + Audio Commentary.mkv", [23, 24]),
("Friends - [7x23-24] - The One with Monica and Chandler's Wedding + Audio Commentary.mkv", list(range(23, 25))),
("Yu-Gi-Oh 3x089 - Awakening of Evil (Part 4).avi", [89]),
("611-612 - Desperate Measures, Means & Ends.mp4", [611, 612]),
("[TBox] Dragon Ball Z Full 1-291(Subbed Jap Vers)", list(range(1, 292))),
("Naruto Shippuden - 107 - Strange Bedfellows.mkv", [107]),
("[AnimeRG] Naruto Shippuden - 107 [720p] [x265] [pseudo].mkv", [107])
("[AnimeRG] Naruto Shippuden - 107 [720p] [x265] [pseudo].mkv", [107]),
("Game.of.Thrones.S01.e01-02.2160p.UHD.BluRay.x265-Morpheus", list(range(1, 3))),
("Breaking Bad S03e01-13 (1080p Ita Eng Spa h265 SubS) 2ndREPACK byMe7alh", list(range(1, 14))),
])
def test_episode_parser(release_name, expected_episode, parser):
result = parser.parse(release_name)
Expand Down
16 changes: 16 additions & 0 deletions tests/test_season.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ def parser():
("Stargate Atlantis ALL Seasons - S01 / S02 / S03 / S04 / S05", list(range(1, 6))),
("Stargate Atlantis Complete (Season 1 2 3 4 5) 720p HEVC x265", list(range(1, 6))),
("Skam.S01-S02-S03.SweSub.720p.WEB-DL.H264", list(range(1, 4))),
("Игра престолов / Game of Thrones [S1-8] (2011-2019) Blu-Ray Remux 2160p | 4K | HEVC | HDR | Dolby Vision | D, P", list(range(1, 9))),
("Игра престолов / Game of Thrones (2011-2019) BDRemux [H.264/1080p] (S1-8E1-73 of 73) AlexFilm, LostFilm, FoxLife, Ren-TV, Kravec, Jaskier, FocusStudio, NewStudio, FOX, AMS, OmskBird", list(range(1, 9))),
("Game.of.Thrones.S01-07.LostFilm.AlexFilm.Dub.Eng.BDRip.1080p", list(range(1, 8))),
("Game.of.Thrones.S01.2160p.UHD.BluRay.x265-SpaceHD13", [1]),
("Game_Of_Thrones_Season 01", [1]),
("Game.of.Thrones.S01.e01-02.2160p.UHD.BluRay.x265-Morpheus", [1]),
("Spongebob Squarepants Season 1-8 DvdRip", list(range(1, 9))),
("[DBDRaws][海绵宝宝 第一季spongebob squarepants Season 1][0120TV][1080P][WebRip][HEVC10bit][AC3][MKV]", [1]),
("Supernatural_s15_LostFilm_1080p", [15]),
("Supernatural (2005) Season 15 S15 (1080p BluRay x265 HEVC 10bit AAC 5.1 Silence)", [15]),
("Supernatural Season 1-10 1 2 3 4 5 6 7 8 9 10 Complete HD {KURD}", list(range(1, 11))),
("Breaking Bad - Complete Season 1,2,3,4,5 + Extras - BluRay 1080p", list(range(1, 6))),
("Breaking Bad Season 1, 2 & 3 + Extras Blu-ray BDRip TSV", list(range(1, 4))),
("Silo.Season.1.2160p.DV.H265-MIXED", [1]),
("Stranger.Things.S01-S04.1080p.WEB-DL.DDP5.1.x264-AOC", list(range(1, 5))),
("Stranger.Things.Netflix.S01 S04.Complete.1080p 720p.MKV.MP4.AAC.WEB-DL [62.1GB]", list(range(1, 5))),
])
def test_season_detection(parser, release_name, expected_seasons):
result = parser.parse(release_name)
Expand Down

0 comments on commit e63334b

Please sign in to comment.