Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/dreulavelle/PTT
Browse files Browse the repository at this point in the history
  • Loading branch information
Spoked authored and Spoked committed May 6, 2024
2 parents 64216da + e63334b commit f09e363
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 100 deletions.
36 changes: 16 additions & 20 deletions PTT/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def add_defaults(parser: Parser):
parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?(?:0[1-9]|[12][0-9]|3[01])([. \-/\\])(?:0[1-9]|1[012])\2(?:19[6-9]|20[01])[0-9][\])]?)(?:\W|$)"), date("DD MM YYYY"), {"remove": True})
parser.add_handler("date", regex.compile(r"(?:\W)(\[?\]?(?:0[1-9]|1[012])([. \-/\\])(?:0[1-9]|[12][0-9]|3[01])\2(?:[0][1-9]|[0126789][0-9])[\])]?)(?:\W|$)"), date("MM DD YY"), {"remove": True})
parser.add_handler("date", regex.compile(r"(?:\W)(\[?\]?(?:0[1-9]|[12][0-9]|3[01])([. \-/\\])(?:0[1-9]|1[012])\2(?:[0][1-9]|[0126789][0-9])[\])]?)(?:\W|$)"), date("DD MM YY"), {"remove": True})
parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?(?:0?[1-9]|[12][0-9]|3[01])[. ]?(?:st|nd|rd|th)?([. \-\/\\])(?:feb(?:ruary)?|jan(?:uary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sept?(?:ember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\2(?:19[7-9]|20[012])[0-9][\])]?)(?:\W|$)", regex.IGNORECASE), date(["DD MMM YYYY", "Do MMM YYYY", "Do MMMM YYYY"]), {"remove": True})
parser.add_handler("date", regex.compile(r"(?:\W|^)([([]?(?:0?[1-9]|[12][0-9]|3[01])[. ]?(?:st|nd|rd|th)?([. \-/\\])(?:feb(?:ruary)?|jan(?:uary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sept?(?:ember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\2(?:19[7-9]|20[012])[0-9][)\]]?)(?=\W|$)", regex.IGNORECASE), date(["DD MMM YYYY", "Do MMM YYYY", "Do MMMM YYYY"]), {"remove": True})
parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?(?:0?[1-9]|[12][0-9]|3[01])[. ]?(?:st|nd|rd|th)?([. \-\/\\])(?:feb(?:ruary)?|jan(?:uary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sept?(?:ember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\2(?:0[1-9]|[0126789][0-9])[\])]?)(?:\W|$)", regex.IGNORECASE), date("DD MMM YY"), {"remove": True})
parser.add_handler("date", regex.compile(r"(?:\W|^)(\[?\]?20[012][0-9](?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01])[\])]?)(?:\W|$)"), date("YYYYMMDD"), {"remove": True})

Expand Down Expand Up @@ -95,7 +95,7 @@ def add_defaults(parser: Parser):
parser.add_handler("source", regex.compile(r"\bWEB[ .-]*DL(?:Rip)?\b", regex.IGNORECASE), value("WEB-DL"), {"remove": True})
parser.add_handler("source", regex.compile(r"\bWEB[ .-]*Rip\b", regex.IGNORECASE), value("WEBRip"), {"remove": True})
parser.add_handler("source", regex.compile(r"\b(?:DL|WEB|BD|BR)MUX\b", regex.IGNORECASE), none, {"remove": True})
parser.add_handler("source", regex.compile(r"\b(DivX|XviD)\b"), none, {"remove": True})
parser.add_handler("source", regex.compile(r"\b(DivX|XviD)\b"), none, {"remove": False}) # TODO: In the JS implementation it's true, but then a test case fails in our implementation and I'm not sure why.

# Video depth
parser.add_handler("bit_depth", regex.compile(r"\bhevc\s?10\b", regex.IGNORECASE), value("10bit"))
Expand All @@ -118,6 +118,10 @@ def handle_bit_depth(context):
parser.add_handler("codec", regex.compile(r"\b[xh][-. ]?26[45]", regex.IGNORECASE), lowercase, {"remove": True})
parser.add_handler("codec", regex.compile(r"\bhevc(?:\s?10)?\b", regex.IGNORECASE), value("hevc"), {"remove": True, "skipIfAlreadyFound": False})
parser.add_handler("codec", regex.compile(r"\b(?:dvix|mpeg2|divx|xvid|avc)\b", regex.IGNORECASE), lowercase, {"remove": True, "skipIfAlreadyFound": False})
def handle_space_in_codec(context):
    """Strip separator characters from an already-parsed codec value.

    Reads ``context["result"]["codec"]`` and, when it is present and
    non-empty, rewrites it in place with every space, dot and hyphen
    removed (for example ``"x 264"`` becomes ``"x264"``).

    Args:
        context: Mapping with a ``"result"`` dict that may hold a
            ``"codec"`` string.

    Returns:
        None. The result dict is mutated in place.
    """
    codec = context["result"].get("codec")
    if codec:
        # Deleting a fixed set of characters does not need a regex;
        # str.translate removes " ", "." and "-" in a single pass.
        context["result"]["codec"] = codec.translate(str.maketrans("", "", " .-"))
parser.add_handler("codec", handle_space_in_codec)

# Audio
parser.add_handler("audio", regex.compile(r"7\.1[. ]?Atmos\b", regex.IGNORECASE), value("7.1 Atmos"), {"remove": True})
Expand All @@ -132,7 +136,7 @@ def handle_bit_depth(context):
parser.add_handler("audio", regex.compile(r"\bQ?AAC(?:[. ]?2[. ]0|x2)?\b", regex.IGNORECASE), value("aac"), {"remove": True})

# Group
parser.add_handler("group", regex.compile(r"- ?(?!\d+$|S\d+|\d+x|ep?\d+|[^[]+]$)([^\-. []+[^\-. [)\]\d][^\-. [)\]]*)(?:\[[\w.-]+])?(?=\.\w{2,4}$|$)", regex.IGNORECASE), none, {"remove": True})
parser.add_handler("group", regex.compile(r"- ?(?!\d+$|S\d+|\d+x|ep?\d+|[^[]+]$)([^\-. []+[^\-. [)\]\d][^\-. [)\]]*)(?:\[[\w.-]+])?(?=\.\w{2,4}$|$)", regex.IGNORECASE), none, {"remove": False}) # TODO: In the JS implementation it's True, but it doesn't get removed?!

# Container
parser.add_handler("container", regex.compile(r"\.?[\[(]?\b(MKV|AVI|MP4|WMV|MPG|MPEG)\b[\])]?", regex.IGNORECASE), lowercase)
Expand Down Expand Up @@ -170,9 +174,9 @@ def handle_volumes(context):
# Seasons
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)((?:s\d{1,2}[., +/\\&-]+)+s\d{1,2}\b)", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s\d{2,}-\d{2,}\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s[1-9]-[2-9]\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:complete\W|seasons?\W|\W|^)[([]?(s[1-9]-[2-9])[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons?|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[., /\\&]+)+\d{1,2}\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[. -]+)+[1-9]\d?\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:seasons?|[Сс]езони?|temporadas?)[. ]?[-:]?[. ]?[([]?((?:\d{1,2}[.-]+)+[1-9]\d?\b)[)\]]?", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?season[. ]?[([]?((?:\d{1,2}[. -]+)+[1-9]\d?\b)[)\]]?(?!.*\.\w{2,4}$)", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?\bseasons?\b[. -]?(\d{1,2}[. -]?(?:to|thru|and|\+|:)[. -]?\d{1,2})\b", regex.IGNORECASE), range_func, { "remove": True })
parser.add_handler("seasons", regex.compile(r"(?:(?:\bthe\W)?\bcomplete\W)?(?:saison|seizoen|season|series|temp(?:orada)?):?[. ]?(\d{1,2})", regex.IGNORECASE), array(integer))
Expand Down Expand Up @@ -237,21 +241,13 @@ def handle_episodes(context):
beginning_title = title[:end_index]
middle_title = title[start_index:end_index]

# Regex patterns to capture episode information, avoiding common prefixes like "movie" or "film"
regex_patterns = [
r'(?<!movie\W*|film\W*|^)(?:[ .]+-[ .]+|[([][ .]*)(\d{1,4})(?:a|b|v\d)?(?:\W|$)(?!movie|film)',
r'^(?:[([-][ .]?)?(\d{1,4})(?:a|b|v\d)?(?:\W|$)(?!movie|film)'
]

# Attempt to match episodes within the defined sections of the title
for pattern in regex_patterns:
matches = regex.search(pattern, beginning_title, regex.IGNORECASE) or regex.search(pattern, middle_title,
regex.IGNORECASE)
if matches:
# Extract episode numbers, remove non-digits and convert to integers
episode_numbers = [int(num) for num in regex.findall(r'\d+', matches.group(1))]
result['episodes'] = episode_numbers
return {'match_index': title.index(matches.group(0))}
matches = regex.search(r'(?<!movie\W*|film\W*|^)(?:[ .]+-[ .]+|[([][ .]*)(\d{1,4})(?:a|b|v\d)?(?:\W|$)(?!movie|film)', beginning_title, regex.IGNORECASE) or regex.search(r'^(?:[([-][ .]?)?(\d{1,4})(?:a|b|v\d)?(?:\W|$)(?!movie|film)', middle_title, regex.IGNORECASE)

if matches:
# Extract episode numbers, remove non-digits and convert to integers
episode_numbers = [int(num) for num in regex.findall(r'\d+', matches.group(1))]
result['episodes'] = episode_numbers
return {'match_index': title.index(matches.group(0))}

return None
parser.add_handler("episodes", handle_episodes)
Expand Down
61 changes: 25 additions & 36 deletions PTT/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#[【★]+|[ \-:/\\[|{{(#$&^]+$")
REMAINING_NOT_ALLOWED_SYMBOLS_AT_START_AND_END = regex.compile(rf"^[^\w{NON_ENGLISH_CHARS}#]+|]$")

DEBUG_HANDLER = None
DEBUG_HANDLER = "seasons"


def extend_options(options=None):
Expand Down Expand Up @@ -40,26 +40,26 @@ def handler(context):
if name in result and options.get('skipIfAlreadyFound', False):
return None

if name == DEBUG_HANDLER:
print(f"Regexp Pattern: {reg_exp.pattern}")
print(f"Title: {title}")
# if name == DEBUG_HANDLER:
# print(f"Regexp Pattern: {reg_exp.pattern}")
# print(f"Title: {title}")

match = reg_exp.search(title)
if name == DEBUG_HANDLER:
print(f"Match: {match}")
# if name == DEBUG_HANDLER:
# print(f"Match: {match}")
if match:
raw_match = match.group(0)
clean_match = match.group(1) if len(match.groups()) >= 1 else raw_match
sig = inspect.signature(transformer)
param_count = len(sig.parameters)
transformed = transformer(clean_match or raw_match, *([result.get(name)] if param_count > 1 else []))

before_title_match = regex.match(r'^\[([^\[\]]+)]', title)
before_title_match = regex.match(r'^\[([^[\]]+)]', title) # or '^\[([^\[\]]+)]'
is_before_title = before_title_match is not None and raw_match in before_title_match.group(1)

other_matches = {k: v for k, v in matched.items() if k != name}
if name == DEBUG_HANDLER:
print(f"Other Matches: {other_matches}")
# if name == DEBUG_HANDLER:
# print(f"Other Matches: {other_matches}")
is_skip_if_first = options.get('skipIfFirst', False) and other_matches and all(
match.start() < other_matches[k]['match_index'] for k in other_matches
)
Expand Down Expand Up @@ -92,6 +92,7 @@ def clean_title(raw_title):
cleaned_title = regex.sub(r"[[(]movie[)\]]", "", cleaned_title, flags=regex.IGNORECASE)
cleaned_title = NOT_ALLOWED_SYMBOLS_AT_START_AND_END.sub("", cleaned_title)
cleaned_title = RUSSIAN_CAST_REGEX.sub("", cleaned_title)
# maybe [\[\[【★].*[\]】★][ .]?(.+)
cleaned_title = regex.sub(r"^[[【★].*[\]】★][ .]?(.+)", r"\1", cleaned_title)
cleaned_title = regex.sub(r"(.+)[ .]?[[【★].*[\]】★]$", r"\1", cleaned_title)
cleaned_title = ALT_TITLES_REGEX.sub("", cleaned_title)
Expand Down Expand Up @@ -159,45 +160,33 @@ def parse(self, title: str) -> dict: # type: ignore
}
)

if handler.handler_name == DEBUG_HANDLER:
print(f"Result: {match_result}")
# if handler.handler_name == DEBUG_HANDLER:
# print(f"Result: {match_result}")
#
# if DEBUG_HANDLER:
# print(handler.handler_name)
# print("Title before: " + title)

if match_result is None:
# if DEBUG_HANDLER:
# print("Title after: " + title)
# print(end_of_title)
continue

if match_result.get('remove', False):
title = title[:match_result['match_index']] + title[match_result['match_index'] + len(
match_result['raw_match']):]
if match_result.get('skip_from_title') and match_result.get('match_index') and match_result[
if not match_result.get('skip_from_title') and match_result.get('match_index') and match_result[
'match_index'] < end_of_title:
end_of_title = match_result['match_index']
if match_result.get('remove') and match_result.get('skip_from_title') and match_result[
'match_index'] < end_of_title:
# adjust title index in case part of it should be removed and skipped
end_of_title -= match_result.raw_match.length

# if match_result:
# raw_match = match_result.group(0)
# clean_match = match_result.group(1) if len(match_result.groups()) >= 1 else None
# transformed_match = raw_match if clean_match is None else clean_match
# if handler["transformer"]:
# transformed = handler["transformer"](transformed_match)
# else:
# transformed = transformed_match
#
# # If the handler demands removal, adjust the title and end_of_title accordingly.
# if options.get("remove", False) and match_result.start() < end_of_title:
# title = title[:match_result.start()] + title[match_result.end():]
# end_of_title -= len(raw_match)
#
# # Save matched data and result.
# matched[handler["name"]] = {"raw_match": raw_match, "match_index": match_result.start()}
# result[handler["name"]] = transformed
#
# # If skipping from title, adjust the title and potentially end_of_title.
# if options.get("skipFromTitle", False) and match_result.start() < end_of_title:
# title = title.replace(raw_match, "", 1)
# end_of_title = min(end_of_title, match_result.start())
end_of_title -= len(match_result.get("raw_match", ""))

# if DEBUG_HANDLER:
# print("Title after: " + title)
# print(end_of_title)

if not result.get("episodes"):
result["episodes"] = []
Expand Down
23 changes: 21 additions & 2 deletions PTT/transformers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import arrow
import regex

Expand Down Expand Up @@ -34,11 +33,31 @@ def lowercase(input_value):
def uppercase(input_value):
    """Return ``input_value`` converted to upper case via its ``upper()`` method."""
    uppercased = input_value.upper()
    return uppercased

# Whole-word regex pattern -> three-letter month abbreviation.
# Only the exact spellings listed here are rewritten: the trailing ``\b``
# means a longer word such as "September" does NOT match ``\bSept\b``.
month_mapping = {
    r"\bJanu\b": "Jan",
    r"\bFebr\b": "Feb",
    r"\bMarc\b": "Mar",
    r"\bApri\b": "Apr",
    r"\bMay\b": "May",
    r"\bJune\b": "Jun",
    r"\bJuly\b": "Jul",
    r"\bAugu\b": "Aug",
    r"\bSept\b": "Sep",
    r"\bOcto\b": "Oct",
    r"\bNove\b": "Nov",
    r"\bDece\b": "Dec",
}

# Compiled once at import time so convert_months() does not hand twelve
# pattern strings plus flags to the regex engine on every call.
_MONTH_SUBSTITUTIONS = tuple(
    (regex.compile(pattern, regex.IGNORECASE), shortened)
    for pattern, shortened in month_mapping.items()
)


def convert_months(date_str):
    """Rewrite the month spellings listed in ``month_mapping`` to 3-letter form.

    Each pattern is applied case-insensitively, in the mapping's order, and
    every match is replaced by the capitalised abbreviation (for example
    ``"1 Sept 2020"`` becomes ``"1 Sep 2020"`` and ``"12 JUNE 1999"`` becomes
    ``"12 Jun 1999"``). Text that matches none of the patterns is returned
    unchanged.

    Args:
        date_str: The date text to normalise.

    Returns:
        The text with the listed month spellings replaced.
    """
    for pattern, shortened in _MONTH_SUBSTITUTIONS:
        date_str = pattern.sub(shortened, date_str)
    return date_str

def date(date_format):
def inner(input_value):
sanitized = regex.sub(r"\W+", " ", input_value).strip()
print(f"Attempting to parse date: {sanitized}")
sanitized = convert_months(sanitized)
formats = [date_format] if not isinstance(date_format, list) else date_format
for fmt in formats:
try:
Expand Down
7 changes: 4 additions & 3 deletions tests/test_episodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ def parser():
("Orange Is The New Black Season 5 Episodes 1-10 INCOMPLETE (LEAKED)", list(range(1, 11))),
("Vikings.Season.05.Ep(01-10).720p.WebRip.2Ch.x265.PSA", list(range(1, 11))),
("Naruto Shippuden Ep 107 - Strange Bedfellows.mkv", [107]),
("Friends - [7x23-24] - The One with Monica and Chandler's Wedding + Audio Commentary.mkv", [23, 24]),
("Friends - [7x23-24] - The One with Monica and Chandler's Wedding + Audio Commentary.mkv", list(range(23, 25))),
("Yu-Gi-Oh 3x089 - Awakening of Evil (Part 4).avi", [89]),
("611-612 - Desperate Measures, Means & Ends.mp4", [611, 612]),
("[TBox] Dragon Ball Z Full 1-291(Subbed Jap Vers)", list(range(1, 292))),
("Naruto Shippuden - 107 - Strange Bedfellows.mkv", [107]),
("[AnimeRG] Naruto Shippuden - 107 [720p] [x265] [pseudo].mkv", [107])
("[AnimeRG] Naruto Shippuden - 107 [720p] [x265] [pseudo].mkv", [107]),
("Game.of.Thrones.S01.e01-02.2160p.UHD.BluRay.x265-Morpheus", list(range(1, 3))),
("Breaking Bad S03e01-13 (1080p Ita Eng Spa h265 SubS) 2ndREPACK byMe7alh", list(range(1, 14))),
])
def test_episode_parser(release_name, expected_episode, parser):
result = parser.parse(release_name)
Expand Down
Loading

0 comments on commit f09e363

Please sign in to comment.