def _get_movies_from_table(self, feature_timetable):
    """Record screening times found in a timetable table element.

    Every ``tr`` of ``feature_timetable`` is inspected. The first cell is
    read as the screening time (``14h15`` or ``19h``) and the third cell as
    the movie title. When the title equals the ``"title"`` of an entry in
    ``self.movies`` (case-insensitive), the parsed time is appended to that
    entry's ``"time"`` list.

    Rows that cannot describe a screening are skipped instead of raising:
    rows with fewer than three cells (e.g. date header rows) and rows whose
    first cell is not a valid ``<hour>h[<minute>]`` time.

    Args:
        feature_timetable: element exposing ``find_all("tr")`` whose rows
            expose ``find_all("td")`` and whose cells expose ``.text``
            (presumably a BeautifulSoup ``Tag`` -- confirm against caller).

    Returns:
        None. ``self.movies`` is updated in place.
    """
    for feature_tr in feature_timetable.find_all("tr"):
        feature_tds = feature_tr.find_all("td")
        if len(feature_tds) < 3:
            # Not a "time | room | title" row, so there is nothing to match.
            continue

        row_title = feature_tds[2].text.lower()
        featured_today = [
            movie for movie in self.movies if movie["title"].lower() == row_title
        ]
        if not featured_today:
            continue

        # NFKC folds non-breaking spaces into plain ones, so anything that
        # follows the time in the same cell is cut off by the split below.
        time_str = (
            unicodedata.normalize("NFKC", feature_tds[0].text).strip().split(" ")[0]
        )
        hour_str, separator, min_str = time_str.partition("h")
        if not separator:
            # No "h" marker: the cell does not hold a screening time.
            continue
        try:
            if min_str:
                parsed_time = dt_time(int(hour_str), int(min_str))
            else:
                parsed_time = dt_time(int(hour_str))
        except ValueError:
            # Non-numeric or out-of-range hour/minute: ignore this row only.
            continue

        for movie in featured_today:
            movie["time"].append(parsed_time)
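# Example of the table layout (markup reconstructed approximately; the
# original tags were lost, only the cell text is certain):
# <tr><td>31 de outubro | quinta</td></tr>
# <tr><td>14h15</td><td>PA</td><td>Megalópolis</td></tr>
# <tr><td>1 de novembro | sexta</td></tr>
# <tr><td>14h15</td><td>PA</td><td>...</td></tr>
# <tr><td>19h</td><td>PA</td><td>Movie name</td></tr>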