Add announcements support (models, page handler, public API) and expose the
profile image on the personal details.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy, so
the indentation of context lines is a best guess; use
`git apply --ignore-whitespace` if a hunk is rejected. `index` lines are
omitted for files whose resulting content differs from the collapsed copy.

diff --git a/pesuacademy/models/__init__.py b/pesuacademy/models/__init__.py
index 297e623..525e957 100644
--- a/pesuacademy/models/__init__.py
+++ b/pesuacademy/models/__init__.py
@@ -1,3 +1,4 @@
+from .announcement import Announcement
 from .course import Course, Attendance
 from .profile import (
     Profile,
diff --git a/pesuacademy/models/announcement.py b/pesuacademy/models/announcement.py
new file mode 100644
--- /dev/null
+++ b/pesuacademy/models/announcement.py
@@ -0,0 +1,31 @@
+import datetime
+from typing import Optional
+
+
+class AnnouncementFile:
+    """A file attached to an announcement: its display name and raw bytes."""
+
+    def __init__(self, name: str, content: bytes):
+        self.name = name
+        self.content = content
+
+
+class Announcement:
+    """An announcement with its title, date, text, optional image and files."""
+
+    def __init__(
+        self,
+        title: str,
+        date: datetime.date,
+        content: str,
+        img: Optional[str] = None,
+        files: Optional[list[AnnouncementFile]] = None,
+    ):
+        self.title = title
+        self.date = date
+        self.content = content
+        self.img = img
+        self.files = files
+
+    def __str__(self):
+        return f"{self.__dict__}"
diff --git a/pesuacademy/models/profile.py b/pesuacademy/models/profile.py
--- a/pesuacademy/models/profile.py
+++ b/pesuacademy/models/profile.py
@@ -38,6 +38,7 @@ def __init__(
         branch: str,
         semester: str,
         section: str,
+        img: Optional[str] = None,
         program: Optional[str] = None,
         email: Optional[str] = None,
         mobile: Optional[str] = None,
@@ -53,6 +54,7 @@ def __init__(
         self.section = section
         self.email = email
         self.mobile = mobile
+        self.img = img
         self.aadhar = aadhar
         self.name_as_in_aadhar = name_as_in_aadhar
 
diff --git a/pesuacademy/pages/__init__.py b/pesuacademy/pages/__init__.py
index ec47eb2..87b6160 100644
--- a/pesuacademy/pages/__init__.py
+++ b/pesuacademy/pages/__init__.py
@@ -1,3 +1,4 @@
+from .announcements import AnnouncementPageHandler
 from .attendance import AttendancePageHandler
 from .courses import CoursesPageHandler
 from .profile import ProfilePageHandler
diff --git a/pesuacademy/pages/announcements.py b/pesuacademy/pages/announcements.py
new file mode 100644
--- /dev/null
+++ b/pesuacademy/pages/announcements.py
@@ -0,0 +1,156 @@
+import datetime
+import re
+from typing import Optional
+
+import urllib3
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+import requests_html
+from bs4 import BeautifulSoup
+
+from pesuacademy.models.announcement import Announcement, AnnouncementFile
+
+# Compiled once at import time rather than once per loop iteration.
+ATTACHMENT_ID_PATTERN = re.compile(r"handleDownloadAnoncemntdoc\('(\d+)'\)")
+ANNOUNCEMENT_ID_PATTERN = re.compile(r"handleShowMoreAnnouncement\(\d+, \d+,(\d+)\)")
+
+
+class AnnouncementPageHandler:
+    """Fetches announcements and their PDF attachments from PESU Academy."""
+
+    @staticmethod
+    def get_announcement_by_id(
+        session: requests_html.HTMLSession, csrf_token: str, announcement_id: str
+    ) -> Announcement:
+        """
+        Fetch a single announcement and download its PDF attachments.
+
+        :param session: The session used to make the requests.
+        :param csrf_token: The CSRF token sent in the request headers.
+        :param announcement_id: The ID of the announcement to fetch.
+        :return: The parsed announcement.
+        :raises ConnectionError: If the announcement cannot be fetched.
+        """
+        url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
+        data = {
+            "controllerMode": "6411",
+            "actionType": "4",
+            "AnnouncementId": announcement_id,
+            "menuId": "667",
+        }
+        headers = {
+            "accept": "*/*",
+            "accept-language": "en-IN,en-US;q=0.9,en-GB;q=0.8,en;q=0.7",
+            "content-type": "application/x-www-form-urlencoded",
+            "origin": "https://www.pesuacademy.com",
+            "priority": "u=1, i",
+            "referer": "https://www.pesuacademy.com/Academy/s/studentProfilePESU",
+            "sec-ch-ua": '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"Windows"',
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "same-origin",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+            "x-csrf-token": csrf_token,
+            "x-requested-with": "XMLHttpRequest",
+        }
+
+        response = session.post(url, data=data, headers=headers)
+        if response.status_code != 200:
+            # Retry once with the alternate action type before giving up.
+            data["actionType"] = "6"
+            response = session.post(url, data=data, headers=headers)
+        if response.status_code != 200:
+            raise ConnectionError("Unable to fetch announcement data.")
+
+        soup = BeautifulSoup(response.text, "lxml")
+
+        title = soup.find("h4", class_="text-info").text.strip()
+        date = soup.find("span", class_="text-muted text-date pull-right").text.strip()
+        date = datetime.datetime.strptime(date, "%d-%B-%Y").date()
+
+        content_tag = soup.find("div", class_="col-md-12")
+        if content_tag is None:
+            content_tag = soup.find("div", class_="col-md-8")
+        paragraph_or_list_tags = content_tag.find_all(["p", "li"])
+        content = "\n".join([tag.text.strip() for tag in paragraph_or_list_tags])
+
+        img_tag = soup.find("img", class_="img-responsive")
+        img = img_tag.attrs["src"] if img_tag else None
+
+        attachments = list()
+        for attachment_tag in content_tag.find_all("a"):
+            attachment_name = attachment_tag.text.strip()
+            if not attachment_name.endswith(".pdf"):
+                continue
+            # Skip links whose href carries no download ID instead of crashing
+            # on a missing attribute or a non-matching pattern.
+            match = ATTACHMENT_ID_PATTERN.search(attachment_tag.attrs.get("href", ""))
+            if match is None:
+                continue
+            attachment_id = match.group(1)
+            # NOTE(review): verify=False disables TLS certificate verification
+            # for a request that carries the CSRF token; confirm it is required.
+            response = session.get(
+                f"https://pesuacademy.com/Academy/s/studentProfilePESUAdmin/downloadAnoncemntdoc/{attachment_id}",
+                headers={"x-csrf-token": csrf_token},
+                verify=False,
+            )
+            attachments.append(
+                AnnouncementFile(name=attachment_name, content=response.content)
+            )
+
+        return Announcement(
+            title=title, date=date, content=content, img=img, files=attachments
+        )
+
+    def get_page(
+        self,
+        session: requests_html.HTMLSession,
+        csrf_token: str,
+        start_date: Optional[datetime.date] = None,
+        end_date: Optional[datetime.date] = None,
+    ) -> list[Announcement]:
+        """
+        Fetch the listed announcements, optionally restricted to a date range.
+
+        :param session: The session used to make the requests.
+        :param csrf_token: The CSRF token sent in the request headers.
+        :param start_date: If given, announcements dated before it are dropped.
+        :param end_date: If given, announcements dated after it are dropped.
+        :return: The matching announcements, sorted newest first.
+        :raises ConnectionError: If the announcement listing cannot be fetched.
+        """
+        url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
+        query = {
+            "menuId": "667",
+            "controllerMode": "6411",
+            "actionType": "5",
+            "_": str(int(datetime.datetime.now().timestamp() * 1000)),
+        }
+        response = session.get(url, allow_redirects=False, params=query)
+        if response.status_code != 200:
+            raise ConnectionError("Unable to fetch announcement data.")
+        soup = BeautifulSoup(response.text, "lxml")
+
+        announcement_ids = list()
+        for link in soup.find_all("a", class_="pull-right readmorelink"):
+            # Skip links without a recognisable onclick handler instead of
+            # failing on a missing attribute or a None match.
+            match = ANNOUNCEMENT_ID_PATTERN.match(link.attrs.get("onclick") or "")
+            if match is not None:
+                announcement_ids.append(match.group(1))
+
+        announcements = list()
+        for announcement_id in announcement_ids:
+            announcement = self.get_announcement_by_id(
+                session, csrf_token, announcement_id
+            )
+            if start_date and announcement.date < start_date:
+                continue
+            if end_date and announcement.date > end_date:
+                continue
+            announcements.append(announcement)
+        announcements.sort(key=lambda x: x.date, reverse=True)
+        return announcements
diff --git a/pesuacademy/pesuacademy.py b/pesuacademy/pesuacademy.py
--- a/pesuacademy/pesuacademy.py
+++ b/pesuacademy/pesuacademy.py
@@ -7,7 +7,7 @@ from pesuacademy.models.seating_information import SeatingInformation
 
 from pesuacademy.util.page import PageHandler
 from .exceptions import CSRFTokenError, AuthenticationError
-from .models import Profile, ClassAndSectionInfo, Course
+from .models import Profile, ClassAndSectionInfo, Course, Announcement
 
 
 class PESUAcademy:
@@ -167,3 +167,21 @@ def seating_information(self) -> list[SeatingInformation]:
             raise AuthenticationError("You need to authenticate first.")
         seating_info = self.page_handler.get_seating_info()
         return seating_info
+
+    def announcements(
+        self, start_date: Optional[str] = None, end_date: Optional[str] = None
+    ) -> list[Announcement]:
+        """
+        Get the announcements from the PESU Academy website.
+
+        :param start_date: The start date of the announcements to fetch in "yyyy-mm-dd" format. If not provided, all
+        announcements from the beginning are fetched.
+        :param end_date: The end date of the announcements to fetch in "yyyy-mm-dd" format. If not provided, all
+        announcements till the end are fetched.
+        :return: The list of announcements.
+        :raises ValueError: If a date is not in "yyyy-mm-dd" format.
+        """
+        announcements = self.page_handler.get_announcements(
+            self._csrf_token, start_date, end_date
+        )
+        return announcements
diff --git a/pesuacademy/util/page.py b/pesuacademy/util/page.py
--- a/pesuacademy/util/page.py
+++ b/pesuacademy/util/page.py
@@ -14,6 +14,7 @@ def __init__(self, session: requests_html.HTMLSession):
         self.course_page_handler = pages.CoursesPageHandler()
         self.attendance_page_handler = pages.AttendancePageHandler()
         self.profile_page_handler = pages.ProfilePageHandler()
+        self.announcement_handler = pages.AnnouncementPageHandler()
 
     def set_semester_id_to_number_mapping(self, csrf_token: str):
         try:
@@ -83,3 +84,26 @@ def get_attendance(self, semester: Optional[int] = None):
 
     def get_seating_info(self):
         return pages.SeatingInformationHandler.get_page(self.__session)
+
+    def get_announcements(
+        self,
+        csrf_token: str,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+    ):
+        """
+        Parse the optional "yyyy-mm-dd" bounds and fetch the announcements.
+
+        :raises ValueError: If a date is not in "yyyy-mm-dd" format.
+        """
+        # Imported locally so this method does not rely on a module-level
+        # import that this patch does not add.
+        import datetime
+
+        if start_date is not None:
+            start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d").date()
+        if end_date is not None:
+            end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d").date()
+        return self.announcement_handler.get_page(
+            self.__session, csrf_token, start_date, end_date
+        )
diff --git a/pesuacademy/util/profile.py b/pesuacademy/util/profile.py
--- a/pesuacademy/util/profile.py
+++ b/pesuacademy/util/profile.py
@@ -70,6 +70,7 @@ def create_personal_details_object_from_profile_page(
     :param soup: The BeautifulSoup object of the page.
     :return: The PersonalDetails object.
     """
+
     personal_details = dict()
     personal_details_section = soup.find(
         "div", attrs={"class": "elem-info-wrapper box-shadow clearfix"}
@@ -111,6 +112,9 @@ def create_personal_details_object_from_profile_page(
             value = None if value == "NA" else value
             personal_details[key] = value
 
+    image_tag = soup.find("img", attrs={"class": "media-object"})
+    image_b64_encoded = image_tag.get("src") if image_tag is not None else None
+
     return PersonalDetails(
         name=personal_details["name"],
         prn=personal_details["pesu_id"],
@@ -119,6 +123,7 @@ def create_personal_details_object_from_profile_page(
         branch=personal_details["branch"],
         semester=personal_details["semester"],
         section=personal_details["section"],
+        img=image_b64_encoded,
         email=personal_details["email_id"],
         mobile=personal_details["contact_no"],
         aadhar=personal_details["aadhar_no"],