From 7a33529116a16010c3e2a529995d4680cbeedc49 Mon Sep 17 00:00:00 2001 From: aditeyabaral Date: Sat, 20 Apr 2024 00:01:35 +0530 Subject: [PATCH 1/3] Add profile picture to profile object --- pesuacademy/models/profile.py | 2 ++ pesuacademy/util/profile.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/pesuacademy/models/profile.py b/pesuacademy/models/profile.py index c1d34d5..783cd46 100644 --- a/pesuacademy/models/profile.py +++ b/pesuacademy/models/profile.py @@ -38,6 +38,7 @@ def __init__( branch: str, semester: str, section: str, + img: str, program: Optional[str] = None, email: Optional[str] = None, mobile: Optional[str] = None, @@ -53,6 +54,7 @@ def __init__( self.section = section self.email = email self.mobile = mobile + self.img = img self.aadhar = aadhar self.name_as_in_aadhar = name_as_in_aadhar diff --git a/pesuacademy/util/profile.py b/pesuacademy/util/profile.py index df5e6f4..4cc0707 100644 --- a/pesuacademy/util/profile.py +++ b/pesuacademy/util/profile.py @@ -70,6 +70,7 @@ def create_personal_details_object_from_profile_page( :param soup: The BeautifulSoup object of the page. :return: The PersonalDetails object. """ + personal_details = dict() personal_details_section = soup.find( "div", attrs={"class": "elem-info-wrapper box-shadow clearfix"} @@ -111,6 +112,8 @@ def create_personal_details_object_from_profile_page( value = None if value == "NA" else value personal_details[key] = value + image_b64_encoded = soup.find("img", attrs={"class": "media-object"})["src"] + return PersonalDetails( name=personal_details["name"], prn=personal_details["pesu_id"], @@ -119,6 +122,7 @@ def create_personal_details_object_from_profile_page( branch=personal_details["branch"], semester=personal_details["semester"], section=personal_details["section"], + img=image_b64_encoded, email=personal_details["email_id"], mobile=personal_details["contact_no"], aadhar=personal_details["aadhar_no"], From be409b5cd6267e0d73435197e20e2deb13b5b720 Mon Sep 17 00:00:00 2001 From: aditeyabaral Date: Tue, 23 Apr 2024 01:31:46 +0530 Subject: [PATCH 2/3] Add fetching of announcements --- pesuacademy/models/__init__.py | 1 + pesuacademy/models/announcement.py | 27 +++++++ pesuacademy/pages/__init__.py | 1 + pesuacademy/pages/announcements.py | 113 +++++++++++++++++++++++++++++ pesuacademy/pesuacademy.py | 11 ++- pesuacademy/util/page.py | 4 + 6 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 pesuacademy/models/announcement.py create mode 100644 pesuacademy/pages/announcements.py diff --git a/pesuacademy/models/__init__.py b/pesuacademy/models/__init__.py index aff3c33..fbd99d7 100644 --- a/pesuacademy/models/__init__.py +++ b/pesuacademy/models/__init__.py @@ -1,3 +1,4 @@ +from .announcement import Announcement from .course import Course, Attendance from .profile import ( Profile, diff --git a/pesuacademy/models/announcement.py b/pesuacademy/models/announcement.py new file mode 100644 index 0000000..ad4b611 --- /dev/null +++ b/pesuacademy/models/announcement.py @@ -0,0 +1,27 @@ +import datetime +from typing import Optional + + +class AnnouncementFile: + def __init__(self, name: str, content: bytes): + self.name = name + self.content = content + + +class Announcement: + def __init__( + self, + title: str, + date: datetime.date, + content: str, + img: str = None, + files: Optional[list[AnnouncementFile]] = None, + ): + self.title = title + self.date = date + self.content = content + self.img = img + self.files = files + + def __str__(self): + return f"{self.__dict__}" diff --git a/pesuacademy/pages/__init__.py b/pesuacademy/pages/__init__.py index c667d39..d0e5915 100644 --- a/pesuacademy/pages/__init__.py +++ b/pesuacademy/pages/__init__.py @@ -1,3 +1,4 @@ +from .announcements import AnnouncementPageHandler from .attendance import AttendancePageHandler from .courses import CoursesPageHandler from .profile import ProfilePageHandler diff --git a/pesuacademy/pages/announcements.py b/pesuacademy/pages/announcements.py new file mode 100644 index 0000000..c9cf5aa --- /dev/null +++ b/pesuacademy/pages/announcements.py @@ -0,0 +1,113 @@ +import datetime +import re + +import urllib3 + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +import requests_html +from bs4 import BeautifulSoup + +from pesuacademy.models.announcement import Announcement, AnnouncementFile + + +class AnnouncementPageHandler: + @staticmethod + def get_announcement_by_id( + session: requests_html.HTMLSession, csrf_token: str, announcement_id: str + ) -> Announcement: + url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin" + data = { + "controllerMode": "6411", + "actionType": "4", + "AnnouncementId": announcement_id, + "menuId": "667", + } + headers = { + "accept": "*/*", + "accept-language": "en-IN,en-US;q=0.9,en-GB;q=0.8,en;q=0.7", + "content-type": "application/x-www-form-urlencoded", + "origin": "https://www.pesuacademy.com", + "priority": "u=1, i", + "referer": "https://www.pesuacademy.com/Academy/s/studentProfilePESU", + "sec-ch-ua": '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"Windows"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "x-csrf-token": csrf_token, + "x-requested-with": "XMLHttpRequest", + } + + response = session.post(url, data=data, headers=headers) + if response.status_code != 200: + data["actionType"] = "6" + response = session.post(url, data=data, headers=headers) + + soup = BeautifulSoup(response.text, "lxml") + + title = soup.find("h4", class_="text-info").text.strip() + date = soup.find("span", class_="text-muted text-date pull-right").text.strip() + date = datetime.datetime.strptime(date, "%d-%B-%Y").date() + + content_tag = soup.find("div", class_="col-md-12") + if content_tag is None: + content_tag = soup.find("div", class_="col-md-8") + paragraph_or_list_tags = content_tag.find_all(["p", "li"]) + content = "\n".join([tag.text.strip() for tag in paragraph_or_list_tags]) + + img_tag = soup.find("img", class_="img-responsive") + img = img_tag.attrs["src"] if img_tag else None + + attachment_tags = [ + tag + for tag in content_tag.find_all("a") + if tag.text.strip().endswith(".pdf") + ] + attachments = list() + for attachment_tag in attachment_tags: + attachment_name = attachment_tag.text.strip() + pattern = re.compile(r"handleDownloadAnoncemntdoc\('(\d+)'\)") + attachment_id = re.findall(pattern, attachment_tag.attrs["href"])[0] + response = session.get( + f"https://pesuacademy.com/Academy/s/studentProfilePESUAdmin/downloadAnoncemntdoc/{attachment_id}", + headers={"x-csrf-token": csrf_token}, + verify=False, + ) + attachment_bytes = response.content + attachments.append( + AnnouncementFile(name=attachment_name, content=attachment_bytes) + ) + + return Announcement( + title=title, date=date, content=content, img=img, files=attachments + ) + + def get_page( + self, session: requests_html.HTMLSession, csrf_token: str + ) -> list[Announcement]: + url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin" + query = { + "menuId": "667", + "controllerMode": "6411", + "actionType": "5", + "_": str(int(datetime.datetime.now().timestamp() * 1000)), + } + response = session.get(url, allow_redirects=False, params=query) + if response.status_code != 200: + raise ConnectionError("Unable to fetch announcement data.") + soup = BeautifulSoup(response.text, "lxml") + + announcement_ids = soup.find_all("a", class_="pull-right readmorelink") + pattern = re.compile(r"handleShowMoreAnnouncement\(\d+, \d+,(\d+)\)") + announcement_ids = [ + pattern.match(ann.attrs.get("onclick")).group(1) for ann in announcement_ids + ] + + announcements = list() + for announcement_id in announcement_ids: + announcements.append( + self.get_announcement_by_id(session, csrf_token, announcement_id) + ) + return announcements diff --git a/pesuacademy/pesuacademy.py b/pesuacademy/pesuacademy.py index 9032018..e066dfb 100644 --- a/pesuacademy/pesuacademy.py +++ b/pesuacademy/pesuacademy.py @@ -6,7 +6,7 @@ from pesuacademy import util from pesuacademy.util.page import PageHandler from .exceptions import CSRFTokenError, AuthenticationError -from .models import Profile, ClassAndSectionInfo, Course +from .models import Profile, ClassAndSectionInfo, Course, Announcement class PESUAcademy: @@ -155,3 +155,12 @@ def attendance(self, semester: Optional[int] = None) -> dict[int, list[Course]]: raise AuthenticationError("You need to authenticate first.") attendance_info = self.page_handler.get_attendance(semester) return attendance_info + + def announcements(self) -> list[Announcement]: + """ + Get the announcements from the PESU Academy website. + + :return: The list of announcements. + """ + announcements = self.page_handler.get_announcements(self._csrf_token) + return announcements diff --git a/pesuacademy/util/page.py b/pesuacademy/util/page.py index c46018a..8e77678 100644 --- a/pesuacademy/util/page.py +++ b/pesuacademy/util/page.py @@ -14,6 +14,7 @@ def __init__(self, session: requests_html.HTMLSession): self.course_page_handler = pages.CoursesPageHandler() self.attendance_page_handler = pages.AttendancePageHandler() self.profile_page_handler = pages.ProfilePageHandler() + self.announcement_handler = pages.AnnouncementPageHandler() def set_semester_id_to_number_mapping(self, csrf_token: str): try: @@ -80,3 +81,6 @@ def get_courses(self, semester: Optional[int] = None): def get_attendance(self, semester: Optional[int] = None): semester_ids = self.get_semester_ids_from_semester_number(semester) return self.attendance_page_handler.get_page(self.__session, semester_ids) + + def get_announcements(self, csrf_token: str): + return self.announcement_handler.get_page(self.__session, csrf_token) From f6b708aee41adce0556d8a3856443adcf87d96ec Mon Sep 17 00:00:00 2001 From: aditeyabaral Date: Tue, 23 Apr 2024 01:43:19 +0530 Subject: [PATCH 3/3] Add arguments to filter announcements --- pesuacademy/pages/announcements.py | 17 ++++++++++++++--- pesuacademy/pesuacademy.py | 12 ++++++++++-- pesuacademy/util/page.py | 15 +++++++++++++-- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/pesuacademy/pages/announcements.py b/pesuacademy/pages/announcements.py index c9cf5aa..e9c4588 100644 --- a/pesuacademy/pages/announcements.py +++ b/pesuacademy/pages/announcements.py @@ -1,5 +1,6 @@ import datetime import re +from typing import Optional import urllib3 @@ -85,7 +86,11 @@ def get_announcement_by_id( ) def get_page( - self, session: requests_html.HTMLSession, csrf_token: str + self, + session: requests_html.HTMLSession, + csrf_token: str, + start_date: Optional[datetime.date] = None, + end_date: Optional[datetime.date] = None, ) -> list[Announcement]: url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin" query = { @@ -107,7 +112,13 @@ def get_page( announcements = list() for announcement_id in announcement_ids: - announcements.append( - self.get_announcement_by_id(session, csrf_token, announcement_id) + announcement = self.get_announcement_by_id( + session, csrf_token, announcement_id ) + if start_date and announcement.date < start_date: + continue + if end_date and announcement.date > end_date: + continue + announcements.append(announcement) + announcements.sort(key=lambda x: x.date, reverse=True) return announcements diff --git a/pesuacademy/pesuacademy.py b/pesuacademy/pesuacademy.py index e066dfb..1675a50 100644 --- a/pesuacademy/pesuacademy.py +++ b/pesuacademy/pesuacademy.py @@ -156,11 +156,19 @@ def attendance(self, semester: Optional[int] = None) -> dict[int, list[Course]]: attendance_info = self.page_handler.get_attendance(semester) return attendance_info - def announcements(self) -> list[Announcement]: + def announcements( + self, start_date: Optional[str] = None, end_date: Optional[str] = None + ) -> list[Announcement]: """ Get the announcements from the PESU Academy website. + :param start_date: The start date of the announcements to fetch in "yyyy-mm-dd" format. If not provided, all + announcements from the beginning are fetched. + :param end_date: The end date of the announcements to fetch in "yyyy-mm-dd" format. If not provided, all + announcements till the end are fetched. :return: The list of announcements. """ - announcements = self.page_handler.get_announcements(self._csrf_token) + announcements = self.page_handler.get_announcements( + self._csrf_token, start_date, end_date + ) return announcements diff --git a/pesuacademy/util/page.py b/pesuacademy/util/page.py index 8e77678..626cec5 100644 --- a/pesuacademy/util/page.py +++ b/pesuacademy/util/page.py @@ -82,5 +82,16 @@ def get_attendance(self, semester: Optional[int] = None): semester_ids = self.get_semester_ids_from_semester_number(semester) return self.attendance_page_handler.get_page(self.__session, semester_ids) - def get_announcements(self, csrf_token: str): - return self.announcement_handler.get_page(self.__session, csrf_token) + def get_announcements( + self, + csrf_token: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + ): + if start_date is not None: + start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d").date() + if end_date is not None: + end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d").date() + return self.announcement_handler.get_page( + self.__session, csrf_token, start_date, end_date + )