class AnnouncementFile:
    """
    A file attached to an announcement.

    :param name: The name of the attachment.
    :param content: The raw bytes of the attachment.
    """

    def __init__(self, name: str, content: bytes):
        self.name = name
        self.content = content

    def __repr__(self):
        # Show the payload size rather than the payload itself: attachments are
        # whole documents, and the default object repr is only a memory address.
        return (
            f"{type(self).__name__}"
            f"(name={self.name!r}, content=<{len(self.content)} bytes>)"
        )


class Announcement:
    """
    A single announcement.

    :param title: The title of the announcement.
    :param date: The date of the announcement.
    :param content: The text of the announcement.
    :param img: An optional image reference for the announcement, stored as given.
    :param files: The optional attachments of the announcement, stored as given
        (``None`` is kept as ``None``, it is not replaced by an empty list).
    """

    def __init__(
        self,
        title: str,
        date: datetime.date,
        content: str,
        img: Optional[str] = None,
        files: Optional[list[AnnouncementFile]] = None,
    ):
        self.title = title
        self.date = date
        self.content = content
        self.img = img
        self.files = files

    def __str__(self):
        # The string form is the instance attribute dictionary.
        return f"{self.__dict__}"
class AnnouncementPageHandler:
    """
    Fetches announcements from the PESU Academy website and converts them into
    ``Announcement`` objects.
    """

    # Compiled once at class creation instead of once per attachment / per call.
    _ATTACHMENT_ID_PATTERN = re.compile(r"handleDownloadAnoncemntdoc\('(\d+)'\)")
    _ANNOUNCEMENT_ID_PATTERN = re.compile(
        r"handleShowMoreAnnouncement\(\d+, \d+,(\d+)\)"
    )

    @staticmethod
    def get_announcement_by_id(
        session: requests_html.HTMLSession, csrf_token: str, announcement_id: str
    ) -> Announcement:
        """
        Fetch one announcement and parse its title, date, text, image and PDF attachments.

        :param session: The session used for all requests.
        :param csrf_token: The CSRF token sent in the ``x-csrf-token`` header.
        :param announcement_id: The ID of the announcement to fetch.
        :return: The parsed announcement. ``files`` holds one ``AnnouncementFile`` per
            ``.pdf`` link whose ``href`` carries a download ID.
        """
        url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
        data = {
            "controllerMode": "6411",
            "actionType": "4",
            "AnnouncementId": announcement_id,
            "menuId": "667",
        }
        headers = {
            "accept": "*/*",
            "accept-language": "en-IN,en-US;q=0.9,en-GB;q=0.8,en;q=0.7",
            "content-type": "application/x-www-form-urlencoded",
            "origin": "https://www.pesuacademy.com",
            "priority": "u=1, i",
            "referer": "https://www.pesuacademy.com/Academy/s/studentProfilePESU",
            "sec-ch-ua": '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "x-csrf-token": csrf_token,
            "x-requested-with": "XMLHttpRequest",
        }

        response = session.post(url, data=data, headers=headers)
        if response.status_code != 200:
            # Second attempt with a different action type; its status is not checked.
            data["actionType"] = "6"
            response = session.post(url, data=data, headers=headers)

        soup = BeautifulSoup(response.text, "lxml")

        title = soup.find("h4", class_="text-info").text.strip()
        date_text = soup.find(
            "span", class_="text-muted text-date pull-right"
        ).text.strip()
        date = datetime.datetime.strptime(date_text, "%d-%B-%Y").date()

        # The body lives in a "col-md-12" div, with "col-md-8" as the fallback.
        content_tag = soup.find("div", class_="col-md-12")
        if content_tag is None:
            content_tag = soup.find("div", class_="col-md-8")
        content = "\n".join(
            tag.text.strip() for tag in content_tag.find_all(["p", "li"])
        )

        img_tag = soup.find("img", class_="img-responsive")
        img = img_tag.attrs["src"] if img_tag else None

        attachments = []
        for link in content_tag.find_all("a"):
            attachment_name = link.text.strip()
            if not attachment_name.endswith(".pdf"):
                continue
            id_match = AnnouncementPageHandler._ATTACHMENT_ID_PATTERN.search(
                link.attrs.get("href", "")
            )
            if id_match is None:
                # A ".pdf" link without the download handler has no ID to request;
                # skip it instead of failing the whole announcement.
                continue
            # NOTE(review): certificate verification is disabled here and the request
            # goes to the bare "pesuacademy.com" host while the token is sent along —
            # confirm this is intentional.
            download = session.get(
                f"https://pesuacademy.com/Academy/s/studentProfilePESUAdmin/downloadAnoncemntdoc/{id_match.group(1)}",
                headers={"x-csrf-token": csrf_token},
                verify=False,
            )
            attachments.append(
                AnnouncementFile(name=attachment_name, content=download.content)
            )

        return Announcement(
            title=title, date=date, content=content, img=img, files=attachments
        )

    def get_page(
        self,
        session: requests_html.HTMLSession,
        csrf_token: str,
        start_date: Optional[datetime.date] = None,
        end_date: Optional[datetime.date] = None,
    ) -> list[Announcement]:
        """
        Fetch the announcement listing and every announcement linked from it.

        :param session: The session used for all requests.
        :param csrf_token: The CSRF token forwarded to ``get_announcement_by_id``.
        :param start_date: If given, announcements dated before it are dropped.
        :param end_date: If given, announcements dated after it are dropped.
        :return: The remaining announcements, newest first.
        :raises ConnectionError: If the listing request does not return status 200.
        """
        url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
        query = {
            "menuId": "667",
            "controllerMode": "6411",
            "actionType": "5",
            "_": str(int(datetime.datetime.now().timestamp() * 1000)),
        }
        response = session.get(url, allow_redirects=False, params=query)
        if response.status_code != 200:
            raise ConnectionError("Unable to fetch announcement data.")
        soup = BeautifulSoup(response.text, "lxml")

        announcement_ids = []
        for link in soup.find_all("a", class_="pull-right readmorelink"):
            # A link with no "onclick", or one that does not match the handler call,
            # carries no announcement ID and is skipped.
            id_match = self._ANNOUNCEMENT_ID_PATTERN.match(
                link.attrs.get("onclick") or ""
            )
            if id_match is not None:
                announcement_ids.append(id_match.group(1))

        announcements = []
        for announcement_id in announcement_ids:
            # The date is only known after the announcement has been fetched, so the
            # date filter is applied after the request.
            announcement = self.get_announcement_by_id(
                session, csrf_token, announcement_id
            )
            if start_date and announcement.date < start_date:
                continue
            if end_date and announcement.date > end_date:
                continue
            announcements.append(announcement)
        announcements.sort(key=lambda x: x.date, reverse=True)
        return announcements
def get_announcements(
    self,
    csrf_token: str,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
):
    """
    Parse the optional date bounds and delegate to the announcement page handler.

    NOTE(review): no authentication check happens here, and the visible
    ``PESUAcademy.announcements`` caller does not check ``_authenticated`` either,
    unlike ``seating_information`` — confirm whether one is needed.

    :param csrf_token: The CSRF token forwarded to the announcement page handler.
    :param start_date: The lower date bound in "yyyy-mm-dd" format, or ``None`` for no bound.
    :param end_date: The upper date bound in "yyyy-mm-dd" format, or ``None`` for no bound.
    :return: The result of the announcement handler's ``get_page``.
    :raises ValueError: If a given date string does not match "yyyy-mm-dd".
    """

    def parse_date(value: Optional[str]) -> Optional[datetime.date]:
        # None means "no bound on this side" and is passed through untouched.
        if value is None:
            return None
        return datetime.datetime.strptime(value, "%Y-%m-%d").date()

    # Parsed values get their own expressions so the str-typed parameters are never
    # rebound to date objects.
    return self.announcement_handler.get_page(
        self.__session, csrf_token, parse_date(start_date), parse_date(end_date)
    )
create_personal_details_object_from_profile_page( :param soup: The BeautifulSoup object of the page. :return: The PersonalDetails object. """ + personal_details = dict() personal_details_section = soup.find( "div", attrs={"class": "elem-info-wrapper box-shadow clearfix"} @@ -111,6 +112,8 @@ def create_personal_details_object_from_profile_page( value = None if value == "NA" else value personal_details[key] = value + image_b64_encoded = soup.find("img", attrs={"class": "media-object"})["src"] + return PersonalDetails( name=personal_details["name"], prn=personal_details["pesu_id"], @@ -119,6 +122,7 @@ def create_personal_details_object_from_profile_page( branch=personal_details["branch"], semester=personal_details["semester"], section=personal_details["section"], + img=image_b64_encoded, email=personal_details["email_id"], mobile=personal_details["contact_no"], aadhar=personal_details["aadhar_no"], From d37f5afd6ea34456e2bdbbed82c040c159f8005b Mon Sep 17 00:00:00 2001 From: Suraj B M <100959814+silicoflare@users.noreply.github.com> Date: Tue, 30 Apr 2024 13:00:15 +0530 Subject: [PATCH 2/2] add function to get seating info data (#10) * build: add virtualenv directory to gitignore * feat: create model for seating info * feat: create seating info page handler * feat: create seating info utility function * feat: add seating info exports * feat: add seating info handler to main package * Minor refactor and handle no seating bug * Reformat code --------- Co-authored-by: aditeyabaral --- .gitignore | 3 +- pesuacademy/models/__init__.py | 1 + pesuacademy/models/seating_information.py | 19 ++++++++ pesuacademy/pages/__init__.py | 1 + pesuacademy/pages/seating_information.py | 58 +++++++++++++++++++++++ pesuacademy/pesuacademy.py | 12 +++++ pesuacademy/util/page.py | 3 ++ 7 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 pesuacademy/models/seating_information.py create mode 100644 pesuacademy/pages/seating_information.py diff --git a/.gitignore b/.gitignore 
class SeatingInformationHandler:
    """
    Fetches the seating information page and converts its table into
    ``SeatingInformation`` objects.
    """

    @staticmethod
    def get_seating_information_from_page(
        soup: BeautifulSoup,
    ) -> list[SeatingInformation]:
        """
        Build one ``SeatingInformation`` per row of the "seatinginfo" table.

        :param soup: The BeautifulSoup object of the page.
        :return: The seating entries, in table order.
        """
        info_table = soup.find("table", attrs={"id": "seatinginfo"})
        table_rows = info_table.find("tbody").find_all("tr")
        seating_info = []
        for row in table_rows:
            columns = row.find_all("td")
            # The first six cells are, in order: name, course code, date, time,
            # terminal and block.
            name, course_code, date, time, terminal, block = (
                columns[index].text.strip() for index in range(6)
            )
            seating_info.append(
                SeatingInformation(name, course_code, date, time, terminal, block)
            )
        return seating_info

    @staticmethod
    def get_page(session: requests_html.HTMLSession) -> list[SeatingInformation]:
        """
        Fetch the seating information page and parse it.

        :param session: The session used for the request.
        :return: The seating entries, or an empty list when the page reports that no
            seating information is available.
        :raises ConnectionError: If the response status is not 200, or if any other
            error occurs while fetching or parsing (chained as the cause).
        """
        try:
            profile_url = (
                "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
            )
            query = {
                "menuId": "655",
                "url": "studentProfilePESUAdmin",
                "controllerMode": "6404",
                "actionType": "5",
                "id": "0",
                "selectedData": "0",
                "_": str(int(datetime.datetime.now().timestamp() * 1000)),
            }
            response = session.get(profile_url, allow_redirects=False, params=query)
            if response.status_code != 200:
                raise ConnectionError("Unable to fetch seating info.")
            soup = BeautifulSoup(response.text, "lxml")
            no_seating_tag = soup.find("h5")
            # Compare the stripped text so surrounding whitespace in the markup does
            # not push the "nothing available" page into table parsing.
            if (
                no_seating_tag is not None
                and no_seating_tag.text.strip() == "No Test Seating Info is available"
            ):
                return []
            return SeatingInformationHandler.get_seating_information_from_page(soup)
        except ConnectionError:
            # Already the exception type callers expect; do not wrap it in itself.
            raise
        except Exception as error:
            # Callers only see ConnectionError; keep the real failure as the cause.
            raise ConnectionError("Unable to fetch seating info.") from error
def seating_information(self) -> list[SeatingInformation]:
    """
    Return the seating information of the currently authenticated user.

    :return: The seating information produced by the page handler.
    :raises AuthenticationError: If the user has not authenticated yet.
    """
    if self._authenticated:
        return self.page_handler.get_seating_info()
    raise AuthenticationError("You need to authenticate first.")