From 66c8e5ebd500e384a48bf0779d155b05271e59cd Mon Sep 17 00:00:00 2001 From: Isaac Beh Date: Sat, 27 Jul 2024 13:51:39 +1000 Subject: [PATCH] Whatsdue: fixed to work with the new JAC system (#207) * Whatsdue: fixed to work with the new JAC system (compared to the old ECPS) * Fixed docstrings, typos and better naming --- uqcsbot/utils/uq_course_utils.py | 239 ++++++++++++++----------------- uqcsbot/whatsdue.py | 53 ++++--- 2 files changed, 138 insertions(+), 154 deletions(-) diff --git a/uqcsbot/utils/uq_course_utils.py b/uqcsbot/utils/uq_course_utils.py index f879d79..25fab8e 100644 --- a/uqcsbot/utils/uq_course_utils.py +++ b/uqcsbot/utils/uq_course_utils.py @@ -3,7 +3,7 @@ from datetime import datetime from dateutil import parser from bs4 import BeautifulSoup, element -from typing import List, Dict, Optional, Literal, Tuple +from typing import Optional, Literal from dataclasses import dataclass import json import re @@ -27,7 +27,7 @@ class Offering: CampusType = Literal["St Lucia", "Gatton", "Herston"] # The codes used internally within UQ systems - campus_codes: Dict[CampusType, str] = { + campus_codes: dict[CampusType, str] = { "St Lucia": "STLUC", "Gatton": "GATTN", "Herston": "HERST", @@ -35,7 +35,7 @@ class Offering: ModeType = Literal["Internal", "External", "Flexible Delivery", "Intensive"] # The codes used internally within UQ systems - mode_codes: Dict[ModeType, str] = { + mode_codes: dict[ModeType, str] = { "Internal": "IN", "External": "EX", "Flexible Delivery": "FD", @@ -43,7 +43,7 @@ class Offering: } SemesterType = Literal["1", "2", "Summer"] - semester_codes: Dict[SemesterType, int] = {"1": 1, "2": 2, "Summer": 3} + semester_codes: dict[SemesterType, int] = {"1": 1, "2": 2, "Summer": 3} semester: SemesterType campus: CampusType @@ -68,20 +68,20 @@ def __init__( def get_semester_code(self) -> int: """ - Returns the code used interally within UQ for the semester of the offering. + Returns the code used internally within UQ for the semester of the offering. """ return self.semester_codes[self.semester] def get_campus_code(self) -> str: """ - Returns the code used interally within UQ for the campus of the offering. + Returns the code used internally within UQ for the campus of the offering. """ self.campus return self.campus_codes[self.campus] def get_mode_code(self) -> str: """ - Returns the code used interally within UQ for the mode of the offering. + Returns the code used internally within UQ for the mode of the offering. """ return self.mode_codes[self.mode] @@ -111,54 +111,58 @@ def estimate_current_semester() -> SemesterType: @dataclass class AssessmentItem: course_name: str + category: str task: str - due_date: str + task_details_url: str + due_date: str # This often also contains a lot of description weight: str - def get_parsed_due_date(self): + def get_parsed_due_date(self) -> Optional[tuple[datetime, datetime]]: """ - Returns the parsed due date for the given assessment item as a datetime - object. If the date cannot be parsed, a DateSyntaxException is raised. + Returns the minimum and maximum date for a particular assessment item, or None if no dates can be parsed. These will be the same if only a single date can be parsed, or will be the earliest and latest dates if multiple dates can be parsed (e.g. assignment series, or when blocks of dates are scheduled as the due dates for talks). """ - if self.due_date == "Examination Period": + if self.due_date.startswith("End of Semester Exam Period"): return get_current_exam_period() parser_info = parser.parserinfo(dayfirst=True) - try: - # If a date range is detected, attempt to split into start and end - # dates. Else, attempt to just parse the whole thing. - if " - " in self.due_date: - start_date, end_date = self.due_date.split(" - ", 1) - start_datetime = parser.parse(start_date, parser_info) - end_datetime = parser.parse(end_date, parser_info) - return start_datetime, end_datetime - due_datetime = parser.parse(self.due_date, parser_info) - return due_datetime, due_datetime - except Exception: - raise DateSyntaxException(self.due_date, self.course_name) - - def is_after(self, cutoff: datetime): + potential_date_strings: list[str] = re.findall( + r"\d\d?/\d\d?/\d\d\d\d( \d\d?(:\d\d)?( [ap]m)?)?", self.due_date + ) + dates: list[datetime] = [] + + for potential_date_string in potential_date_strings: + try: + date = parser.parse(potential_date_string, parser_info) + dates.append(date) + except Exception: + # No need to do anything if the date cannot be parsed + pass + + if dates: + return min(dates), max(dates) + return None + + def is_after(self, cutoff: datetime) -> bool: """ - Returns whether the assessment occurs after the given cutoff. + Returns whether the assessment ends after the given cutoff. """ - try: - start_datetime, end_datetime = self.get_parsed_due_date() - except DateSyntaxException: + date_range = self.get_parsed_due_date() + if date_range is None: # If we can't parse a date, we're better off keeping it just in case. return True - return end_datetime >= cutoff if end_datetime else start_datetime >= cutoff - def is_before(self, cutoff: datetime): + _, end_datetime = date_range + return end_datetime >= cutoff + + def is_before(self, cutoff: datetime) -> bool: """ - Returns whether the assessment occurs before the given cutoff. + Returns whether the assessment starts before the given cutoff. """ - try: - start_datetime, _ = self.get_parsed_due_date() - except DateSyntaxException: - # TODO bot.logger.error(e.message) + date_range = self.get_parsed_due_date() + if date_range is None: # If we can't parse a date, we're better off keeping it just in case. - # TODO(mitch): Keep track of these instances to attempt to accurately - # parse them in future. Will require manual detection + parsing. return True + + start_datetime, _ = date_range return start_datetime <= cutoff def get_weight_as_int(self) -> Optional[int]: @@ -171,18 +175,6 @@ def get_weight_as_int(self) -> Optional[int]: return None -class DateSyntaxException(Exception): - """ - Raised when an unparsable date syntax is encountered. - """ - - def __init__(self, date: str, course_name: str): - self.message = f"Could not parse date '{date}' for course '{course_name}'." - self.date = date - self.course_name = course_name - super().__init__(self.message, self.date, self.course_name) - - class CourseNotFoundException(Exception): """ Raised when a given course cannot be found for UQ. @@ -214,15 +206,27 @@ class AssessmentNotFoundException(Exception): Raised when the assessment table cannot be found for assess page. """ - def __init__(self, course_names: List[str], offering: Optional[Offering] = None): + def __init__(self, course_name: str, offering: Optional[Offering] = None): if offering is None: - self.message = ( - f"Could not find the assessment table for '{', '.join(course_names)}'." - ) + self.message = f"Could not find the assessment table for '{course_name}'." else: - self.message = f"Could not find the assessment table for '{', '.join(course_names)}' during semester {offering.semester} at {offering.campus} done in mode '{offering.mode}'." - self.course_names = course_names - super().__init__(self.message, self.course_names) + self.message = f"Could not find the assessment table for '{course_name}' during semester {offering.semester} at {offering.campus} done in mode '{offering.mode}'." + self.course_name = course_name + super().__init__(self.message, self.course_name) + + +class AssessmentNotParseableException(Exception): + """ + Raised when the assessment cannot be parsed from the ECP. + """ + + def __init__(self, course_name: str, course_profile_url: str): + self.message = ( + f"Could not parse an assessment for '{course_name}': {course_profile_url}" + ) + self.course_name = course_name + self.course_profile_url = course_profile_url + super().__init__(self.message, self.course_name) class HttpException(Exception): @@ -239,10 +243,10 @@ def __init__(self, url: str, status_code: int): def get_uq_request( - url: str, params: Optional[Dict[str, str]] = None + url: str, params: Optional[dict[str, str]] = None ) -> requests.Response: """ - Handles specific error handelling and header provision for requests.get to + Handles specific error handeling and header provision for requests.get to uq course urls """ headers = {"User-Agent": "UQCS"} @@ -296,17 +300,7 @@ def get_course_profile_url( return url -def get_course_profile_id(course_name: str, offering: Optional[Offering] = None) -> int: - """ - Returns the ID to the latest course profile for the given course. - """ - profile_url = get_course_profile_url(course_name, offering=offering) - # The profile url looks like this - # https://course-profiles.uq.edu.au/student_section_loader/section_1/100728 - return int(profile_url[profile_url.rindex("/") + 1 :]) - - -def get_current_exam_period(): +def get_current_exam_period() -> tuple[datetime, datetime]: """ Returns the start and end datetimes for the current semester's exam period. @@ -332,52 +326,30 @@ def get_current_exam_period(): return start_datetime, end_datetime -def get_course_assessment_page( - course_names: List[str], offering: Optional[Offering] -) -> str: +def get_course_assessment_items( + course_name: str, + offering: Offering, +) -> list[AssessmentItem]: """ - Determines the course ids from the course names and returns the - url to the assessment table for the provided courses + Returns all the assessment for the given course. """ - profile_ids = map( - lambda course: str(get_course_profile_id(course, offering=offering)), - course_names, - ) - return BASE_ASSESSMENT_URL + ",".join(profile_ids) - + course_profile_url = get_course_profile_url(course_name, offering=offering) + course_assessment_url = course_profile_url + "#assessment" -def get_course_assessment( - course_names: List[str], - cutoff: Tuple[Optional[datetime], Optional[datetime]] = (None, None), - assessment_url: Optional[str] = None, - offering: Optional[Offering] = None, -) -> List[AssessmentItem]: - """ - Returns all the course assessment for the given - courses that occur after the given cutoff. - """ - if assessment_url is None: - joined_assessment_url = get_course_assessment_page(course_names, offering) - else: - joined_assessment_url = assessment_url - http_response = get_uq_request(joined_assessment_url) + http_response = get_uq_request(course_assessment_url) if http_response.status_code != requests.codes.ok: - raise HttpException(joined_assessment_url, http_response.status_code) + raise HttpException(course_assessment_url, http_response.status_code) html = BeautifulSoup(http_response.content, "html.parser") - assessment_table = html.find("table", class_="tblborder") + + assessment_table = html.find("div", class_="assessment-summary-table") if not isinstance(assessment_table, element.Tag): - raise AssessmentNotFoundException(course_names, offering) + raise AssessmentNotFoundException(course_name, offering) # Start from 1st index to skip over the row containing column names. - assessment = assessment_table.findAll("tr")[1:] - parsed_assessment = map(get_parsed_assessment_item, assessment) - # If no cutoff is specified, set cutoff to UNIX epoch (i.e. filter nothing). - cutoff_min = cutoff[0] or datetime.min - cutoff_max = cutoff[1] or datetime.max - filtered_assessment = filter( - lambda item: item.is_after(cutoff_min) and item.is_before(cutoff_max), - parsed_assessment, - ) - return list(filtered_assessment) + assessment_table = assessment_table.findAll("tr")[1:] + return [ + get_parsed_assessment_item(row, course_name, course_profile_url) + for row in assessment_table + ] def get_element_inner_html(dom_element: element.Tag): @@ -388,31 +360,32 @@ def get_element_inner_html(dom_element: element.Tag): def get_parsed_assessment_item( - assessment_item_tag: element.Tag, + assessment_item_tag: element.Tag, course_name: str, course_profile_url: str ) -> AssessmentItem: """ Returns the parsed assessment details for the - given assessment item table row element. - - Note: Because of the inconsistency of UQ assessment details, I've had to - make some fairly strict assumptions about the structure of each field. - This is likely insufficient to handle every course's - structure, and thus is subject to change. + given assessment item table row element in the ECP. """ - course_name, task, due_date, weight = assessment_item_tag.findAll("div") - # Handles courses of the form 'CSSE1001 - Sem 1 2018 - St Lucia - Internal'. - # Thus, this bit of code will extract the course. - course_name = course_name.text.strip().split(" - ")[0] - # Handles tasks of the form 'Computer Exercise
Assignment 2'. - task = get_element_inner_html(task).strip().replace("
", " - ") - # Handles due dates of the form '26 Mar 18 - 27 Mar 18
Held in Week 6 - # Learning Lab Sessions (Monday/Tuesday)'. Thus, this bit of code will - # keep only the date portion of the field. - due_date = get_element_inner_html(due_date).strip().split("
")[0] - # Handles weights of the form '30%
Alternative to oral presentation'. - # Thus, this bit of code will keep only the weight portion of the field. - weight = get_element_inner_html(weight).strip().split("
")[0] - return AssessmentItem(course_name, task, due_date, weight) + assessment_cells: element.ResultSet[element.Tag] = assessment_item_tag.findAll("td") + category, task, weight, due_date = assessment_cells + + category = category.text.strip() + + task = task.findChild("a") + if not isinstance(task, element.Tag): + raise AssessmentNotParseableException(course_name, course_profile_url) + task_description_url = course_profile_url + task.attrs["href"] + task = task.text.strip() + + weight = weight.text.strip() + + due_date_paragraphs: element.ResultSet[element.Tag] = due_date.findAll("p") + due_date_lines = (p.text.strip() for p in due_date_paragraphs) + due_date = "\n".join(line for line in due_date_lines if line) + + return AssessmentItem( + course_name, category, task, task_description_url, due_date, weight + ) class Exam: @@ -433,7 +406,7 @@ def get_past_exams_page_url(course_code: str) -> str: return BASE_PAST_EXAMS_URL + course_code -def get_past_exams(course_code: str) -> List[Exam]: +def get_past_exams(course_code: str) -> list[Exam]: """ Takes the course code and generates each result in the format: ('year Sem X:', link) @@ -450,7 +423,7 @@ def get_past_exams(course_code: str) -> List[Exam]: return [] exam_list_json = exam_list_json[0] - exam_list: List[Exam] = [] + exam_list: list[Exam] = [] for exam_json in exam_list_json: year = int(exam_json[0]["examYear"]) # Semesters are given as "Sem.1", so we will change this to "Sem 1" diff --git a/uqcsbot/whatsdue.py b/uqcsbot/whatsdue.py index 7b3a51c..d20cf88 100644 --- a/uqcsbot/whatsdue.py +++ b/uqcsbot/whatsdue.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta import logging -from typing import Optional, Callable, Literal, Dict +from typing import Optional, Callable, Literal import discord from discord import app_commands @@ -9,15 +9,14 @@ from uqcsbot.yelling import yelling_exemptor from uqcsbot.utils.uq_course_utils import ( - DateSyntaxException, + AssessmentNotFoundException, Offering, CourseNotFoundException, HttpException, ProfileNotFoundException, AssessmentItem, - get_course_assessment, - get_course_assessment_page, - get_course_profile_id, + get_course_assessment_items, + get_course_profile_url, ) AssessmentSortType = Literal["Date", "Course Name", "Weight"] @@ -27,14 +26,14 @@ def sort_by_date(item: AssessmentItem): - """Provides a key to sort assessment dates by. If the date cannot be parsed, will put it with items occuring during exam block.""" - try: - return item.get_parsed_due_date()[0] - except DateSyntaxException: - return datetime.max + """Provides a key to sort assessment dates by. If the date cannot be parsed, will put it with items occurring during exam block.""" + due_date = item.get_parsed_due_date() + if due_date: + return due_date[0] + return datetime.max -SORT_METHODS: Dict[ +SORT_METHODS: dict[ AssessmentSortType, Callable[[AssessmentItem], int | str | datetime] ] = { "Date": sort_by_date, @@ -55,8 +54,8 @@ def __init__(self, bot: commands.Bot): semester="The semester to get assessment for. Defaults to what UQCSbot believes is the current semester.", campus="The campus the course is held at. Defaults to St Lucia. Note that many external courses are 'hosted' at St Lucia.", mode="The mode of the course. Defaults to Internal.", - courses="Course codes seperated by spaces", - sort_order="The order to sort courses by. Defualts to Date.", + courses="Course codes separated by spaces", + sort_order="The order to sort courses by. Defaults to Date.", reverse_sort="Whether to reverse the sort order. Defaults to false.", show_ecp_links="Show the first ECP link for each course page. Defaults to false.", ) @@ -88,27 +87,39 @@ async def whatsdue( # If full output is not specified, set the cutoff to today's date. cutoff = ( - None if fulloutput else datetime.today(), + datetime.min if fulloutput else datetime.today(), datetime.today() + timedelta(weeks=weeks_to_show) if weeks_to_show > 0 - else None, + else datetime.max, ) + try: - assessment_page = get_course_assessment_page(course_names, offering) - assessment = get_course_assessment(course_names, cutoff, assessment_page) + assessment = [ + get_course_assessment_items(course_name, offering) + for course_name in course_names + ] + assessment = [ + item + for course in assessment + for item in course + if item.is_after(cutoff[0]) and item.is_before(cutoff[1]) + ] except HttpException as e: logging.error(e.message) await interaction.edit_original_response( content=f"An error occurred, please try again." ) return - except (CourseNotFoundException, ProfileNotFoundException) as e: + except ( + CourseNotFoundException, + ProfileNotFoundException, + AssessmentNotFoundException, + ) as e: await interaction.edit_original_response(content=e.message) return embed = discord.Embed( title=f"What's Due: {', '.join(course_names)}", - url=assessment_page, description="*WARNING: Assessment information may vary/change/be entirely different! Use at your own discretion. Check your ECP for a true list of assessment.*", ) if assessment: @@ -116,7 +127,7 @@ async def whatsdue( for assessment_item in assessment: embed.add_field( name=assessment_item.course_name, - value=f"`{assessment_item.weight}` {assessment_item.task} **({assessment_item.due_date})**", + value=f"`{assessment_item.weight}` [{assessment_item.task}]({assessment_item.task_details_url}) ({assessment_item.category})\n{assessment_item.due_date}", inline=False, ) elif fulloutput: @@ -132,7 +143,7 @@ async def whatsdue( if show_ecp_links: ecp_links = [ - f"[{course_name}]({ECP_ASSESSMENT_URL + str(get_course_profile_id(course_name))})" + f"[{course_name}]({get_course_profile_url(course_name) + '#assessment'})" for course_name in course_names ] embed.add_field(