Skip to content

Commit

Permalink
Whatsdue: fixed to work with the new JAC system (#207)
Browse files Browse the repository at this point in the history
* Whatsdue: fixed to work with the new JAC system (compared to the old ECPS)

* Fixed docstrings, typos and better naming
  • Loading branch information
49Indium authored Jul 27, 2024
1 parent f22543a commit 66c8e5e
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 154 deletions.
239 changes: 106 additions & 133 deletions uqcsbot/utils/uq_course_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime
from dateutil import parser
from bs4 import BeautifulSoup, element
from typing import List, Dict, Optional, Literal, Tuple
from typing import Optional, Literal
from dataclasses import dataclass
import json
import re
Expand All @@ -27,23 +27,23 @@ class Offering:

CampusType = Literal["St Lucia", "Gatton", "Herston"]
# The codes used internally within UQ systems
campus_codes: Dict[CampusType, str] = {
campus_codes: dict[CampusType, str] = {
"St Lucia": "STLUC",
"Gatton": "GATTN",
"Herston": "HERST",
}

ModeType = Literal["Internal", "External", "Flexible Delivery", "Intensive"]
# The codes used internally within UQ systems
mode_codes: Dict[ModeType, str] = {
mode_codes: dict[ModeType, str] = {
"Internal": "IN",
"External": "EX",
"Flexible Delivery": "FD",
"Intensive": "IT",
}

SemesterType = Literal["1", "2", "Summer"]
semester_codes: Dict[SemesterType, int] = {"1": 1, "2": 2, "Summer": 3}
semester_codes: dict[SemesterType, int] = {"1": 1, "2": 2, "Summer": 3}

semester: SemesterType
campus: CampusType
Expand All @@ -68,20 +68,20 @@ def __init__(

def get_semester_code(self) -> int:
    """
    Returns the code used internally within UQ for the semester of the offering.
    """
    # semester_codes maps the human-readable semester name to UQ's integer code.
    return self.semester_codes[self.semester]

def get_campus_code(self) -> str:
    """
    Returns the code used internally within UQ for the campus of the offering.
    """
    # campus_codes maps the human-readable campus name to UQ's internal code.
    return self.campus_codes[self.campus]

def get_mode_code(self) -> str:
    """
    Returns the code used internally within UQ for the mode of the offering.
    """
    # mode_codes maps the human-readable delivery mode to UQ's internal code.
    return self.mode_codes[self.mode]

Expand Down Expand Up @@ -111,54 +111,58 @@ def estimate_current_semester() -> SemesterType:
@dataclass
class AssessmentItem:
course_name: str
category: str
task: str
due_date: str
task_details_url: str
due_date: str # This often also contains a lot of description
weight: str

def get_parsed_due_date(self) -> Optional[tuple[datetime, datetime]]:
    """
    Returns the minimum and maximum date for a particular assessment item, or
    None if no dates can be parsed. These will be the same if only a single
    date can be parsed, or will be the earliest and latest dates if multiple
    dates can be parsed (e.g. assignment series, or when blocks of dates are
    scheduled as the due dates for talks).
    """
    if self.due_date.startswith("End of Semester Exam Period"):
        return get_current_exam_period()

    # The groups must be non-capturing: when a pattern contains capturing
    # groups, re.findall returns tuples of the groups instead of the full
    # matched text, so nothing usable would be handed to the date parser.
    potential_date_strings: list[str] = re.findall(
        r"\d\d?/\d\d?/\d\d\d\d(?: \d\d?(?::\d\d)?(?: [ap]m)?)?", self.due_date
    )
    if not potential_date_strings:
        return None

    # Dates are written day-first (dd/mm/yyyy).
    parser_info = parser.parserinfo(dayfirst=True)
    dates: list[datetime] = []
    for potential_date_string in potential_date_strings:
        try:
            dates.append(parser.parse(potential_date_string, parser_info))
        except (ValueError, OverflowError):
            # A candidate that merely looks like a date (e.g. 99/99/2024) is
            # skipped; any other candidates in the text are still used.
            continue

    if not dates:
        return None
    return min(dates), max(dates)

def is_after(self, cutoff: datetime) -> bool:
    """
    Returns whether the assessment ends after the given cutoff.
    """
    date_range = self.get_parsed_due_date()
    if date_range is None:
        # If we can't parse a date, we're better off keeping it just in case.
        return True

    # Only the latest parsed date matters for an "ends after" check.
    _, end_datetime = date_range
    return end_datetime >= cutoff

def is_before(self, cutoff: datetime) -> bool:
    """
    Returns whether the assessment starts before the given cutoff.
    """
    date_range = self.get_parsed_due_date()
    if date_range is None:
        # If we can't parse a date, we're better off keeping it just in case.
        # TODO(mitch): Keep track of these instances to attempt to accurately
        # parse them in future. Will require manual detection + parsing.
        return True

    # Only the earliest parsed date matters for a "starts before" check.
    start_datetime, _ = date_range
    return start_datetime <= cutoff

def get_weight_as_int(self) -> Optional[int]:
Expand All @@ -171,18 +175,6 @@ def get_weight_as_int(self) -> Optional[int]:
return None


class DateSyntaxException(Exception):
    """
    Raised when a due date string is written in a syntax that cannot be parsed.
    """

    def __init__(self, date: str, course_name: str):
        # Keep the offending values on the instance so handlers can inspect them.
        self.date = date
        self.course_name = course_name
        self.message = f"Could not parse date '{date}' for course '{course_name}'."
        super().__init__(self.message, date, course_name)


class CourseNotFoundException(Exception):
"""
Raised when a given course cannot be found for UQ.
Expand Down Expand Up @@ -214,15 +206,27 @@ class AssessmentNotFoundException(Exception):
Raised when the assessment table cannot be found for assess page.
"""

def __init__(self, course_name: str, offering: Optional[Offering] = None):
    # The message names the offering's semester, campus and mode only when an
    # offering was supplied.
    if offering is None:
        self.message = f"Could not find the assessment table for '{course_name}'."
    else:
        self.message = f"Could not find the assessment table for '{course_name}' during semester {offering.semester} at {offering.campus} done in mode '{offering.mode}'."
    self.course_name = course_name
    super().__init__(self.message, self.course_name)


class AssessmentNotParseableException(Exception):
    """
    Raised when an assessment item in a course profile cannot be parsed.
    """

    def __init__(self, course_name: str, course_profile_url: str):
        # Keep the course and the profile URL on the instance so handlers can
        # point the user at the page that could not be parsed.
        self.course_name = course_name
        self.course_profile_url = course_profile_url
        self.message = f"Could not parse an assessment for '{course_name}': {course_profile_url}"
        super().__init__(self.message, course_name)


class HttpException(Exception):
Expand All @@ -239,10 +243,10 @@ def __init__(self, url: str, status_code: int):


def get_uq_request(
url: str, params: Optional[Dict[str, str]] = None
url: str, params: Optional[dict[str, str]] = None
) -> requests.Response:
"""
Handles specific error handelling and header provision for requests.get to
Handles specific error handling and header provision for requests.get to
uq course urls
"""
headers = {"User-Agent": "UQCS"}
Expand Down Expand Up @@ -296,17 +300,7 @@ def get_course_profile_url(
return url


def get_course_profile_id(course_name: str, offering: Optional[Offering] = None) -> int:
    """
    Returns the ID of the latest course profile for the given course.
    """
    profile_url = get_course_profile_url(course_name, offering=offering)
    # The ID is the final path segment of the profile url, e.g.
    # https://course-profiles.uq.edu.au/student_section_loader/section_1/100728
    id_start = profile_url.rindex("/") + 1
    return int(profile_url[id_start:])


def get_current_exam_period():
def get_current_exam_period() -> tuple[datetime, datetime]:
"""
Returns the start and end datetimes for the current semester's exam period.
Expand All @@ -332,52 +326,30 @@ def get_current_exam_period():
return start_datetime, end_datetime


def get_course_assessment_items(
    course_name: str,
    offering: Offering,
) -> list[AssessmentItem]:
    """
    Returns all the assessment items for the given course.

    Raises HttpException if the course profile page does not respond with an
    OK status, and AssessmentNotFoundException if the page has no assessment
    summary table.
    """
    course_profile_url = get_course_profile_url(course_name, offering=offering)
    course_assessment_url = course_profile_url + "#assessment"

    http_response = get_uq_request(course_assessment_url)
    if http_response.status_code != requests.codes.ok:
        raise HttpException(course_assessment_url, http_response.status_code)
    html = BeautifulSoup(http_response.content, "html.parser")

    assessment_table = html.find("div", class_="assessment-summary-table")
    if not isinstance(assessment_table, element.Tag):
        raise AssessmentNotFoundException(course_name, offering)

    # Start from 1st index to skip over the row containing column names.
    assessment_rows = assessment_table.find_all("tr")[1:]
    return [
        get_parsed_assessment_item(row, course_name, course_profile_url)
        for row in assessment_rows
    ]


def get_element_inner_html(dom_element: element.Tag):
Expand All @@ -388,31 +360,32 @@ def get_element_inner_html(dom_element: element.Tag):


def get_parsed_assessment_item(
    assessment_item_tag: element.Tag, course_name: str, course_profile_url: str
) -> AssessmentItem:
    """
    Returns the parsed assessment details for the
    given assessment item table row element in the ECP.

    The row is expected to hold four cells, in order: category, task, weight
    and due date. Raises AssessmentNotParseableException if the row does not
    have that shape or the task cell has no link to the task details.
    """
    assessment_cells = assessment_item_tag.find_all("td")
    if len(assessment_cells) != 4:
        # Report an unexpected row layout with the dedicated exception rather
        # than an opaque unpacking ValueError.
        raise AssessmentNotParseableException(course_name, course_profile_url)
    category_cell, task_cell, weight_cell, due_date_cell = assessment_cells

    # The task name is wrapped in a link whose href is appended to the profile
    # url to form the task details url.
    task_link = task_cell.find("a")
    if not isinstance(task_link, element.Tag) or "href" not in task_link.attrs:
        raise AssessmentNotParseableException(course_name, course_profile_url)
    task_details_url = course_profile_url + task_link.attrs["href"]

    # Each paragraph of the due date cell becomes one line; empty ones are
    # dropped.
    due_date_lines = (p.text.strip() for p in due_date_cell.find_all("p"))
    due_date = "\n".join(line for line in due_date_lines if line)

    return AssessmentItem(
        course_name,
        category_cell.text.strip(),
        task_link.text.strip(),
        task_details_url,
        due_date,
        weight_cell.text.strip(),
    )


class Exam:
Expand All @@ -433,7 +406,7 @@ def get_past_exams_page_url(course_code: str) -> str:
return BASE_PAST_EXAMS_URL + course_code


def get_past_exams(course_code: str) -> List[Exam]:
def get_past_exams(course_code: str) -> list[Exam]:
"""
Takes the course code and generates each result in the format:
('year Sem X:', link)
Expand All @@ -450,7 +423,7 @@ def get_past_exams(course_code: str) -> List[Exam]:
return []
exam_list_json = exam_list_json[0]

exam_list: List[Exam] = []
exam_list: list[Exam] = []
for exam_json in exam_list_json:
year = int(exam_json[0]["examYear"])
# Semesters are given as "Sem.1", so we will change this to "Sem 1"
Expand Down
Loading

0 comments on commit 66c8e5e

Please sign in to comment.