-
Notifications
You must be signed in to change notification settings - Fork 0
/
course.py
133 lines (102 loc) · 3.73 KB
/
course.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import bisect
import json
import re
from dataclasses import asdict, astuple, dataclass, field, fields
from util import dataclassToJson
@dataclass
class Course:
    """A course record keyed by subject, number, campus, semester and year.

    Identity (equality, ordering and hashing) is derived solely from the
    fixed-width key string produced by ``__str__``; the optional fields
    (credit hours, section ids, title/description ids, tag ids) do not take
    part in comparisons.
    """

    # Fields that make up the identity key (see __str__).
    subject: str
    course: int
    campusId: int
    semesterId: int
    year: int
    # Payload fields; they default to None and are filled in after creation.
    creditHours: list = None
    sectionIds: list = None
    titleId: int = None
    descriptionId: int = None
    tagIds: set = None

    def __str__(self):
        """Return the key: subject right-aligned to 4, course zero-padded to 4,
        campus id zero-padded to 2, then semester id and year unpadded."""
        return '{:>4}{:04}{:02}{}{}'.format(
            self.subject,
            self.course,
            self.campusId,
            self.semesterId,
            self.year,
        )

    def __eq__(self, c):
        """Two objects are equal when their string forms are equal."""
        mine = str(self)
        theirs = str(c)
        return mine == theirs

    def __lt__(self, c):
        """Order by lexicographic comparison of the string forms."""
        mine = str(self)
        theirs = str(c)
        return mine < theirs

    def __hash__(self):
        """Hash the key string so equal courses land in the same bucket."""
        key = str(self)
        return hash(key)
# https://docs.python.org/3/library/bisect.html#searching-sorted-lists
def bisectIndex(ls: list, value) -> int:
    """Return the index of the leftmost item in ``ls`` equal to ``value``.

    ``ls`` is searched with ``bisect.bisect_left`` and is therefore expected
    to be sorted in ascending order. Returns -1 when ``value`` is ``None`` or
    is not present.
    """
    if value is not None:
        pos = bisect.bisect_left(ls, value)
        # bisect_left yields the insertion point; it is a hit only when that
        # slot exists and already holds an equal item.
        if pos < len(ls) and ls[pos] == value:
            return pos
    return -1
def bisectStartsWith(ls: list, value: str) -> int:
    """Return the index of the longest item in ``ls`` that ``value`` starts with.

    ``ls`` is searched with ``bisect.bisect_right`` and is therefore expected
    to be a list of strings sorted in ascending order. When several equal
    items match, the rightmost one is returned. Returns -1 when ``value`` is
    ``None`` or no item is a prefix of ``value``.

    Every prefix of ``value`` sorts at or before ``value``, but it need not be
    the *immediate* predecessor: with ``['ab', 'abc']`` and ``'abd'`` the
    predecessor is ``'abc'`` while the actual prefix is ``'ab'``. When the
    predecessor is not a prefix, any remaining candidate can be no longer than
    the text the predecessor shares with ``value``, so the search is repeated
    for that shared text within the items to the left.
    """
    if value is None:
        return -1
    needle = value
    hi = len(ls)
    while True:
        index = bisect.bisect_right(ls, needle, 0, hi) - 1
        if index < 0:
            return -1
        candidate = ls[index]
        if value.startswith(candidate):
            return index
        # Length of the common prefix of the candidate and the needle. It is
        # strictly shorter than the needle here, so the loop always shrinks.
        shared = 0
        for left, right in zip(candidate, needle):
            if left != right:
                break
            shared += 1
        needle = needle[:shared]
        hi = index
def _loadJson(path: str):
    """Parse the JSON file at ``path`` and close the file handle afterwards."""
    with open(path, 'r') as file:
        return json.load(file)


if __name__ == '__main__':
    # Collapse the raw section list into one Course per
    # (subject, number, campus, semester, year) and write the result out
    # through dataclassToJson.
    # NOTE(review): campuses, descriptions and titles are searched with
    # bisect, so they are assumed to be sorted lists - confirm against the
    # scripts that produce them.
    campuses: list = _loadJson('campus.json')
    descriptions: list = _loadJson('description.json')
    sections: list = _loadJson('_pawsSection.raw.json')
    tags: list = _loadJson('tag.json')
    titles: list = _loadJson('title.json')

    tags = [tag['code'] for tag in tags]
    # Escape the codes so a code containing a regex metacharacter is matched
    # literally instead of altering the pattern.
    tagAlternation = '|'.join(re.escape(tag) for tag in tags)
    # Matches a parenthesised run of tag codes and slashes, e.g.
    # \((?:CC|CL|COM|HON|HU|LA|Q|SS|/)+\)
    tagPattern = re.compile(
        r'\((?:' + tagAlternation + r'|/)+\)',
        flags=re.IGNORECASE
    )
    # Matches a single tag code, e.g. (CC|CL|COM|HON|HU|LA|Q|SS)
    tagCodePattern = re.compile(
        r'(' + tagAlternation + r')',
        flags=re.IGNORECASE
    )

    # Position in this list is the semester id.
    semesters = ['spring', 'summer', 'fall']

    # Use dict to ensure courses are unique
    courses = dict()
    for index, section in enumerate(sections):
        number: int = section['course'][1]
        creditHours: list = section['creditHours']
        description: str = section['title'][1]
        location: str = section['location']
        semester: str = section['semester']
        subject: str = section['course'][0]
        title: str = section['title'][0]
        year: int = section['year']

        candidate = Course(
            subject=subject,
            course=number,
            campusId=bisectIndex(campuses, location),
            semesterId=semesters.index(semester),
            year=year
        )
        key = str(candidate)
        course: Course = courses.get(key)
        if course is None:
            # First section seen for this course: fill in the payload fields.
            course = candidate
            # Copy so the min/max updates below do not write back into the
            # section's own creditHours list.
            course.creditHours = list(creditHours)
            course.sectionIds = []
            course.titleId = bisectIndex(titles, title)
            course.descriptionId = bisectStartsWith(descriptions, description)
            course.tagIds = set()
            courses[key] = course

        # Widen the course's credit-hour range to cover this section.
        course.creditHours[0] = min(course.creditHours[0], creditHours[0])
        course.creditHours[1] = max(course.creditHours[1], creditHours[1])
        course.sectionIds.append(index)

        # Whatever follows the known description text is scanned for tags.
        if description is None:
            attributeText: str = ''
        elif course.descriptionId == -1:
            # No known description matched, so there is no prefix to strip.
            # Indexing descriptions[-1] here would silently use the length of
            # the last description instead.
            attributeText = description
        else:
            attributeText = description[len(descriptions[course.descriptionId]):]

        if attributeText != '':
            tagText = ''.join(tagPattern.findall(attributeText))
            tagIds = [
                tags.index(tagCode.upper())
                for tagCode in tagCodePattern.findall(tagText)
            ]
            course.tagIds.update(tagIds)

    courses = list(courses.values())
    # Convert the tag id sets to lists before handing off to dataclassToJson.
    for course in courses:
        course.tagIds = list(course.tagIds)
    dataclassToJson(Course, courses, 'course')