-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
313 lines (247 loc) · 13.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
"""
The main file for the project, which by default runs validations for all high school and college applicants.
It also generates a score for each applicant based on predefined criteria.
"""
import csv
import time
from datetime import datetime
from typing import Tuple
import pandas as pd
import constants as cs
from classes import Student
from utils import validations as vali, scoring_util as sutil, util, unittests
# First ones to work on
# TODO: Add school address to spreadsheet
# TODO: Detect wide reviewer spread (lowest and highest is greater than 20? 25? points)
# TODO: If record doesn't write, throw up error
# Backlog
# TODO: Output data to Google Spreadsheets https://www.twilio.com/blog/2017/02/an-easy-way-to-read-and-write-to-a-google-spreadsheet-in-python.html https://automatetheboringstuff.com/2e/chapter14/
# TODO: Coursework functionality
# TODO: Extract csv from AwardSpring automatically - https://automatetheboringstuff.com/2e/chapter12/
# https://realpython.com/python-web-scraping-practical-introduction/
# TODO: Implement Sphinx
# TODO: Move constants to Google Spreadsheet for non-dev user to update
# TODO: Email notifications for warnings
# TODO: Incremental changes
# TODO: Detect changes and update Google Sheet rather than rerunning every time
# TODO: Determine school quality
# TODO: Check submission status, if they have not submitted but filled everything out, autowarn?
# TODO: Make High School And College student subclass
# TODO: Store several variables as class variables https://realpython.com/inheritance-composition-python/
# TODO: Host this on an AWS server ? https://realpython.com/python-sql-libraries/
# TODO: Verify ACT/SAT from pdf https://pypi.org/project/pdftotext/
# TODO: Extract coursework from pdf https://pypi.org/project/pdftotext/
# TODO: Figure out how to handle the questions changing
# TODO: Add gitignore with emails and passwords, better secure them
# TODO: Package numpy, scipy
# TODO: ACT/SAT Superscores
# TODO: Ask for SAT II test scores?
# TODO: batchgeo autogen?
# Changes to scholarship setup
# TODO: SAT/ACT not number
def compute_HS_scores(file: str, verbose: bool = False, DEBUG: bool = False, CALL_APIS: bool = False):
    """The main function that computes the high school students' scores and validates their applications

    Reads ``Student_Data/<file>``, builds a ``Student`` for every row and, for each row that
    passes the basic "is this person really applying" check, runs the validations, computes
    the scores and writes the row together with its scores to ``output.csv``.

    Parameters
    ----------
    file : str
        Name of the file inside ``Student_Data/`` with all of the students' answers
    verbose : bool
        Print the scores of each scored applicant; also passed on to the validation and scoring helpers
    DEBUG : bool
        Passed on unchanged to the validation and scoring helpers
    CALL_APIS : bool
        Passed on to the address validation. When it is exactly ``False`` the raw address
        answers are copied into the student's ``cleaned_*`` fields.

    Returns
    -------
    list or None
        The ``Student`` instances that were scored, or ``None`` when ``vali.questions_check``
        rejects the headers of the file

    Raises
    ------
    FileNotFoundError
        If ``Student_Data/<file>`` does not exist
    """
    with open('Student_Data/' + str(file), 'r', encoding="utf-8-sig") as csvinput:
        d_reader = csv.DictReader(csvinput)

        # Check if the questions exist in the file, most often a change in the year
        if not vali.questions_check(d_reader.fieldnames):
            return None

        # Adding leading columns for the scores the students received
        headers = ['Total_Score', 'GPA_Score', 'ACTSAT_Score', 'ACTMSATM_Score', 'STEM_Score', 'Reviewer_Score',
                   'home_to_school_dist', 'home_to_school_time_pt', 'home_to_school_time_car'] + d_reader.fieldnames

        # The output file lives in a context manager so that it is flushed and closed on every
        # exit path, including an exception raised part-way through the run
        with open('output.csv', 'w', newline='', encoding='utf-8-sig') as csvoutput:
            writer = csv.DictWriter(csvoutput, fieldnames=headers)
            writer.writeheader()

            # Load the conversions and lists into variables for reuse
            SAT_to_ACT_dict = util.conversion_dict('SAT_to_ACT.csv', 'int')
            SAT_to_ACT_Math_dict = util.conversion_dict('SAT_to_ACT_Math.csv', 'int')
            course_scores = util.conversion_dict('Course_scoring.csv', 'str')
            school_list, chicago_schools = vali.get_school_list('Illinois_Schools_Fix.csv')

            # Iterate through file once to get data for histograms
            ACT_Overall, ACTM_Overall = sutil.generate_histo_arrays(file, SAT_to_ACT_dict, SAT_to_ACT_Math_dict)

            # TODO: Once live, use the normalized function
            # reviewer_scores = sutil.get_reviewer_scores_normalized('Reviewer Scores by Applicant for 2019 Incentive Awards.csv')
            reviewer_scores = sutil.get_reviewer_scores('Reviewer Scores by Applicant for 2020 Incentive Awards.csv')

            questions = cs.questions
            student_list = []
            for line in d_reader:
                lastName = line[questions['lastName']]
                firstName = line[questions['firstName']]
                s = Student.Student(firstName, lastName)

                s.GPA_Value = util.get_num(line[questions['GPA_Value']])
                s.ACT_SAT_value = util.get_num(line[questions['ACT_SAT_value']])
                s.ACTM_SATM_value = util.get_num(line[questions['ACTM_SATM_value']])
                s.COMMS_value = util.get_num(line[questions['COMMS_value']])
                s.NON_ENG_value = line[questions['NON_ENG_value']]
                s.student_type = line[questions['student_type']]
                s.major = line['Major']
                s.other_major = line[questions['other_major']]
                s.STEM_Classes = line[questions['STEM_Classes']]
                s.College = line[questions['College']]
                s.Other_College = line[questions['Other_College']]
                s.high_school_full = line[questions['high_school']]
                s.high_school_other = line[questions['high_school_other']]
                # s.submitted = line['General Application Submitted']
                s.address1 = line[questions['address1']]
                s.address2 = line[questions['address2']]
                s.city = line[questions['city']]
                s.state = line[questions['state']]
                s.zip_code = line[questions['zip']]

                # Without the address APIs nothing cleans the address, so use the raw answers
                if CALL_APIS is False:
                    s.cleaned_address1 = line[questions['address1']]
                    s.cleaned_address2 = line[questions['address2']]
                    s.cleaned_city = line[questions['city']]
                    # Special case for the validation record named 'ChicagoSchoolNoCHome'
                    if s.cleaned_city != 'Chicago' and s.firstName == 'ChicagoSchoolNoCHome':
                        s.ChicagoHome = False
                        s.validationError = True
                    s.cleaned_state = line[questions['state']]
                    s.cleaned_zip_code = line[questions['zip']]

                # A basic sanity check that if the GPA and ACT values are populated, then the applicant is probably applying
                is_applying = (cs.high_schooler in s.student_type.upper()
                               and s.GPA_Value
                               and s.ACT_SAT_value
                               and s.ACTM_SATM_value
                               and s.firstName != 'Test')
                if not is_applying:
                    continue

                # Validate the applicant's address is residential and that they live or go to high school in Chicago
                vali.address_validation(s, chicago_schools, school_list, verbose, DEBUG, CALL_APIS)

                # Validate the applicant is accepted into an ABET engineering program
                vali.accred_check(s, verbose, DEBUG)

                # Validate the applicant's ACT/SAT scores and score their GPA and ACT/SAT
                sutil.GPA_Calc(s, True)
                sutil.ACT_SAT_Calc(s, SAT_to_ACT_dict, ACT_Overall, 'C', verbose, DEBUG)
                sutil.ACT_SAT_Calc(s, SAT_to_ACT_Math_dict, ACTM_Overall, 'M', verbose, DEBUG)

                # Score the applicant's coursework
                sutil.score_coursework(s, course_scores, True)

                # Determine the reviewer scores for the applicant; applicants without a review get 0
                reviewer_key = lastName.strip().upper() + firstName.strip().upper()
                if reviewer_key in reviewer_scores:
                    s.reviewer_score = cs.reviewer_multiplier * round(reviewer_scores[reviewer_key])
                else:
                    s.reviewer_score = 0

                if verbose:
                    print(lastName + ', ' + firstName + ':', s.GPA_Score, s.ACT_SAT_Score, s.ACTM_SATM_Score,
                          s.reviewer_score)

                # TODO: Send email with new students and warnings https://automatetheboringstuff.com/2e/chapter18/

                # Write back to output csv file
                total = s.GPA_Score + s.ACT_SAT_Score + s.ACTM_SATM_Score + s.reviewer_score + s.STEM_Score
                writer.writerow(dict(line,
                                     Total_Score=total,
                                     GPA_Score=s.GPA_Score,
                                     ACTSAT_Score=s.ACT_SAT_Score,
                                     ACTMSATM_Score=s.ACTM_SATM_Score,
                                     STEM_Score=s.STEM_Score,
                                     Reviewer_Score=s.reviewer_score,
                                     home_to_school_dist=s.home_to_school_dist,
                                     home_to_school_time_pt=s.home_to_school_time_pt,
                                     home_to_school_time_car=s.home_to_school_time_car
                                     ))
                student_list.append(s)

            return student_list
def compute_C_scores(file: str, verbose: bool = False, DEBUG: bool = False, CALL_APIS: bool = False):
    """The main function that checks college students' eligibility for the award

    Parameters
    ----------
    file : str
        Name of the file inside ``Student_Data/`` with all of the students' answers
    verbose : bool
        Passed on to the validation helpers
    DEBUG : bool
        Passed on to the validation helpers
    CALL_APIS : bool
        Accepted for a signature parallel to ``compute_HS_scores``; not used in this function

    Returns
    -------
    list or None
        The ``Student`` instances of the college applicants, or ``None`` when
        ``vali.questions_check`` rejects the headers of the file
    """
    answers_path = 'Student_Data/' + str(file)
    with open(answers_path, 'r', encoding="utf-8-sig") as answers:
        rows = csv.DictReader(answers)

        # A failed check most often means the questions changed with the year
        if not vali.questions_check(rows.fieldnames):
            return

        past_recipients = vali.get_past_recipients('2019 Recipients.csv')
        college_students = []

        for row in rows:
            last = row[cs.questions['lastName']]
            first = row[cs.questions['firstName']]
            student = Student.Student(first, last)

            student.student_type = row[cs.questions['student_type']]
            student.GPA_Value = row[cs.questions['GPA_Value']]
            student.major_school_change = row[cs.questions['major_school_change']]
            student.major = row['Major']
            student.NON_ENG_value = row[cs.questions['NON_ENG_value']]

            # Only rows answered as a college student are of interest here
            if cs.college_student not in student.student_type.upper():
                continue

            # The remaining checks are pointless unless the student is a past recipient
            if vali.past_recipient(student, past_recipients, verbose, DEBUG):
                # GPA check
                vali.college_gpa(student, verbose, DEBUG)
                # The recipient's college and major must still be valid
                vali.college_school_major(student, verbose, DEBUG)

            college_students.append(student)

        return college_students
def generate_student_data(file: str, verbose: bool = False, DEBUG: bool = False) -> Tuple[list, list]:
    """Run through the student file to generate the lists of students, split by student type

    Currently a stub: the file is opened and its questions are checked, but no students
    are read, so both returned lists are always empty.

    Parameters
    ----------
    file : str
        Name of the file inside ``Student_Data/`` with all of the students' answers
    verbose : bool
        Not used yet
    DEBUG : bool
        Not used yet

    Returns
    -------
    high_school_students : list
        A list meant to hold the Student instances of all the high school students
    college_students : list
        A list meant to hold the Student instances of all the college students
    """
    high_school_students: list = []
    college_students: list = []

    with open('Student_Data/' + str(file), 'r', encoding="utf-8-sig") as answers:
        reader = csv.DictReader(answers)
        # Check if the questions exist in the file, most often a change in the year.
        # The outcome does not change what is returned: either way the lists are still empty.
        vali.questions_check(reader.fieldnames)

    return high_school_students, college_students
def main():
    """
    The main function which runs the program

    The flags at the top select what is run: the validation data set followed by the
    unit tests (``run_test_data``) and/or the full set of student answers (``run_all_data``).
    The runtime of each step is printed.
    """
    # TODO: Iterate through the students here once and pass student class to the two functions
    run_test_data = False
    run_all_data = True
    create_copy = False
    DEBUG = True
    verbose = True
    # WARNING: If this is True it will call the Google and SmartyStreets API
    CALL_APIS = False
    # Will need to remove Brewer Dazerrick, Amar Johnson address2

    if run_test_data:
        filename = 'Validation_Students.csv'
        student_data_time = time.time()

        validation_HS = compute_HS_scores(filename, verbose, DEBUG, CALL_APIS)
        HS_Run = time.time()
        print('Runtime of HS Validation: ' + str(HS_Run - student_data_time))
        unittests.unit_tests(validation_HS, CALL_APIS)

        validation_C = compute_C_scores(filename, verbose, DEBUG, CALL_APIS)
        print('Runtime of College Validation: ' + str(time.time() - HS_Run))
        unittests.unit_tests(validation_C, CALL_APIS)
        print('--------------')

    if run_all_data:
        filename = 'Student Answers for 2020 Incentive Awards.csv'

        if create_copy:
            df = pd.read_csv('Student_Data/' + filename)
            # The copy must be written under the very name that is scored below; writing it as
            # 'copy_of_' + name while continuing with name made the scoring step open a file
            # that was never created
            filename = 'copy_of_Modified_' + datetime.now().strftime("%Y%m%d%H%M%S") + '_' + filename
            # index=False keeps the copy's columns identical to the source file
            df.to_csv('Student_Data/' + filename, index=False)

        start = time.time()
        # generate_student_data(filename)
        student_data_time = time.time()
        print('Runtime of student data split: ' + str(student_data_time - start))

        compute_HS_scores(filename, verbose, DEBUG, CALL_APIS)
        HS_Run = time.time()
        print('Runtime of HS: ' + str(HS_Run - student_data_time))

        # college_students = compute_C_scores(filename, verbose, DEBUG, CALL_APIS)
        # print('Runtime of College: ' + str(time.time() - HS_Run))
# Only start a run when the file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()