From ea3115edf88441e0adba5cdccaf5f70e6d358790 Mon Sep 17 00:00:00 2001 From: Michael Herschbach Date: Thu, 26 Sep 2024 15:55:11 -0500 Subject: [PATCH] Fall 2024 notes and updates --- .DS_Store | Bin 0 -> 6148 bytes availabilty_sorter.py | 24 ++++++++++++ csv-time-modifier.py | 49 +++++++++++++++++++++++++ vsvs_scheduler/applicants/classroom.py | 2 +- vsvs_scheduler/f24notes.txt | 43 ++++++++++++++++++++++ vsvs_scheduler/scheduler.py | 2 +- 6 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 .DS_Store create mode 100644 availabilty_sorter.py create mode 100644 csv-time-modifier.py create mode 100644 vsvs_scheduler/f24notes.txt diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d3b6d3bd30f0efa4638f1e231fb0110119e271bb GIT binary patch literal 6148 zcmeHKOHRWu6dZ>jS|On>Sg=9L1&G8ALX|F9u%QQlHVGdoG^js`UG^M?^Ra??wpBGr zyG1H`PkJ85e&f&BvSR>dxXQ1A0e~J|Fz7ScVNx&NvA~y{5RGkPiC4U#z!g)qLr_50 z?i?8wSaa33*RMj()qJ8Cm?OqJK5ok@o0ip-9N928^!Udbb3jPOgl$Y(abNr5*5moC ztY*)w=l-$$O2*RUo68ozk9&;Rdx|?^%)OSk7@sZ1F-KK!j>}Hl->t|q>L6My@IcS- zLSBv)N>QQNOCaOkoj#OH55+dG4WVCWDU(YD$!Ame_|L%XL;n~5|6b*M~Cr`5950_{)J*}?`%J^ z;V_9qO;rI^psB!-`y9yqKidEPZ<6#&6;K8Kl>#PA$LWw$@^@=za&p&3^hdgw#MKTr k6lUyI%viaKFX+}-9!Z9nc&r_=gr+|NRt8N}fj?E?7e;ns*#H0l literal 0 HcmV?d00001 diff --git a/availabilty_sorter.py b/availabilty_sorter.py new file mode 100644 index 0000000..1eb6319 --- /dev/null +++ b/availabilty_sorter.py @@ -0,0 +1,24 @@ +###Annotates individuals input file by number of available blocks, can later be sorted using Excel + +import pandas as pd + +# Load the CSV file +file_path = '/mnt/data/Individual Application Fall 2024.csv' +df = pd.read_csv(file_path) + +# Identify columns that have "Availability" in their name +availability_columns = [col for col in df.columns if 'Availability' in col] + +# Define a function to count the number of entries in availability columns +def count_availability_entries(row): + total_entries = 0 + for col in 
availability_columns: + if pd.notna(row[col]): + total_entries += len(row[col].split(',')) + return total_entries + +# Create a new column that sums the number of entries for each row +df['Total Availability Entries'] = df.apply(count_availability_entries, axis=1) + +# Display the updated dataframe with the new column (optional for viewing purposes) +import ace_tools as tools; tools.display_dataframe_to_user(name="Updated Availability Data", dataframe=df) diff --git a/csv-time-modifier.py b/csv-time-modifier.py new file mode 100644 index 0000000..ee92ffd --- /dev/null +++ b/csv-time-modifier.py @@ -0,0 +1,49 @@ +#Pads blocks by adding 25 minutes to start time and assuming the class is 1 hour long, +#also adds 25 minutes to end time. Assumption is not necessarily correct for classes +#less than an hour + +import pandas as pd +from datetime import datetime, timedelta + +# Load the provided CSV file +file_path = '/Users/aptitude/Desktop/VSVS_project/modified inputs/FINALEDIT teachers.csv' # Replace with the actual file path +df = pd.read_csv(file_path) + +# Function to subtract 25 minutes from the original start time +def subtract_25_minutes(time_str): + if pd.isna(time_str): + return time_str # Return NaN as is + try: + time_obj = datetime.strptime(time_str, '%I:%M:%S %p') + updated_time = time_obj - timedelta(minutes=25) + return updated_time.strftime('%I:%M:%S %p') + except ValueError: + return time_str # In case of unexpected formats, return the original + +# Function to set the end time to 1 hour and 50 minutes after the new start time +def add_1_hour_50_minutes_to_new_start(start_time_str): + if pd.isna(start_time_str): + return start_time_str # Return NaN as is + try: + time_obj = datetime.strptime(start_time_str, '%I:%M:%S %p') + updated_end_time = time_obj + timedelta(hours=1, minutes=50) + return updated_end_time.strftime('%I:%M:%S %p') + except ValueError: + return start_time_str # In case of unexpected formats, return the original + +# List of columns with 
start and end times +time_columns = [col for col in df.columns if "Start Time" in col or "End Time" in col] + +# Applying the functions to start and end time columns +for col in time_columns: + if "Start Time" in col: + df[col] = df[col].apply(subtract_25_minutes) + elif "End Time" in col: + corresponding_start_col = col.replace("End Time", "Start Time") + df[col] = df[corresponding_start_col].apply(add_1_hour_50_minutes_to_new_start) + +# Save the updated dataframe to a new CSV +output_path = 'TEACHERTIMES_PLUS_25.csv' # Replace with desired output path +df.to_csv(output_path, index=False) + +print(f"Updated file saved at: {output_path}") diff --git a/vsvs_scheduler/applicants/classroom.py b/vsvs_scheduler/applicants/classroom.py index 4b4c653..4d6cce0 100644 --- a/vsvs_scheduler/applicants/classroom.py +++ b/vsvs_scheduler/applicants/classroom.py @@ -57,7 +57,7 @@ def unassign_volunteers(self): def duration(self): """ Returns the duration of the class in minutes. (includes travel time) """ - return (self.end_time - self.start_time + timedelta(hours=1)).seconds/60 + return (self.end_time - self.start_time).seconds/60 diff --git a/vsvs_scheduler/f24notes.txt b/vsvs_scheduler/f24notes.txt new file mode 100644 index 0000000..d9268f9 --- /dev/null +++ b/vsvs_scheduler/f24notes.txt @@ -0,0 +1,43 @@ +Notes from Fall 2024 Sorting +By Michael Herschbach + +Lots of issues arose when sorting. Sorter did not directly run on form data or +example data when pulled directly from Github. See all 3 input files in VSVS +Google Drive for necessary format changes. Significant preprocessing had to be +done to input files in order to meet required conditions such as Title 1 priority +and more (see below). 
+ +One potential bug was fixed on line 60 of classroom.py: +Previous: +"return (self.end_time - self.start_time + timedelta(hours=1)).seconds/60" +New: +"return (self.end_time - self.start_time).seconds/60" + +I did not have time to thoroughly examine how the program interprets time blocks, but +there seemed to be an issue with padding being applied only to the end of the block, +not the start. + +This fix removes padding from the program itself. To retain padding during sorting, +a preprocessing script, "csv-time-modifier.py", is run on the teacher input file. +It moves each start time 25 minutes earlier and sets each end time to 1 hour 25 minutes +after the original start, i.e. it treats every class as 1 hour long with 25 minutes of padding on each side. +Note: some teachers listed classes that are shorter than an hour. The script +incorrectly assumes every class is an hour long, even when it is shorter. This was corrected +manually later on. + +Because teachers strongly preferred to have all of their classes on the same day, classes +were manually collated by assigning each teacher only 1 day. Teachers at Title 1 schools +received priority for their first-choice day. Days were assigned to ensure a relatively +even spread of classes across the days of the week. Teachers were also ordered by Title 1 priority +in the input files so that they would be sorted first (it is unclear if this had any effect). +See files "Teacher Priortization" for annotations and "TEACHERTIMES_PLUS_25" for final classroom input +files (which include padding) in Google Drive. + +Standby applications were manually appended to the list of regular applications. The resulting input file is in +Drive. Additionally, testing was done to sort individuals by availability such that those with +the lowest number of available blocks appeared first in the list. This did not have any impact on +sorting success.
The availability-sorting script can be found in "availabilty_sorter.py" in the repo (the file name really is spelled "availabilty"). + +Final measures of success: +Sorted 65 classes out of 80 +92 unsorted individuals out of 323 \ No newline at end of file diff --git a/vsvs_scheduler/scheduler.py b/vsvs_scheduler/scheduler.py index 8426117..59e007c 100644 --- a/vsvs_scheduler/scheduler.py +++ b/vsvs_scheduler/scheduler.py @@ -9,7 +9,7 @@ class Scheduler: - def __init__(self, earliest: str = "7:15", latest: str = "15:30", max_team_size: int = 5, min_team_size: int = 3): + def __init__(self, earliest: str = "7:15", latest: str = "17:30", max_team_size: int = 5, min_team_size: int = 3): """Scheduler object that holds information about the schedule and the volunteers and classrooms.""" self.earliest_time = datetime.strptime(earliest, "%H:%M")