Fall 2024 notes and updates

Vanderbilt-Student-Volunteers-Science · Sep 26, 2024 · ea3115e · ea3115e
1 parent 2647a77
commit ea3115e
Show file tree

Hide file tree

Showing 6 changed files with 118 additions and 2 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/availabilty_sorter.py b/availabilty_sorter.py
@@ -0,0 +1,24 @@
+###Annotates individuals input file by number of available blocks, can later be sorted using Excel
+
+import pandas as pd
+
+# Read the individual-application responses into a DataFrame
+file_path = '/mnt/data/Individual Application Fall 2024.csv'
+df = pd.read_csv(file_path)
+
+# Keep only the column headers that mention "Availability"
+availability_columns = list(filter(lambda header: 'Availability' in header, df.columns))
+
+# Define a function to count the number of entries in availability columns
+def count_availability_entries(row):
+    """Count the comma-separated availability entries in one applicant row.
+
+    Args:
+        row: A DataFrame row (as passed by ``df.apply(..., axis=1)``); only the
+            columns listed in ``availability_columns`` are read.
+
+    Returns:
+        The number of non-blank comma-separated entries across all
+        availability columns. Missing (NaN) cells contribute zero.
+    """
+    total_entries = 0
+    for col in availability_columns:
+        cell = row[col]
+        if pd.isna(cell):
+            continue
+        # str() guards against non-text cells (e.g. a column parsed as numeric);
+        # blank pieces left by trailing or doubled commas are not real entries.
+        total_entries += sum(1 for part in str(cell).split(',') if part.strip())
+    return total_entries
+
+# Create a new column that sums the number of entries for each row
+df['Total Availability Entries'] = df.apply(count_availability_entries, axis=1)
+
+# Display the updated dataframe with the new column (optional for viewing purposes).
+# NOTE(review): ace_tools does not appear to be an installable package (it looks
+# specific to the hosted sandbox this script was drafted in), so fall back to a
+# plain print when it cannot be imported instead of crashing after the work is done.
+try:
+    import ace_tools as tools
+except ImportError:
+    print(df)
+else:
+    tools.display_dataframe_to_user(name="Updated Availability Data", dataframe=df)
diff --git a/csv-time-modifier.py b/csv-time-modifier.py
@@ -0,0 +1,49 @@
+#Pads each class block: moves the start time 25 minutes earlier and, assuming the
+#class is 1 hour long, sets the end time to 25 minutes after the class ends (new
+#start + 1 hour 50 minutes). The 1-hour assumption is not correct for classes
+#shorter than an hour.
+
+import pandas as pd
+from datetime import datetime, timedelta
+
+# Load the provided teacher CSV file into a DataFrame.
+# NOTE: the path below is a hard-coded, machine-specific absolute path; it must be
+# edited before the script is run anywhere else.
+file_path = '/Users/aptitude/Desktop/VSVS_project/modified inputs/FINALEDIT teachers.csv'  # Replace with the actual file path
+df = pd.read_csv(file_path)
+
+# Function to subtract 25 minutes from the original start time
+def subtract_25_minutes(time_str):
+    """Return ``time_str`` shifted 25 minutes earlier.
+
+    Args:
+        time_str: A time formatted like ``"09:00:00 AM"`` (``%I:%M:%S %p``).
+
+    Returns:
+        The shifted time in the same format. Missing values (NaN/None) and
+        values that cannot be parsed in that format are returned unchanged.
+    """
+    if pd.isna(time_str):
+        return time_str  # Return NaN as is
+    try:
+        time_obj = datetime.strptime(time_str, '%I:%M:%S %p')
+    except (ValueError, TypeError):
+        # ValueError: text in another format; TypeError: a non-text cell (e.g. a
+        # number). Either way, leave the value untouched.
+        return time_str
+    return (time_obj - timedelta(minutes=25)).strftime('%I:%M:%S %p')
+
+# Function to set the end time to 1 hour and 50 minutes after the new start time
+def add_1_hour_50_minutes_to_new_start(start_time_str):
+    """Return the padded end time for an already-shifted start time.
+
+    Args:
+        start_time_str: The new (shifted) start time, formatted like
+            ``"08:35:00 AM"`` (``%I:%M:%S %p``).
+
+    Returns:
+        The time 1 hour 50 minutes after ``start_time_str`` in the same format.
+        Missing values (NaN/None) and values that cannot be parsed in that
+        format are returned unchanged.
+    """
+    if pd.isna(start_time_str):
+        return start_time_str  # Return NaN as is
+    try:
+        time_obj = datetime.strptime(start_time_str, '%I:%M:%S %p')
+    except (ValueError, TypeError):
+        # ValueError: text in another format; TypeError: a non-text cell (e.g. a
+        # number). Either way, leave the value untouched.
+        return start_time_str
+    return (time_obj + timedelta(hours=1, minutes=50)).strftime('%I:%M:%S %p')
+
+# List of columns with start and end times
+time_columns = [col for col in df.columns if "Start Time" in col or "End Time" in col]
+start_columns = [col for col in time_columns if "Start Time" in col]
+end_columns = [col for col in time_columns if "Start Time" not in col]
+
+# Shift every start column first. Each end time below is derived from the
+# already-shifted start, so doing this in two passes keeps the result from
+# depending on the order of the columns in the CSV.
+for col in start_columns:
+    df[col] = df[col].apply(subtract_25_minutes)
+
+# Rebuild each end column from its matching (shifted) start column.
+for col in end_columns:
+    corresponding_start_col = col.replace("End Time", "Start Time")
+    if corresponding_start_col not in df.columns:
+        # No start column to derive from: leave this end column as it was.
+        print(f"Skipping {col!r}: no matching {corresponding_start_col!r} column")
+        continue
+    df[col] = df[corresponding_start_col].apply(add_1_hour_50_minutes_to_new_start)
+
+# Save the updated dataframe to a new CSV (index=False leaves out the row-index column).
+# The path is relative, so the file is written to the current working directory.
+output_path = 'TEACHERTIMES_PLUS_25.csv'  # Replace with desired output path
+df.to_csv(output_path, index=False)
+
+# Report where the output was written
+print(f"Updated file saved at: {output_path}")
diff --git a/vsvs_scheduler/applicants/classroom.py b/vsvs_scheduler/applicants/classroom.py
@@ -57,7 +57,7 @@ def unassign_volunteers(self):
 
     def duration(self):
-        """ Returns the duration of the class in minutes. (includes travel time) """
-        return (self.end_time - self.start_time + timedelta(hours=1)).seconds/60
+        """ Returns the duration of the class in minutes (end time minus start time; no travel time is added). """
+        return (self.end_time - self.start_time).seconds/60
 
 
 

diff --git a/vsvs_scheduler/f24notes.txt b/vsvs_scheduler/f24notes.txt
@@ -0,0 +1,43 @@
+Notes from Fall 2024 Sorting
+By Michael Herschbach
+
+Lots of issues arose when sorting. Sorter did not directly run on form data or 
+example data when pulled directly from Github. See all 3 input files in VSVS 
+Google Drive for necessary format changes. Significant preprocessing had to be 
+done to input files in order to meet required conditions such as Title 1 priority
+and more (see below).
+
+One potential bug was fixed on line 60 of classroom.py:
+Previous:
+"return (self.end_time - self.start_time + timedelta(hours=1)).seconds/60"
+New:
+"return (self.end_time - self.start_time).seconds/60"
+
+Did not have time to thoroughly examine how program is comprehending time blocks but
+there seemed to be an issue with padding only being applied to the end of the block,
+not the start.
+
+This fix removes padding from the program. Solution to retain padding in sorting is 
+introducing a preprocessing algorithm in "csv-time-modifier.py" for teacher input file
+which adds 25 minutes of padding to start and finish and cuts class blocks so that 
+they are all 1 hour long.
+Note: some classes are given by the teachers to be less than an hour. This program 
+incorrectly assumes all classes to be an hour even if they are less. This was manually 
+fixed later on.
+
+Because of heavy preference by teachers to have all classes on the same day, classes
+were manually collated by assigning each teacher only 1 day. Teachers at Title 1 schools
+received priority for their first-choice day. Days were assigned to ensure a relatively
+even spread of classes across the days of the week. Teachers were also ordered by Title 1 priority
+in the input files so that they would be sorted first (it is unclear if this had any effect).
+See files "Teacher Priortization" for annotations and "TEACHERTIMES_PLUS_25" for final classroom input
+files (which include padding) in Google Drive.
+
+Standby applications were manually appended to the list of regular applications. The input file is in
+Drive. Additionally, testing was done to sort individuals by availability such that those with
+the lowest number of available blocks appeared first in the list. This did not have any impact on
+sorting success. The availability sorting script is "availabilty_sorter.py" (file name as committed) in the repo.
+
+Final measures of success:
+Sorted 65 classes out of 80
+92 unsorted individuals out of 323
diff --git a/vsvs_scheduler/scheduler.py b/vsvs_scheduler/scheduler.py
@@ -9,7 +9,7 @@
 
 
 class Scheduler:
-    def __init__(self, earliest: str = "7:15", latest: str = "15:30", max_team_size: int = 5, min_team_size: int = 3):
+    def __init__(self, earliest: str = "7:15", latest: str = "17:30", max_team_size: int = 5, min_team_size: int = 3):
         """Scheduler object that holds information about the schedule and the volunteers and classrooms."""
 
         self.earliest_time = datetime.strptime(earliest, "%H:%M")