Add function to take n windows and keep dataset balanced

neuroneural · Nov 20, 2024 · de1776d · de1776d · jawardell · Nov 20, 2024
1 parent 7fcff47
commit de1776d
Show file tree

Hide file tree

Showing 4 changed files with 397 additions and 255 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/scripts/experiments/experiment.py b/scripts/experiments/experiment.py
@@ -94,10 +94,31 @@ def main():
             # shuffles the window pairs, preserves the class labels, and group labels
             windows_sh, class_sh, group_sh = shuffle_windows(window_pairs, class_labels, group_labels)
 
+
+
+
+############################################################################################################
+############################################################################################################
+############################################################################################################
+
+            #################   OPTION 1
             # takes the first n window combinations for each subject and class 
-                #keeps the subject and class distribution balanced
+                # keeps the subject and class distribution balanced
             windows_st, class_st, group_st = take_first_n_windows(windows_sh, class_sh, group_sh)
 
+
+            #################   OPTION 2
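+            # takes the first 1600 windows, regardless of subject and class; NOTE: runs after OPTION 1 and overwrites its windows_st/class_st/group_st, so comment out the option not in use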
+                # does not keep the subject and class distribution balanced
+            windows_st = windows_sh[:1600]
+            class_st = class_sh[:1600]
+            group_st = group_sh[:1600]
+
+############################################################################################################
+############################################################################################################
+############################################################################################################
+
+
             #use the random window combinations to generate the add and concat features
             X_add, y_add, group_add = get_combined_features(windows_st, class_st, group_st, type='add')
             X_concat, y_concat, group_concat = get_combined_features(windows_st, class_st, group_st, type='concat')

diff --git a/scripts/plot.ipynb b/scripts/plot.ipynb
diff --git a/scripts/utils/usp_utils.py b/scripts/utils/usp_utils.py
@@ -649,15 +649,11 @@ def get_combined_features(window_pairs, class_labels, group_labels, type='none')
 
     X = np.array(X)
 
-    #take the first entries of the class labels while asserting that they are the same, add them into the y list
     for label in class_labels:
-        assert label[0] == label[1], 'Class labels should be the same'
-        y.append(label[0])
+        y.append(label)
 
-    #take the first entries of the group labels while asserting that they are the same, add them into the group list
     for label in group_labels:
-        assert label[0] == label[1], 'Group labels should be the same'
-        group.append(label[0])
+        group.append(label)
 
     #use a label encoder to encode the class labels
     le = LabelEncoder()
@@ -691,12 +687,29 @@ def shuffle_windows(window_pairs, class_labels, group_labels):
 
     return shuffled_pairs, shuffled_class_labels, shuffled_group_labels
 
-def take_first_n_windows(shuffled_pairs, shuffled_class_labels, shuffled_group_labels):
-    #[TODO] implement this function
-    #extract the window pairs for one subject
-    #extract the window pairs from one class from the subject
-    #take the first n window pairs, given they are from one subject and one class
-    #append these to a list to return as the n selected windows
-    #keeps the subject distribution balanced as well as the class distribution balanced
+def take_first_n_windows(windows_sh, class_sh, group_sh):
+    windows_st = []
+    class_st = []
+    group_st = []
+
+    win_df = pd.DataFrame({
+        'pair_ix': range(len(windows_sh)), 
+        'window_pair': windows_sh,         
+        'subject': group_sh,      
+        'target': class_sh       
+    })
+    #select the window pairs from one subject from the win_df
+    subjects = np.unique(win_df['subject'])
+    for subject in subjects:
+        subject_windows = win_df[win_df['subject'] == subject]
+        #select the window pairs from one class from the subject (targets are matched against the string labels '0' and '1')
+        for label in ['0', '1']:
+            class_windows = subject_windows[subject_windows['target'] == label]
+            #take the first n window pairs, given they are from one subject and one class (fewer are taken if that subject/class has less than n pairs)
+            n = 80
+            selected_windows = class_windows.iloc[:n]
+            windows_st.extend(selected_windows['window_pair'].values)
+            class_st.extend(selected_windows['target'].values)
+            group_st.extend(selected_windows['subject'].values)
 
-    pass
+    return windows_st, class_st, group_st