Skip to content

Commit

Permalink
Add function to take n windows and keep dataset balanced
Browse files Browse the repository at this point in the history
  • Loading branch information
Joanne Wardell authored and Joanne Wardell committed Nov 20, 2024
1 parent 7fcff47 commit de1776d
Show file tree
Hide file tree
Showing 4 changed files with 397 additions and 255 deletions.
Binary file added .DS_Store
Binary file not shown.
23 changes: 22 additions & 1 deletion scripts/experiments/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,31 @@ def main():
# shuffles the window pairs, preserves the class labels, and group labels
windows_sh, class_sh, group_sh = shuffle_windows(window_pairs, class_labels, group_labels)




############################################################################################################
############################################################################################################
############################################################################################################

################# OPTION 1
# takes the first n window combinations for each subject and class
#keeps the subject and class distribution balanced
# keeps the subject and class distribution balanced
windows_st, class_st, group_st = take_first_n_windows(windows_sh, class_sh, group_sh)


################# OPTION 2
# takes first 1600 windows, regardless of subject and class
# does not keep the subject and class distribution balanced
windows_st = windows_sh[:1600]
class_st = class_sh[:1600]
group_st = group_sh[:1600]

############################################################################################################
############################################################################################################
############################################################################################################


#use the random window combinations to generate the add and concat features
X_add, y_add, group_add = get_combined_features(windows_st, class_st, group_st, type='add')
X_concat, y_concat, group_concat = get_combined_features(windows_st, class_st, group_st, type='concat')
Expand Down
588 changes: 348 additions & 240 deletions scripts/plot.ipynb

Large diffs are not rendered by default.

41 changes: 27 additions & 14 deletions scripts/utils/usp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,15 +649,11 @@ def get_combined_features(window_pairs, class_labels, group_labels, type='none')

X = np.array(X)

#take the first entries of the class labels while asserting that they are the same, add them into the y list
for label in class_labels:
assert label[0] == label[1], 'Class labels should be the same'
y.append(label[0])
y.append(label)

#take the first entries of the group labels while asserting that they are the same, add them into the group list
for label in group_labels:
assert label[0] == label[1], 'Group labels should be the same'
group.append(label[0])
group.append(label)

#use a label encoder to encode the class labels
le = LabelEncoder()
Expand Down Expand Up @@ -691,12 +687,29 @@ def shuffle_windows(window_pairs, class_labels, group_labels):

return shuffled_pairs, shuffled_class_labels, shuffled_group_labels

def take_first_n_windows(shuffled_pairs, shuffled_class_labels, shuffled_group_labels):
#[TODO] implement this function
#extract the window pairs for one subject
#extract the window pairs from one class from the subject
#take the first n window pairs, given they are from one subject and one class
#append these to a list to return as the n selected windows
#keeps the subject distribution balanced as well as the class distribution balanced
def take_first_n_windows(windows_sh, class_sh, group_sh, n=80, labels=('0', '1')):
    """Take the first ``n`` window pairs for every (subject, class) combination.

    The three inputs are parallel sequences: entry ``i`` of ``class_sh`` and
    ``group_sh`` is the class label and subject of ``windows_sh[i]``.
    Subjects are visited in sorted order (``np.unique``), classes in the order
    given by ``labels``, and within each (subject, class) group the input
    order is preserved.

    Args:
        windows_sh: Sequence of window pairs (list or array, indexable by
            integer position).
        class_sh: Class label of each window pair.
        group_sh: Subject label of each window pair.
        n: Maximum number of window pairs kept per (subject, class) group.
            A group with fewer than ``n`` entries contributes all of them,
            so the result is only balanced if every group has at least ``n``.
        labels: Class labels to select. They are compared with ``==`` against
            ``class_sh``, so they must use the same type as the data
            (e.g. pass ``labels=(0, 1)`` for integer labels). Labels that do
            not occur in ``class_sh`` select nothing.

    Returns:
        Tuple ``(windows_st, class_st, group_st)`` of three equally long lists
        holding the selected window pairs, their class labels and their
        subject labels.

    Raises:
        ValueError: If ``n`` is negative or the inputs differ in length.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    if not (len(windows_sh) == len(class_sh) == len(group_sh)):
        raise ValueError(
            'windows_sh, class_sh and group_sh must have the same length'
        )

    # Only the labels go into the frame. The window payloads are picked from
    # windows_sh by position afterwards, so they may be arbitrary objects
    # (including slices of a multi-dimensional array).
    win_df = pd.DataFrame({
        'pair_ix': range(len(windows_sh)),
        'subject': group_sh,
        'target': class_sh,
    })

    windows_st = []
    class_st = []
    group_st = []

    for subject in np.unique(win_df['subject']):
        subject_windows = win_df[win_df['subject'] == subject]
        for label in labels:
            class_windows = subject_windows[subject_windows['target'] == label]
            # iloc[:n] silently returns fewer rows when the group is small.
            selected_windows = class_windows.iloc[:n]
            windows_st.extend(
                windows_sh[ix] for ix in selected_windows['pair_ix']
            )
            class_st.extend(selected_windows['target'].values)
            group_st.extend(selected_windows['subject'].values)

    return windows_st, class_st, group_st

1 comment on commit de1776d

@jawardell
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.