Skip to content

Commit

Permalink
Remove discarding segments for GD
Browse files Browse the repository at this point in the history
  • Loading branch information
Erikpostt committed Sep 24, 2024
1 parent 35b79f1 commit 2255651
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions src/paradigma/windowing.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def discard_segments(df, segment_nr_colname, min_length_segment_s, sampling_freq
valid_segments = segment_sizes[segment_sizes >= min_length_segment_s * sampling_frequency].index

# Filter the DataFrame to retain only valid segments
- filtered_df = df[df[segment_nr_colname].isin(valid_segments)]
+ filtered_df = df[df[segment_nr_colname].isin(valid_segments)].copy()

# Reset the segment enumeration starting from 1
filtered_df[segment_nr_colname] = pd.factorize(filtered_df[segment_nr_colname])[0] + 1
Expand All @@ -142,8 +142,7 @@ def categorize_segments(df, segment_nr_colname, sampling_frequency):
long_segments_max_duration = 20 * sampling_frequency # 20 seconds

# Group by the segment column and apply the categorization
- def categorize(group):
- segment_size = len(group)
+ def categorize(segment_size):
if segment_size < short_segments_max_duration:
return 1
elif segment_size < moderately_long_segments_max_duration:
Expand All @@ -154,6 +153,6 @@ def categorize(group):
return 4

# Create the new category column
- df_segments = df.groupby(segment_nr_colname).transform(lambda x: categorize(x))
+ segment_sizes = df[segment_nr_colname].value_counts().sort_index()

- return pd.merge(df, df_segments, on=segment_nr_colname)
+ return df[segment_nr_colname].map(segment_sizes).apply(categorize)

0 comments on commit 2255651

Please sign in to comment.