Add I/O wrappers to high-level gait functions #66
@@ -3,6 +3,8 @@
 import pandas as pd
 from pathlib import Path
 from typing import Union
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier
 
 import tsdf
 
@@ -18,11 +20,7 @@
 from paradigma.util import get_end_iso8601, write_data, read_metadata
 
 
-def extract_gait_features(input_path: Union[str, Path], output_path: Union[str, Path], config: GaitFeatureExtractionConfig) -> None:
-    # load data
-    metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
-    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
-
+def extract_gait_features(df: pd.DataFrame, config: GaitFeatureExtractionConfig) -> pd.DataFrame:
     # group sequences of timestamps into windows
     df_windowed = tabulate_windows(
         df=df,
@@ -41,6 +39,18 @@ def extract_gait_features(input_path: Union[str, Path], output_path: Union[str,
     # and extract spectral features
     df_windowed = extract_spectral_domain_features(config, df_windowed, config.sensor, config.l_accelerometer_cols)
 
+    return df_windowed
+
+
+def extract_gait_features_io(input_path: Union[str, Path], output_path: Union[str, Path], config: GaitFeatureExtractionConfig) -> None:
+    # Load data
+    metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
+    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+    # Extract gait features
+    df_windowed = extract_gait_features(df, config)
+
+    # Store data
     end_iso8601 = get_end_iso8601(start_iso8601=metadata_time.start_iso8601,
                                   window_length_seconds=int(df_windowed[config.time_colname][-1:].values[0] + config.window_length_s))
 
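The hunk above establishes the pattern applied throughout this PR: the core function takes and returns a DataFrame, while a thin `_io` wrapper owns the tsdf reading and writing. One payoff is that the core can now be exercised on an in-memory DataFrame without touching disk. A minimal sketch, assuming the function and config class are importable from a `paradigma.gait_analysis` module (import paths are assumptions, and the sensor data is made up):

    import numpy as np
    import pandas as pd

    # Assumed import path; the function and config names come from this diff
    from paradigma.gait_analysis import extract_gait_features, GaitFeatureExtractionConfig

    config = GaitFeatureExtractionConfig()

    # Ten seconds of synthetic accelerometer data at 100 Hz,
    # using the column names the config already defines
    n_samples = 1000
    rng = np.random.default_rng(0)
    df = pd.DataFrame({config.time_colname: np.arange(n_samples) / 100})
    for col in config.l_accelerometer_cols:
        df[col] = rng.standard_normal(n_samples)

    # Pure transformation: no files are read or written
    df_windowed = extract_gait_features(df, config)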
@@ -58,12 +68,7 @@ def extract_gait_features(input_path: Union[str, Path], output_path: Union[str,
     write_data(metadata_time, metadata_samples, output_path, 'gait_meta.json', df_windowed)
 
 
-def detect_gait(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: GaitDetectionConfig) -> None:
-
-    # Load the data
-    metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
-    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
-
+def detect_gait(df: pd.DataFrame, config: GaitDetectionConfig, path_to_classifier_input: Union[str, Path]) -> pd.DataFrame:
     # Initialize the classifier
     clf = pd.read_pickle(os.path.join(path_to_classifier_input, config.classifier_file_name))
     with open(os.path.join(path_to_classifier_input, config.thresholds_file_name), 'r') as f:
@@ -80,7 +85,18 @@ def detect_gait(input_path: Union[str, Path], output_path: Union[str, Path], pat
 
     # Make prediction
     df['pred_gait_proba'] = clf.predict_proba(X)[:, 1]
-    df['pred_gait'] = df['pred_gait_proba'] > threshold
+    df['pred_gait'] = df['pred_gait_proba'] >= threshold
+
+    return df
+
+
+def detect_gait_io(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: GaitDetectionConfig) -> None:
+    # Load the data
+    metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
+    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+    df = detect_gait(df, config, path_to_classifier_input)
+
     # Prepare the metadata
     metadata_samples.file_name = 'gait_values.bin'
@@ -95,22 +111,7 @@ def detect_gait(input_path: Union[str, Path], output_path: Union[str, Path], pat
     write_data(metadata_time, metadata_samples, output_path, 'gait_meta.json', df)
 
 
-def extract_arm_swing_features(input_path: Union[str, Path], output_path: Union[str, Path], config: ArmSwingFeatureExtractionConfig) -> None:
-    # load accelerometer and gyroscope data
-    l_dfs = []
-    for sensor in ['accelerometer', 'gyroscope']:
-        config.set_sensor(sensor)
-        meta_filename = f'{sensor}_meta.json'
-        values_filename = f'{sensor}_samples.bin'
-        time_filename = f'{sensor}_time.bin'
-
-        metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path, meta_filename))
-        metadata_time = metadata_dict[time_filename]
-        metadata_samples = metadata_dict[values_filename]
-        l_dfs.append(tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns))
-
-    df = pd.merge(l_dfs[0], l_dfs[1], on=config.time_colname)
-
+def extract_arm_swing_features(df: pd.DataFrame, config: ArmSwingFeatureExtractionConfig) -> pd.DataFrame:
     # temporary add "random" predictions
     df[config.pred_gait_colname] = np.concatenate([np.repeat([1], df.shape[0]//3), np.repeat([0], df.shape[0]//3), np.repeat([1], df.shape[0] + 1 - 2*df.shape[0]//3)], axis=0)
 
@@ -257,6 +258,27 @@ def extract_arm_swing_features(input_path: Union[str, Path], output_path: Union[
     for sensor, l_sensor_colnames in zip(['accelerometer', 'gyroscope'], [config.l_accelerometer_cols, config.l_gyroscope_cols]):
         df_windowed = extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames)
 
+    return df_windowed
+
+
+def extract_arm_swing_features_io(input_path: Union[str, Path], output_path: Union[str, Path], config: ArmSwingFeatureExtractionConfig) -> None:
+    # load accelerometer and gyroscope data
+    l_dfs = []
+    for sensor in ['accelerometer', 'gyroscope']:
+        config.set_sensor(sensor)
+        meta_filename = f'{sensor}_meta.json'
+        values_filename = f'{sensor}_samples.bin'
+        time_filename = f'{sensor}_time.bin'
+
+        metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path, meta_filename))
+        metadata_time = metadata_dict[time_filename]
+        metadata_samples = metadata_dict[values_filename]
+        l_dfs.append(tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns))
+
+    df = pd.merge(l_dfs[0], l_dfs[1], on=config.time_colname)
+
+    df_windowed = extract_arm_swing_features(df, config)
+
     end_iso8601 = get_end_iso8601(metadata_samples.start_iso8601,
                                   df_windowed[config.time_colname][-1:].values[0] + config.window_length_s)
 
@@ -274,13 +296,7 @@ def extract_arm_swing_features(input_path: Union[str, Path], output_path: Union[
     write_data(metadata_time, metadata_samples, output_path, 'arm_swing_meta.json', df_windowed)
 
 
-def detect_arm_swing(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: ArmSwingDetectionConfig) -> None:
-    # Load the data
-    metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
-    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
-
-    # Initialize the classifier
-    clf = pd.read_pickle(os.path.join(path_to_classifier_input, config.classifier_file_name))
+def detect_arm_swing(df: pd.DataFrame, config: ArmSwingDetectionConfig, clf: Union[LogisticRegression, RandomForestClassifier]) -> pd.DataFrame:
 
     # Prepare the data
     clf.feature_names_in_ = ['std_norm_acc'] + [f'{x}_power_below_gait' for x in config.l_accelerometer_cols] + \
@@ -292,13 +308,23 @@ def detect_arm_swing(input_path: Union[str, Path], output_path: Union[str, Path]
                     ['range_of_motion', 'forward_peak_ang_vel_mean', 'backward_peak_ang_vel_mean', 'forward_peak_ang_vel_std',
                      'backward_peak_ang_vel_std', 'angle_perc_power', 'angle_dominant_frequency'] + \
                     [f'{x}_dominant_frequency' for x in config.l_accelerometer_cols]
 
     X = df.loc[:, clf.feature_names_in_]
 
     # Make prediction
-    df['pred_arm_swing_proba'] = clf.predict_proba(X)[:, 1]
+    # df['pred_arm_swing_proba'] = clf.predict_proba(X)[:, 1]
+    df['pred_arm_swing'] = clf.predict(X)
Review comment: Is this just a label, or a probability?

Reply: It is a probability (see 'proba' in the feature name). Why I am removing it may need some explanation. Windows overlap, so to determine the probability at a given timestamp you take all the windows that span that timestamp and average them. This is best done with the probabilities, because the mean of the rounded predictions can give a different result. For PPP, however, we do not necessarily look at predictions per second or per timestamp, but rather over a longer period, so in my view dealing with overlapping windows is no longer important. In that case you can simply take the rounded prediction instead of the probability.
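The point about overlapping windows can be made concrete: thresholding each window first and then averaging does not generally agree with averaging the probabilities first. A small illustration with made-up numbers:

    import numpy as np

    # Probabilities from three overlapping windows covering the same timestamp
    probas = np.array([0.9, 0.4, 0.4])

    # Route 1: average the probabilities, then threshold
    mean_proba = probas.mean()           # 0.567 -> gait at threshold 0.5

    # Route 2: round each window to 0/1 first, then average the labels
    mean_label = (probas >= 0.5).mean()  # 0.333 -> no gait at threshold 0.5

    print(mean_proba, mean_label)        # the two routes disagree here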
+
+    return df
+
+
+def detect_arm_swing_io(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: ArmSwingDetectionConfig) -> None:
+    # Load the data
+    metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
+    df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+    # Load the classifier
+    clf = pd.read_pickle(os.path.join(path_to_classifier_input, config.classifier_file_name))
+
+    df = detect_arm_swing(df, config, clf)
+
     # Prepare the metadata
     metadata_samples.file_name = 'arm_swing_values.bin'
     metadata_time.file_name = 'arm_swing_time.bin'
 
@@ -312,31 +338,7 @@ def detect_arm_swing(input_path: Union[str, Path], output_path: Union[str, Path]
     write_data(metadata_time, metadata_samples, output_path, 'arm_swing_meta.json', df)
 
 
-def quantify_arm_swing(path_to_feature_input: Union[str, Path], path_to_prediction_input: Union[str, Path], output_path: Union[str, Path], config: ArmSwingQuantificationConfig) -> None:
-    # Load the features & predictions
-    metadata_time, metadata_samples = read_metadata(path_to_feature_input, config.meta_filename, config.time_filename, config.values_filename)
-    df_features = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
-
-    metadata_dict = tsdf.load_metadata_from_path(os.path.join(path_to_prediction_input, config.meta_filename))
-    metadata_time = metadata_dict[config.time_filename]
-    metadata_samples = metadata_dict[config.values_filename]
-    df_predictions = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
-
-    # Validate
-    # dataframes have same length
-    assert df_features.shape[0] == df_predictions.shape[0]
-
-    # dataframes have same time column
-    assert df_features['time'].equals(df_predictions['time'])
-
-    # Prepare the data
-
-    # subset features
-    l_feature_cols = ['time', 'range_of_motion', 'forward_peak_ang_vel_mean', 'backward_peak_ang_vel_mean']
-    df_features = df_features[l_feature_cols]
-
-    # concatenate features and predictions
-    df = pd.concat([df_features, df_predictions[config.pred_arm_swing_colname]], axis=1)
+def quantify_arm_swing(df: pd.DataFrame, config: ArmSwingQuantificationConfig) -> pd.DataFrame:
 
     # temporarily for testing: manually determine predictions
     df[config.pred_arm_swing_colname] = np.concatenate([np.repeat([1], df.shape[0]//3), np.repeat([0], df.shape[0]//3), np.repeat([1], df.shape[0] - 2*df.shape[0]//3)], axis=0)
 
@@ -379,6 +381,35 @@ def quantify_arm_swing(path_to_feature_input: Union[str, Path], path_to_predicti
     df_aggregates['segment_duration_ms'] = df_aggregates['segment_duration_s'] * 1000
Review comment: Is this necessary for your code?

Reply: This was already present in the main branch. However, I think the reason it is useful (especially in tsdf) is that the time column can now be stored as an integer instead of a float.
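A one-line sketch of that storage argument: once durations are expressed in whole milliseconds they can be cast to an integer dtype, which round-trips exactly in a binary format, unlike floats. The cast below is illustrative and assumes millisecond resolution is sufficient:

    import numpy as np
    import pandas as pd

    df_aggregates = pd.DataFrame({'segment_duration_s': [1.25, 2.5, 0.75]})

    # Scale to ms and cast; int64 values are stored exactly
    df_aggregates['segment_duration_ms'] = (
        df_aggregates['segment_duration_s'] * 1000
    ).round().astype(np.int64)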
     df_aggregates = df_aggregates.drop(columns=['segment_nr'])
 
+    return df_aggregates
+
+
+def quantify_arm_swing_io(path_to_feature_input: Union[str, Path], path_to_prediction_input: Union[str, Path], output_path: Union[str, Path], config: ArmSwingQuantificationConfig) -> None:
+    # Load the features & predictions
+    metadata_time, metadata_samples = read_metadata(path_to_feature_input, config.meta_filename, config.time_filename, config.values_filename)
+    df_features = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+    metadata_dict = tsdf.load_metadata_from_path(os.path.join(path_to_prediction_input, config.meta_filename))
+    metadata_time = metadata_dict[config.time_filename]
+    metadata_samples = metadata_dict[config.values_filename]
+    df_predictions = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)
+
+    # Validate
+    # Dataframes have same length
+    assert df_features.shape[0] == df_predictions.shape[0]
Review comment: Could this not also fail to hold here, then? Or is it redundant?

Reply: In principle this cannot be otherwise, so it is redundant. It is a check we added at a very early stage, and we can remove it later once we know it has no effect. It does matter that they are the same, though, because otherwise the concatenation cannot work at a later stage. Actually, the concatenation does work, but you get strange results, which is even worse (which is why you would rather have a warning/error).
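The "strange results" the reply warns about are easy to reproduce: pd.concat aligns on the index, so a length mismatch does not raise but silently yields NaN rows, which is why failing fast with an assert is preferable. A made-up example:

    import pandas as pd

    df_features = pd.DataFrame({'time': [0, 1, 2],
                                'range_of_motion': [10.0, 12.0, 11.0]})
    df_predictions = pd.DataFrame({'pred_arm_swing': [1, 0]})  # one row short

    # No error is raised; the unmatched feature row gets NaN for the prediction
    df = pd.concat([df_features, df_predictions['pred_arm_swing']], axis=1)
    print(df)
    #    time  range_of_motion  pred_arm_swing
    # 0     0             10.0             1.0
    # 1     1             12.0             0.0
    # 2     2             11.0             NaN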
+
+    # Dataframes have same time column
+    assert df_features['time'].equals(df_predictions['time'])
+
+    # Subset features
+    l_feature_cols = ['time', 'range_of_motion', 'forward_peak_ang_vel_mean', 'backward_peak_ang_vel_mean']
+    df_features = df_features[l_feature_cols]
+
+    # Concatenate features and predictions
+    df = pd.concat([df_features, df_predictions[config.pred_arm_swing_colname]], axis=1)
+
+    df_aggregates = quantify_arm_swing(df, config)
+
+    # Store data
+    metadata_samples.file_name = 'arm_swing_values.bin'
+    metadata_time.file_name = 'arm_swing_time.bin'
Review comment: Sharp ;)