Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add I/O wrappers to high-level gait functions #66

Merged
merged 10 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions docs/notebooks/gait/gait_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -19,14 +19,14 @@
"\n",
"import os\n",
"from paradigma.preprocessing_config import IMUPreprocessingConfig\n",
"from paradigma.gait_analysis import extract_gait_features, detect_gait, extract_arm_swing_features, detect_arm_swing, quantify_arm_swing\n",
"from paradigma.gait_analysis import extract_gait_features_io, detect_gait_io, extract_arm_swing_features_io, detect_arm_swing_io, quantify_arm_swing_io\n",
"from paradigma.gait_analysis_config import GaitFeatureExtractionConfig, GaitDetectionConfig, ArmSwingFeatureExtractionConfig, ArmSwingDetectionConfig, ArmSwingQuantificationConfig\n",
"from paradigma.imu_preprocessing import preprocess_imu_data"
"from paradigma.imu_preprocessing import preprocess_imu_data_io"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {
"tags": [
"parameters"
Expand Down Expand Up @@ -54,12 +54,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"config = IMUPreprocessingConfig()\n",
"preprocess_imu_data(path_to_sensor_data, path_to_preprocessed_data, config)"
"preprocess_imu_data_io(path_to_sensor_data, path_to_preprocessed_data, config)"
]
},
{
Expand All @@ -71,13 +71,13 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"config = GaitFeatureExtractionConfig()\n",
"#config.set_sampling_frequency(50)\n",
"extract_gait_features(path_to_preprocessed_data, path_to_extracted_features, config)"
"extract_gait_features_io(path_to_preprocessed_data, path_to_extracted_features, config)"
]
},
{
Expand All @@ -89,12 +89,12 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"config = GaitDetectionConfig()\n",
"detect_gait(path_to_extracted_features, path_to_predictions, path_to_classifier, config)"
"detect_gait_io(path_to_extracted_features, path_to_predictions, path_to_classifier, config)"
]
},
{
Expand All @@ -106,12 +106,12 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"config = ArmSwingFeatureExtractionConfig()\n",
"extract_arm_swing_features(path_to_preprocessed_data, path_to_extracted_features, config)"
"extract_arm_swing_features_io(path_to_preprocessed_data, path_to_extracted_features, config)"
]
},
{
Expand All @@ -123,12 +123,12 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"config = ArmSwingDetectionConfig()\n",
"detect_arm_swing(path_to_extracted_features, path_to_predictions, path_to_classifier, config)"
"detect_arm_swing_io(path_to_extracted_features, path_to_predictions, path_to_classifier, config)"
]
},
{
Expand All @@ -140,12 +140,12 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"config = ArmSwingQuantificationConfig()\n",
"quantify_arm_swing(path_to_extracted_features, path_to_predictions, path_to_quantification, config)"
"quantify_arm_swing_io(path_to_extracted_features, path_to_predictions, path_to_quantification, config)"
]
}
],
Expand All @@ -165,7 +165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
155 changes: 93 additions & 62 deletions src/paradigma/gait_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pandas as pd
from pathlib import Path
from typing import Union
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

import tsdf

Expand All @@ -18,11 +20,7 @@
from paradigma.util import get_end_iso8601, write_data, read_metadata


def extract_gait_features(input_path: Union[str, Path], output_path: Union[str, Path], config: GaitFeatureExtractionConfig) -> None:
# load data
metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

def extract_gait_features(df: pd.DataFrame, config: GaitFeatureExtractionConfig) -> pd.DataFrame:
# group sequences of timestamps into windows
df_windowed = tabulate_windows(
df=df,
Expand All @@ -41,6 +39,18 @@ def extract_gait_features(input_path: Union[str, Path], output_path: Union[str,
# and extract spectral features
df_windowed = extract_spectral_domain_features(config, df_windowed, config.sensor, config.l_accelerometer_cols)

return df_windowed


def extract_gait_features_io(input_path: Union[str, Path], output_path: Union[str, Path], config: GaitFeatureExtractionConfig) -> None:
# Load data
metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

# Extract gait features
df_windowed = extract_gait_features(df, config)

# Store data
end_iso8601 = get_end_iso8601(start_iso8601=metadata_time.start_iso8601,
window_length_seconds=int(df_windowed[config.time_colname][-1:].values[0] + config.window_length_s))

Expand All @@ -58,12 +68,7 @@ def extract_gait_features(input_path: Union[str, Path], output_path: Union[str,
write_data(metadata_time, metadata_samples, output_path, 'gait_meta.json', df_windowed)


def detect_gait(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: GaitDetectionConfig) -> None:

# Load the data
metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

def detect_gait(df: pd.DataFrame, config: GaitDetectionConfig, path_to_classifier_input: Union[str, Path]) -> pd.DataFrame:
# Initialize the classifier
clf = pd.read_pickle(os.path.join(path_to_classifier_input, config.classifier_file_name))
with open(os.path.join(path_to_classifier_input, config.thresholds_file_name), 'r') as f:
Expand All @@ -80,7 +85,18 @@ def detect_gait(input_path: Union[str, Path], output_path: Union[str, Path], pat

# Make prediction
df['pred_gait_proba'] = clf.predict_proba(X)[:, 1]
df['pred_gait'] = df['pred_gait_proba'] > threshold
df['pred_gait'] = df['pred_gait_proba'] >= threshold
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sharp ;)


return df


def detect_gait_io(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: GaitDetectionConfig) -> None:

# Load the data
metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

df = detect_gait(df, config, path_to_classifier_input)

# Prepare the metadata
metadata_samples.file_name = 'gait_values.bin'
Expand All @@ -95,22 +111,7 @@ def detect_gait(input_path: Union[str, Path], output_path: Union[str, Path], pat
write_data(metadata_time, metadata_samples, output_path, 'gait_meta.json', df)


def extract_arm_swing_features(input_path: Union[str, Path], output_path: Union[str, Path], config: ArmSwingFeatureExtractionConfig) -> None:
# load accelerometer and gyroscope data
l_dfs = []
for sensor in ['accelerometer', 'gyroscope']:
config.set_sensor(sensor)
meta_filename = f'{sensor}_meta.json'
values_filename = f'{sensor}_samples.bin'
time_filename = f'{sensor}_time.bin'

metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path, meta_filename))
metadata_time = metadata_dict[time_filename]
metadata_samples = metadata_dict[values_filename]
l_dfs.append(tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns))

df = pd.merge(l_dfs[0], l_dfs[1], on=config.time_colname)

def extract_arm_swing_features(df: pd.DataFrame, config: ArmSwingFeatureExtractionConfig) -> pd.DataFrame:
# temporary add "random" predictions
df[config.pred_gait_colname] = np.concatenate([np.repeat([1], df.shape[0]//3), np.repeat([0], df.shape[0]//3), np.repeat([1], df.shape[0] + 1 - 2*df.shape[0]//3)], axis=0)

Expand Down Expand Up @@ -257,6 +258,27 @@ def extract_arm_swing_features(input_path: Union[str, Path], output_path: Union[
for sensor, l_sensor_colnames in zip(['accelerometer', 'gyroscope'], [config.l_accelerometer_cols, config.l_gyroscope_cols]):
df_windowed = extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames)

return df_windowed


def extract_arm_swing_features_io(input_path: Union[str, Path], output_path: Union[str, Path], config: ArmSwingFeatureExtractionConfig) -> None:
# load accelerometer and gyroscope data
l_dfs = []
for sensor in ['accelerometer', 'gyroscope']:
config.set_sensor(sensor)
meta_filename = f'{sensor}_meta.json'
values_filename = f'{sensor}_samples.bin'
time_filename = f'{sensor}_time.bin'

metadata_dict = tsdf.load_metadata_from_path(os.path.join(input_path, meta_filename))
metadata_time = metadata_dict[time_filename]
metadata_samples = metadata_dict[values_filename]
l_dfs.append(tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns))

df = pd.merge(l_dfs[0], l_dfs[1], on=config.time_colname)

df_windowed = extract_arm_swing_features(df, config)

end_iso8601 = get_end_iso8601(metadata_samples.start_iso8601,
df_windowed[config.time_colname][-1:].values[0] + config.window_length_s)

Expand All @@ -274,13 +296,7 @@ def extract_arm_swing_features(input_path: Union[str, Path], output_path: Union[
write_data(metadata_time, metadata_samples, output_path, 'arm_swing_meta.json', df_windowed)


def detect_arm_swing(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: ArmSwingDetectionConfig) -> None:
# Load the data
metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

# Initialize the classifier
clf = pd.read_pickle(os.path.join(path_to_classifier_input, config.classifier_file_name))
def detect_arm_swing(df: pd.DataFrame, config: ArmSwingDetectionConfig, clf: Union[LogisticRegression, RandomForestClassifier]) -> pd.DataFrame:

# Prepare the data
clf.feature_names_in_ = ['std_norm_acc'] + [f'{x}_power_below_gait' for x in config.l_accelerometer_cols] + \
Expand All @@ -292,13 +308,23 @@ def detect_arm_swing(input_path: Union[str, Path], output_path: Union[str, Path]
['range_of_motion', 'forward_peak_ang_vel_mean', 'backward_peak_ang_vel_mean', 'forward_peak_ang_vel_std',
'backward_peak_ang_vel_std', 'angle_perc_power', 'angle_dominant_frequency'] + \
[f'{x}_dominant_frequency' for x in config.l_accelerometer_cols]

X = df.loc[:, clf.feature_names_in_]

# Make prediction
# df['pred_arm_swing_proba'] = clf.predict_proba(X)[:, 1]
df['pred_arm_swing'] = clf.predict(X)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this only a label, or a probability?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a probability (see 'proba' in the feature name). The reason I am removing it may need some explanation.

Windows are overlapping, so to determine the probability for a timestamp you take the windows that span that timestamp and take the mean. This is best done with the probabilities, because the mean of the rounded predictions can give a different result. For PPP, however, we do not necessarily look at predictions per second or per timestamp, but rather over a longer period, so in my opinion it is no longer important to deal with overlapping windows. In that case you can simply take the rounded prediction instead of the probability.


return df

def detect_arm_swing_io(input_path: Union[str, Path], output_path: Union[str, Path], path_to_classifier_input: Union[str, Path], config: ArmSwingDetectionConfig) -> None:
# Load the data
metadata_time, metadata_samples = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

# Load the classifier
clf = pd.read_pickle(os.path.join(path_to_classifier_input, config.classifier_file_name))

df = detect_arm_swing(df, config, clf)

# Prepare the metadata
metadata_samples.file_name = 'arm_swing_values.bin'
metadata_time.file_name = 'arm_swing_time.bin'
Expand All @@ -312,31 +338,7 @@ def detect_arm_swing(input_path: Union[str, Path], output_path: Union[str, Path]
write_data(metadata_time, metadata_samples, output_path, 'arm_swing_meta.json', df)


def quantify_arm_swing(path_to_feature_input: Union[str, Path], path_to_prediction_input: Union[str, Path], output_path: Union[str, Path], config: ArmSwingQuantificationConfig) -> None:
# Load the features & predictions
metadata_time, metadata_samples = read_metadata(path_to_feature_input, config.meta_filename, config.time_filename, config.values_filename)
df_features = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

metadata_dict = tsdf.load_metadata_from_path(os.path.join(path_to_prediction_input, config.meta_filename))
metadata_time = metadata_dict[config.time_filename]
metadata_samples = metadata_dict[config.values_filename]
df_predictions = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

# Validate
# dataframes have same length
assert df_features.shape[0] == df_predictions.shape[0]

# dataframes have same time column
assert df_features['time'].equals(df_predictions['time'])

# Prepare the data

# subset features
l_feature_cols = ['time', 'range_of_motion', 'forward_peak_ang_vel_mean', 'backward_peak_ang_vel_mean']
df_features = df_features[l_feature_cols]

# concatenate features and predictions
df = pd.concat([df_features, df_predictions[config.pred_arm_swing_colname]], axis=1)
def quantify_arm_swing(df: pd.DataFrame, config: ArmSwingQuantificationConfig) -> pd.DataFrame:

# temporarily for testing: manually determine predictions
df[config.pred_arm_swing_colname] = np.concatenate([np.repeat([1], df.shape[0]//3), np.repeat([0], df.shape[0]//3), np.repeat([1], df.shape[0] - 2*df.shape[0]//3)], axis=0)
Expand Down Expand Up @@ -379,6 +381,35 @@ def quantify_arm_swing(path_to_feature_input: Union[str, Path], path_to_predicti
df_aggregates['segment_duration_ms'] = df_aggregates['segment_duration_s'] * 1000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary for your code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was already present in the main branch. However, I think the reason why this is useful (especially in tsdf) is because the time column can now be stored as integer, instead of float.

df_aggregates = df_aggregates.drop(columns=['segment_nr'])

return df_aggregates


def quantify_arm_swing_io(path_to_feature_input: Union[str, Path], path_to_prediction_input: Union[str, Path], output_path: Union[str, Path], config: ArmSwingQuantificationConfig) -> None:
# Load the features & predictions
metadata_time, metadata_samples = read_metadata(path_to_feature_input, config.meta_filename, config.time_filename, config.values_filename)
df_features = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

metadata_dict = tsdf.load_metadata_from_path(os.path.join(path_to_prediction_input, config.meta_filename))
metadata_time = metadata_dict[config.time_filename]
metadata_samples = metadata_dict[config.values_filename]
df_predictions = tsdf.load_dataframe_from_binaries([metadata_time, metadata_samples], tsdf.constants.ConcatenationType.columns)

# Validate
# Dataframes have same length
assert df_features.shape[0] == df_predictions.shape[0]
Copy link
Contributor

@KarsVeldkamp KarsVeldkamp Oct 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this condition actually fail here? Or is this check redundant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle this cannot be otherwise, so it is redundant. It is a check we added at a very early stage, and which we can remove later once we know it has no effect. However, it is important that the dataframes are identical in length, otherwise the concatenation at a later stage cannot work. Actually, the concatenation does still run, but you get strange results, which is even worse (hence it is preferable to get a warning/error).


# Dataframes have same time column
assert df_features['time'].equals(df_predictions['time'])

# Subset features
l_feature_cols = ['time', 'range_of_motion', 'forward_peak_ang_vel_mean', 'backward_peak_ang_vel_mean']
df_features = df_features[l_feature_cols]

# Concatenate features and predictions
df = pd.concat([df_features, df_predictions[config.pred_arm_swing_colname]], axis=1)

df_aggregates = quantify_arm_swing(df, config)

# Store data
metadata_samples.file_name = 'arm_swing_values.bin'
metadata_time.file_name = 'arm_swing_time.bin'
Expand Down
Loading