Merge pull request #43 from biomarkersParkinson/main
Another attempt at initial release
kretep authored Aug 22, 2024
2 parents dfa5203 + 6f5d52c commit b737e7f
Showing 12 changed files with 118 additions and 105 deletions.
9 changes: 8 additions & 1 deletion docs/conf.py
@@ -23,12 +23,19 @@
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
]

autoapi_dirs = ["../src"]

# Include the following entities in the API documentation; this explicitly excludes 'imported-members',
# as we don't want to clutter the documentation with all the imported members.
# https://sphinx-autoapi.readthedocs.io/en/latest/reference/config.html#confval-autoapi_options
autoapi_options = ['members', 'undoc-members', 'private-members', 'show-inheritance',
'show-module-summary', 'special-members']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
exclude_patterns = ["build", "Thumbs.db", ".DS_Store"]

# -- Options for HTML output -------------------------------------------------

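For context, a minimal sketch of how these AutoAPI settings fit together in docs/conf.py. The presence of "autoapi.extension" in the extensions list is an assumption (it is required for autoapi_dirs and autoapi_options to take effect, but sits outside the shown hunk); everything else mirrors the settings in this commit.

```python
# Sketch only: "autoapi.extension" in `extensions` is assumed, not shown in the hunk.
extensions = [
    "autoapi.extension",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
]

autoapi_dirs = ["../src"]

# 'imported-members' is deliberately left out so re-exported names do not
# clutter the generated API pages.
autoapi_options = [
    "members", "undoc-members", "private-members",
    "show-inheritance", "show-module-summary", "special-members",
]

exclude_patterns = ["build", "Thumbs.db", ".DS_Store"]
```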
14 changes: 7 additions & 7 deletions docs/index.md
@@ -1,13 +1,6 @@
```{include} ../README.md
```

```{toctree}
:maxdepth: 2
:caption: TSDF schema
tsdf_paradigma_schemas.md
tsdf_paradigma_channels_and_units.md
```

```{toctree}
:maxdepth: 2
:caption: Example notebooks
@@ -21,6 +14,13 @@ notebooks/ppg/ppg_analysis.ipynb
autoapi/index
```

```{toctree}
:maxdepth: 2
:caption: TSDF schema
tsdf_paradigma_schemas.md
tsdf_paradigma_channels_and_units.md
```

```{toctree}
:maxdepth: 2
:caption: Development
6 changes: 3 additions & 3 deletions docs/notebooks/gait/gait_analysis.ipynb
@@ -19,9 +19,9 @@
"\n",
"import os\n",
"from paradigma.preprocessing_config import IMUPreprocessingConfig\n",
"from paradigma.gait_analysis import *\n",
"from paradigma.gait_analysis_config import *\n",
"from paradigma.imu_preprocessing import *"
"from paradigma.gait_analysis import extract_gait_features, detect_gait, extract_arm_swing_features, detect_arm_swing, quantify_arm_swing\n",
"from paradigma.gait_analysis_config import GaitFeatureExtractionConfig, GaitDetectionConfig, ArmSwingFeatureExtractionConfig, ArmSwingDetectionConfig, ArmSwingQuantificationConfig\n",
"from paradigma.imu_preprocessing import preprocess_imu_data"
]
},
{
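Replacing the star imports with explicit names makes the notebook's dependencies visible. Below is a minimal usage sketch of one pipeline step; the paths are placeholders, only the extract_gait_features signature (input_path, output_path, config) is taken from this PR, and constructing the config with defaults is an assumption.

```python
# Sketch under assumptions: placeholder paths and a default-constructed config.
from paradigma.gait_analysis import extract_gait_features
from paradigma.gait_analysis_config import GaitFeatureExtractionConfig

config = GaitFeatureExtractionConfig()          # assumed to work without arguments
extract_gait_features(
    input_path="data/preprocessed/imu",         # placeholder input directory
    output_path="data/features/gait",           # placeholder output directory
    config=config,
)
```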
12 changes: 6 additions & 6 deletions docs/notebooks/ppg/1-2-3_signal_quality.ipynb
@@ -43,8 +43,8 @@
"\n",
"import tsdf\n",
"import paradigma\n",
"from paradigma import DataColumns\n",
"from paradigma.ppg_preprocessing import tsdf_scan_meta, synchronization, extract_overlapping_segments\n",
"from paradigma.constants import DataColumns\n",
"from paradigma.ppg_preprocessing import extract_meta_from_tsdf_files, synchronization, extract_overlapping_segments\n",
"from paradigma.util import parse_iso8601_to_datetime"
]
},
@@ -116,8 +116,8 @@
"metadata": {},
"outputs": [],
"source": [
"meta_ppg = tsdf_scan_meta(input_path_ppg)\n",
"meta_imu = tsdf_scan_meta(input_path_imu)"
"meta_ppg = extract_meta_from_tsdf_files(input_path_ppg)\n",
"meta_imu = extract_meta_from_tsdf_files(input_path_imu)"
]
},
{
@@ -445,7 +445,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -482,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
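The notebook now uses the renamed helper extract_meta_from_tsdf_files. A small sketch based on its docstring follows; the directory paths are placeholders.

```python
# Sketch: placeholder paths; the return format follows the docstring in
# ppg_preprocessing.py (a list of dicts with ISO 8601 start/end times).
from paradigma.ppg_preprocessing import extract_meta_from_tsdf_files

meta_ppg = extract_meta_from_tsdf_files("/path/to/tsdf_data/ppg")
meta_imu = extract_meta_from_tsdf_files("/path/to/tsdf_data/imu")
print(meta_ppg[0]["start_iso8601"], meta_ppg[0]["end_iso8601"])
```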
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "paradigma"
version = "0.1.0"
version = "0.2.0"
description = "Paradigma - a toolbox for Digital Biomarkers for Parkinson's Disease"
authors = [ "Peter Kok <[email protected]>",
"Vedran Kasalica <[email protected]>",
4 changes: 1 addition & 3 deletions src/paradigma/__init__.py
@@ -3,6 +3,4 @@

__version__ = version("paradigma")

from .imu_preprocessing import *

__all__ = ["PreprocessingPipelineConfig"]
__all__ = []
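
With the star re-export removed and __all__ emptied, callers are expected to import from the submodules directly, as the other files in this commit now do. A short sketch, assuming the package is installed:

```python
# Sketch: import directly from submodules instead of relying on re-exports.
from paradigma import __version__              # still set via importlib.metadata
from paradigma.constants import DataColumns    # previously `from paradigma import DataColumns`

print(__version__)   # "0.2.0" after this release
```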
79 changes: 77 additions & 2 deletions src/paradigma/feature_extraction.py
@@ -3,7 +3,6 @@
from sklearn.decomposition import PCA

from scipy import signal, fft

from scipy.integrate import cumulative_trapezoid
from scipy.signal import find_peaks

@@ -600,4 +599,80 @@ def extract_peak_angular_velocity(
# compute the backward peak angular velocity, defined by the maximum positive angular velocity between the two peaks
df.loc[index, 'backward_peak_ang_vel'].append(np.abs(max(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))

return
return


def extract_temporal_domain_features(config, df_windowed, l_gravity_stats=['mean', 'std']):
# compute the mean and standard deviation of the gravity component of the acceleration signal for each axis
for col in config.l_gravity_cols:
for stat in l_gravity_stats:
df_windowed[f'{col}_{stat}'] = generate_statistics(
sensor_col=df_windowed[col],
statistic=stat
)

# compute the standard deviation of the Euclidean norm of the three axes
df_windowed['std_norm_acc'] = generate_std_norm(
df=df_windowed,
cols=config.l_accelerometer_cols
)

return df_windowed


def extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames):

for col in l_sensor_colnames:

# transform the temporal signal to the spectral domain using the fast fourier transform
df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
sensor_col=df_windowed[col],
window_type=config.window_type,
sampling_frequency=config.sampling_frequency
)

# compute the power in distinct frequency bandwidths
for bandwidth, frequencies in config.d_frequency_bandwidths.items():
df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
sensor_col=x[col],
fmin=frequencies[0],
fmax=frequencies[1],
sampling_frequency=config.sampling_frequency,
window_type=config.window_type,
), axis=1
)

# compute the dominant frequency, i.e., the frequency with the highest power
df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
signal_ffts=x[col+'_fft'],
signal_freqs=x[col+'_freqs'],
fmin=config.spectrum_low_frequency,
fmax=config.spectrum_high_frequency
), axis=1
)

# compute the power summed over the individual axes to obtain the total power per frequency bandwidth
for bandwidth in config.d_frequency_bandwidths.keys():
df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)

# compute the power summed over the individual frequency bandwidths to obtain the total power
df_windowed['total_power'] = compute_power(
df=df_windowed,
fft_cols=[f'{col}_fft' for col in l_sensor_colnames])

# compute the cepstral coefficients of the total power signal
cc_cols = generate_cepstral_coefficients(
total_power_col=df_windowed['total_power'],
window_length_s=config.window_length_s,
sampling_frequency=config.sampling_frequency,
low_frequency=config.spectrum_low_frequency,
high_frequency=config.spectrum_high_frequency,
n_filters=config.n_dct_filters_cc,
n_coefficients=config.n_coefficients_cc
)

df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})

return df_windowed
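
The spectral helper relies on compute_power_in_bandwidth to get band-limited power per window. As an illustration of the underlying idea only (not the paradigma implementation), here is a self-contained sketch using a periodogram; the sampling rate, test frequency, and band edges are arbitrary.

```python
# Illustrative sketch, not paradigma code: sum periodogram power between fmin and fmax.
import numpy as np
from scipy import signal

fs = 100.0                                 # sampling frequency in Hz (arbitrary)
t = np.arange(0, 4, 1 / fs)
x = np.sin(2 * np.pi * 1.5 * t)            # 1.5 Hz test signal

freqs, psd = signal.periodogram(x, fs=fs, window="hann")
band = (freqs >= 0.4) & (freqs < 2.0)      # example frequency bandwidth in Hz
power_in_band = float(np.sum(psd[band]))
print(f"power in 0.4-2.0 Hz band: {power_in_band:.3f}")
```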
90 changes: 11 additions & 79 deletions src/paradigma/gait_analysis.py
@@ -1,88 +1,20 @@
import os
import numpy as np
import pandas as pd

import tsdf

from paradigma.gait_analysis_config import *
from paradigma.feature_extraction import *
from paradigma.quantification import *
from paradigma.windowing import *
from paradigma.constants import DataColumns
from paradigma.gait_analysis_config import GaitFeatureExtractionConfig, GaitDetectionConfig, \
ArmSwingFeatureExtractionConfig, ArmSwingDetectionConfig, ArmSwingQuantificationConfig
from paradigma.feature_extraction import extract_temporal_domain_features, \
extract_spectral_domain_features, pca_transform_gyroscope, compute_angle, \
remove_moving_average_angle, extract_angle_extremes, extract_range_of_motion, \
extract_peak_angular_velocity, signal_to_ffts, get_dominant_frequency, compute_perc_power
from paradigma.quantification import aggregate_segments
from paradigma.windowing import tabulate_windows, create_segments, discard_segments
from paradigma.util import get_end_iso8601, write_data, read_metadata

def extract_temporal_domain_features(config, df_windowed, l_gravity_stats=['mean', 'std']):
# compute the mean and standard deviation of the gravity component of the acceleration signal for each axis
for col in config.l_gravity_cols:
for stat in l_gravity_stats:
df_windowed[f'{col}_{stat}'] = generate_statistics(
sensor_col=df_windowed[col],
statistic=stat
)

# compute the standard deviation of the Euclidean norm of the three axes
df_windowed['std_norm_acc'] = generate_std_norm(
df=df_windowed,
cols=config.l_accelerometer_cols
)

return df_windowed


def extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames):

for col in l_sensor_colnames:

# transform the temporal signal to the spectral domain using the fast fourier transform
df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
sensor_col=df_windowed[col],
window_type=config.window_type,
sampling_frequency=config.sampling_frequency
)

# compute the power in distinct frequency bandwidths
for bandwidth, frequencies in config.d_frequency_bandwidths.items():
df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
sensor_col=x[col],
fmin=frequencies[0],
fmax=frequencies[1],
sampling_frequency=config.sampling_frequency,
window_type=config.window_type,
), axis=1
)

# compute the dominant frequency, i.e., the frequency with the highest power
df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
signal_ffts=x[col+'_fft'],
signal_freqs=x[col+'_freqs'],
fmin=config.spectrum_low_frequency,
fmax=config.spectrum_high_frequency
), axis=1
)

# compute the power summed over the individual axes to obtain the total power per frequency bandwidth
for bandwidth in config.d_frequency_bandwidths.keys():
df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)

# compute the power summed over the individual frequency bandwidths to obtain the total power
df_windowed['total_power'] = compute_power(
df=df_windowed,
fft_cols=[f'{col}_fft' for col in l_sensor_colnames])

# compute the cepstral coefficients of the total power signal
cc_cols = generate_cepstral_coefficients(
total_power_col=df_windowed['total_power'],
window_length_s=config.window_length_s,
sampling_frequency=config.sampling_frequency,
low_frequency=config.spectrum_low_frequency,
high_frequency=config.spectrum_high_frequency,
n_filters=config.n_dct_filters_cc,
n_coefficients=config.n_coefficients_cc
)

df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})

return df_windowed


def extract_gait_features(input_path: str, output_path: str, config: GaitFeatureExtractionConfig) -> None:
# load data
3 changes: 2 additions & 1 deletion src/paradigma/gait_analysis_config.py
@@ -1,5 +1,6 @@
from typing import Dict, List
from paradigma import DataColumns

from paradigma.constants import DataColumns


class GaitFeatureExtractionConfig:
1 change: 0 additions & 1 deletion src/paradigma/heart_rate_analysis.py
@@ -1,7 +1,6 @@
from typing import List
import numpy as np
from scipy.signal import welch
from scipy.signal.windows import hann
from sklearn.preprocessing import StandardScaler
from dateutil import parser

1 change: 1 addition & 0 deletions src/paradigma/imu_preprocessing.py
@@ -136,6 +136,7 @@ def resample_data(
) -> pd.DataFrame:
"""
Resamples the IMU data to the resampling frequency. The data is scaled before resampling.
Parameters
----------
df : pd.DataFrame
2 changes: 1 addition & 1 deletion src/paradigma/ppg_preprocessing.py
@@ -171,7 +171,7 @@ def extract_meta_from_tsdf_files(tsdf_data_dir : str) -> List[dict]:
Examples
--------
>>> tsdf_scan_meta('/path/to/tsdf_data')
>>> extract_meta_from_tsdf_files('/path/to/tsdf_data')
[{'start_iso8601': '2021-06-27T16:52:20Z', 'end_iso8601': '2021-06-27T17:52:20Z'}, ...]
"""
metas = []
