Merge pull request #43 from biomarkersParkinson/main
Another attempt at initial release
kretep authored Aug 22, 2024
2 parents dfa5203 + 6f5d52c commit b737e7f
Showing 12 changed files with 118 additions and 105 deletions.
9 changes: 8 additions & 1 deletion docs/conf.py
@@ -23,12 +23,19 @@
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
]

autoapi_dirs = ["../src"]

# Include the following entities in the API documentation; this explicitly excludes 'imported-members',
# as we don't want to clutter the documentation with all the imported members.
# https://sphinx-autoapi.readthedocs.io/en/latest/reference/config.html#confval-autoapi_options
autoapi_options = ['members', 'undoc-members', 'private-members', 'show-inheritance',
'show-module-summary', 'special-members']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
exclude_patterns = ["build", "Thumbs.db", ".DS_Store"]

# -- Options for HTML output -------------------------------------------------

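For context, a minimal sketch of how these AutoAPI settings fit together in docs/conf.py. The presence of "autoapi.extension" in the extensions list is an assumption (it is required for autoapi_dirs and autoapi_options to take effect, but sits outside the shown hunk); everything else mirrors the settings in this commit.

```python
# Sketch only: "autoapi.extension" in `extensions` is assumed, not shown in the hunk.
extensions = [
    "autoapi.extension",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
]

autoapi_dirs = ["../src"]

# 'imported-members' is deliberately left out so re-exported names do not
# clutter the generated API pages.
autoapi_options = [
    "members", "undoc-members", "private-members",
    "show-inheritance", "show-module-summary", "special-members",
]

exclude_patterns = ["build", "Thumbs.db", ".DS_Store"]
```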
14 changes: 7 additions & 7 deletions docs/index.md
@@ -1,13 +1,6 @@
```{include} ../README.md
```

```{toctree}
:maxdepth: 2
:caption: TSDF schema
tsdf_paradigma_schemas.md
tsdf_paradigma_channels_and_units.md
```

```{toctree}
:maxdepth: 2
:caption: Example notebooks
@@ -21,6 +14,13 @@ notebooks/ppg/ppg_analysis.ipynb
autoapi/index
```

```{toctree}
:maxdepth: 2
:caption: TSDF schema
tsdf_paradigma_schemas.md
tsdf_paradigma_channels_and_units.md
```

```{toctree}
:maxdepth: 2
:caption: Development
6 changes: 3 additions & 3 deletions docs/notebooks/gait/gait_analysis.ipynb
@@ -19,9 +19,9 @@
"\n",
"import os\n",
"from paradigma.preprocessing_config import IMUPreprocessingConfig\n",
"from paradigma.gait_analysis import *\n",
"from paradigma.gait_analysis_config import *\n",
"from paradigma.imu_preprocessing import *"
"from paradigma.gait_analysis import extract_gait_features, detect_gait, extract_arm_swing_features, detect_arm_swing, quantify_arm_swing\n",
"from paradigma.gait_analysis_config import GaitFeatureExtractionConfig, GaitDetectionConfig, ArmSwingFeatureExtractionConfig, ArmSwingDetectionConfig, ArmSwingQuantificationConfig\n",
"from paradigma.imu_preprocessing import preprocess_imu_data"
]
},
{
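Replacing the star imports with explicit names makes the notebook's dependencies visible. Below is a minimal usage sketch of one pipeline step; the paths are placeholders, only the extract_gait_features signature (input_path, output_path, config) is taken from this PR, and constructing the config with defaults is an assumption.

```python
# Sketch under assumptions: placeholder paths and a default-constructed config.
from paradigma.gait_analysis import extract_gait_features
from paradigma.gait_analysis_config import GaitFeatureExtractionConfig

config = GaitFeatureExtractionConfig()          # assumed to work without arguments
extract_gait_features(
    input_path="data/preprocessed/imu",         # placeholder input directory
    output_path="data/features/gait",           # placeholder output directory
    config=config,
)
```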
12 changes: 6 additions & 6 deletions docs/notebooks/ppg/1-2-3_signal_quality.ipynb
@@ -43,8 +43,8 @@
"\n",
"import tsdf\n",
"import paradigma\n",
"from paradigma import DataColumns\n",
"from paradigma.ppg_preprocessing import tsdf_scan_meta, synchronization, extract_overlapping_segments\n",
"from paradigma.constants import DataColumns\n",
"from paradigma.ppg_preprocessing import extract_meta_from_tsdf_files, synchronization, extract_overlapping_segments\n",
"from paradigma.util import parse_iso8601_to_datetime"
]
},
@@ -116,8 +116,8 @@
"metadata": {},
"outputs": [],
"source": [
"meta_ppg = tsdf_scan_meta(input_path_ppg)\n",
"meta_imu = tsdf_scan_meta(input_path_imu)"
"meta_ppg = extract_meta_from_tsdf_files(input_path_ppg)\n",
"meta_imu = extract_meta_from_tsdf_files(input_path_imu)"
]
},
{
@@ -445,7 +445,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -482,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
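The notebook now uses the renamed helper extract_meta_from_tsdf_files. A small sketch based on its docstring follows; the directory paths are placeholders.

```python
# Sketch: placeholder paths; the return format follows the docstring in
# ppg_preprocessing.py (a list of dicts with ISO 8601 start/end times).
from paradigma.ppg_preprocessing import extract_meta_from_tsdf_files

meta_ppg = extract_meta_from_tsdf_files("/path/to/tsdf_data/ppg")
meta_imu = extract_meta_from_tsdf_files("/path/to/tsdf_data/imu")
print(meta_ppg[0]["start_iso8601"], meta_ppg[0]["end_iso8601"])
```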
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "paradigma"
version = "0.1.0"
version = "0.2.0"
description = "Paradigma - a toolbox for Digital Biomarkers for Parkinson's Disease"
authors = [ "Peter Kok <[email protected]>",
"Vedran Kasalica <[email protected]>",
4 changes: 1 addition & 3 deletions src/paradigma/__init__.py
@@ -3,6 +3,4 @@

__version__ = version("paradigma")

from .imu_preprocessing import *

__all__ = ["PreprocessingPipelineConfig"]
__all__ = []
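
With the star re-export removed and __all__ emptied, callers are expected to import from the submodules directly, as the other files in this commit now do. A short sketch, assuming the package is installed:

```python
# Sketch: import directly from submodules instead of relying on re-exports.
from paradigma import __version__              # still set via importlib.metadata
from paradigma.constants import DataColumns    # previously `from paradigma import DataColumns`

print(__version__)   # "0.2.0" after this release
```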
79 changes: 77 additions & 2 deletions src/paradigma/feature_extraction.py
@@ -3,7 +3,6 @@
from sklearn.decomposition import PCA

from scipy import signal, fft

from scipy.integrate import cumulative_trapezoid
from scipy.signal import find_peaks

@@ -600,4 +599,80 @@ def extract_peak_angular_velocity(
# compute the backward peak angular velocity, defined by the maximum positive angular velocity between the two peaks
df.loc[index, 'backward_peak_ang_vel'].append(np.abs(max(row[velocity_colname][l_extrema_indices[j]:l_extrema_indices[j+1]])))

return
return


def extract_temporal_domain_features(config, df_windowed, l_gravity_stats=['mean', 'std']):
# compute the mean and standard deviation of the gravity component of the acceleration signal for each axis
for col in config.l_gravity_cols:
for stat in l_gravity_stats:
df_windowed[f'{col}_{stat}'] = generate_statistics(
sensor_col=df_windowed[col],
statistic=stat
)

# compute the standard deviation of the Euclidean norm of the three axes
df_windowed['std_norm_acc'] = generate_std_norm(
df=df_windowed,
cols=config.l_accelerometer_cols
)

return df_windowed


def extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames):

for col in l_sensor_colnames:

# transform the temporal signal to the spectral domain using the fast fourier transform
df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
sensor_col=df_windowed[col],
window_type=config.window_type,
sampling_frequency=config.sampling_frequency
)

# compute the power in distinct frequency bandwidths
for bandwidth, frequencies in config.d_frequency_bandwidths.items():
df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
sensor_col=x[col],
fmin=frequencies[0],
fmax=frequencies[1],
sampling_frequency=config.sampling_frequency,
window_type=config.window_type,
), axis=1
)

# compute the dominant frequency, i.e., the frequency with the highest power
df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
signal_ffts=x[col+'_fft'],
signal_freqs=x[col+'_freqs'],
fmin=config.spectrum_low_frequency,
fmax=config.spectrum_high_frequency
), axis=1
)

# compute the power summed over the individual axes to obtain the total power per frequency bandwidth
for bandwidth in config.d_frequency_bandwidths.keys():
df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)

# compute the power summed over the individual frequency bandwidths to obtain the total power
df_windowed['total_power'] = compute_power(
df=df_windowed,
fft_cols=[f'{col}_fft' for col in l_sensor_colnames])

# compute the cepstral coefficients of the total power signal
cc_cols = generate_cepstral_coefficients(
total_power_col=df_windowed['total_power'],
window_length_s=config.window_length_s,
sampling_frequency=config.sampling_frequency,
low_frequency=config.spectrum_low_frequency,
high_frequency=config.spectrum_high_frequency,
n_filters=config.n_dct_filters_cc,
n_coefficients=config.n_coefficients_cc
)

df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})

return df_windowed
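
The spectral helper relies on compute_power_in_bandwidth to get band-limited power per window. As an illustration of the underlying idea only (not the paradigma implementation), here is a self-contained sketch using a periodogram; the sampling rate, test frequency, and band edges are arbitrary.

```python
# Illustrative sketch, not paradigma code: sum periodogram power between fmin and fmax.
import numpy as np
from scipy import signal

fs = 100.0                                 # sampling frequency in Hz (arbitrary)
t = np.arange(0, 4, 1 / fs)
x = np.sin(2 * np.pi * 1.5 * t)            # 1.5 Hz test signal

freqs, psd = signal.periodogram(x, fs=fs, window="hann")
band = (freqs >= 0.4) & (freqs < 2.0)      # example frequency bandwidth in Hz
power_in_band = float(np.sum(psd[band]))
print(f"power in 0.4-2.0 Hz band: {power_in_band:.3f}")
```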
90 changes: 11 additions & 79 deletions src/paradigma/gait_analysis.py
@@ -1,88 +1,20 @@
import os
import numpy as np
import pandas as pd

import tsdf

from paradigma.gait_analysis_config import *
from paradigma.feature_extraction import *
from paradigma.quantification import *
from paradigma.windowing import *
from paradigma.constants import DataColumns
from paradigma.gait_analysis_config import GaitFeatureExtractionConfig, GaitDetectionConfig, \
ArmSwingFeatureExtractionConfig, ArmSwingDetectionConfig, ArmSwingQuantificationConfig
from paradigma.feature_extraction import extract_temporal_domain_features, \
extract_spectral_domain_features, pca_transform_gyroscope, compute_angle, \
remove_moving_average_angle, extract_angle_extremes, extract_range_of_motion, \
extract_peak_angular_velocity, signal_to_ffts, get_dominant_frequency, compute_perc_power
from paradigma.quantification import aggregate_segments
from paradigma.windowing import tabulate_windows, create_segments, discard_segments
from paradigma.util import get_end_iso8601, write_data, read_metadata

def extract_temporal_domain_features(config, df_windowed, l_gravity_stats=['mean', 'std']):
# compute the mean and standard deviation of the gravity component of the acceleration signal for each axis
for col in config.l_gravity_cols:
for stat in l_gravity_stats:
df_windowed[f'{col}_{stat}'] = generate_statistics(
sensor_col=df_windowed[col],
statistic=stat
)

# compute the standard deviation of the Euclidean norm of the three axes
df_windowed['std_norm_acc'] = generate_std_norm(
df=df_windowed,
cols=config.l_accelerometer_cols
)

return df_windowed


def extract_spectral_domain_features(config, df_windowed, sensor, l_sensor_colnames):

for col in l_sensor_colnames:

# transform the temporal signal to the spectral domain using the fast fourier transform
df_windowed[f'{col}_freqs'], df_windowed[f'{col}_fft'] = signal_to_ffts(
sensor_col=df_windowed[col],
window_type=config.window_type,
sampling_frequency=config.sampling_frequency
)

# compute the power in distinct frequency bandwidths
for bandwidth, frequencies in config.d_frequency_bandwidths.items():
df_windowed[col+'_'+bandwidth] = df_windowed.apply(lambda x: compute_power_in_bandwidth(
sensor_col=x[col],
fmin=frequencies[0],
fmax=frequencies[1],
sampling_frequency=config.sampling_frequency,
window_type=config.window_type,
), axis=1
)

# compute the dominant frequency, i.e., the frequency with the highest power
df_windowed[col+'_dominant_frequency'] = df_windowed.apply(lambda x: get_dominant_frequency(
signal_ffts=x[col+'_fft'],
signal_freqs=x[col+'_freqs'],
fmin=config.spectrum_low_frequency,
fmax=config.spectrum_high_frequency
), axis=1
)

# compute the power summed over the individual axes to obtain the total power per frequency bandwidth
for bandwidth in config.d_frequency_bandwidths.keys():
df_windowed['total_'+bandwidth] = df_windowed.apply(lambda x: sum(x[y+'_'+bandwidth] for y in l_sensor_colnames), axis=1)

# compute the power summed over the individual frequency bandwidths to obtain the total power
df_windowed['total_power'] = compute_power(
df=df_windowed,
fft_cols=[f'{col}_fft' for col in l_sensor_colnames])

# compute the cepstral coefficients of the total power signal
cc_cols = generate_cepstral_coefficients(
total_power_col=df_windowed['total_power'],
window_length_s=config.window_length_s,
sampling_frequency=config.sampling_frequency,
low_frequency=config.spectrum_low_frequency,
high_frequency=config.spectrum_high_frequency,
n_filters=config.n_dct_filters_cc,
n_coefficients=config.n_coefficients_cc
)

df_windowed = pd.concat([df_windowed, cc_cols], axis=1)

df_windowed = df_windowed.rename(columns={f'cc_{cc_nr}': f'cc_{cc_nr}_{sensor}' for cc_nr in range(1,config.n_coefficients_cc+1)}).rename(columns={'window_start': 'time'})

return df_windowed


def extract_gait_features(input_path: str, output_path: str, config: GaitFeatureExtractionConfig) -> None:
# load data
3 changes: 2 additions & 1 deletion src/paradigma/gait_analysis_config.py
@@ -1,5 +1,6 @@
from typing import Dict, List
from paradigma import DataColumns

from paradigma.constants import DataColumns


class GaitFeatureExtractionConfig:
1 change: 0 additions & 1 deletion src/paradigma/heart_rate_analysis.py
@@ -1,7 +1,6 @@
from typing import List
import numpy as np
from scipy.signal import welch
from scipy.signal.windows import hann
from sklearn.preprocessing import StandardScaler
from dateutil import parser

1 change: 1 addition & 0 deletions src/paradigma/imu_preprocessing.py
@@ -136,6 +136,7 @@ def resample_data(
) -> pd.DataFrame:
"""
Resamples the IMU data to the resampling frequency. The data is scaled before resampling.
Parameters
----------
df : pd.DataFrame
2 changes: 1 addition & 1 deletion src/paradigma/ppg_preprocessing.py
@@ -171,7 +171,7 @@ def extract_meta_from_tsdf_files(tsdf_data_dir : str) -> List[dict]:
Examples
--------
>>> tsdf_scan_meta('/path/to/tsdf_data')
>>> extract_meta_from_tsdf_files('/path/to/tsdf_data')
[{'start_iso8601': '2021-06-27T16:52:20Z', 'end_iso8601': '2021-06-27T17:52:20Z'}, ...]
"""
metas = []
