
Heart rate estimation #85

Open
wants to merge 15 commits into base: main
2 changes: 1 addition & 1 deletion docs/matlab_scripts/ppg/1-2-3_signal_quality.m
@@ -107,7 +107,7 @@
warning('Sample is of insufficient length!')
else
[v_ppg_pre, tr_ppg_pre] = preprocessing_ppg(tr_ppg, v_ppg, fs_ppg); % 2 matrices, one for ppg (Nx1) and for time (Nx1)
[v_imu_pre, tr_imu_pre] = preprocessing_imu(tr_imu, v_acc_scaled, fs_imu); % 2 matrices, one for accel imu (Nx3) and for time (Nx1)
[v_acc_pre, tr_acc_pre] = preprocessing_imu(tr_imu, v_acc_scaled, fs_imu); % 2 matrices, one for accel imu (Nx3) and for time (Nx1)
end

%% 5a. Write TSDF PPG preprocessing output
10 changes: 5 additions & 5 deletions docs/matlab_scripts/ppg/1-4_HR_estimation.m
@@ -16,7 +16,7 @@

%% Initialization
% Setting data paths + extracting metafilenames already
clear all; close all; clc
clearvars; close all; clc
addpath(genpath('..\..\..\paradigma-toolbox')) % Add git repository to the path
addpath(genpath("..\..\..\\tsdf4matlab")) % Add wrapper to the path
warning('off','all') % Turn off warnings to improve speed in spwvd especially
@@ -25,12 +25,12 @@
unix_ticks_ms = 1000.0;
fs_ppg = 30; % Establish the sampling rate desired for resampling PPG --> now chosen to be fixed on 30 Hz

raw_data_root = '..\..\tests\data\1.sensor_data\';
raw_data_root = '..\..\..\tests\data\1.sensor_data\';
ppp_data_path_ppg = [raw_data_root 'PPG\'];
meta_ppg = tsdf_scan_meta(ppp_data_path_ppg); % tsdf_scan_meta returns metafile struct containing information of all metafiles from all patients in tsdf_dirlist
n_files_ppg = length(meta_ppg);

sqa_data_path = '..\..\tests\data\4.predictions\ppg'; % Set the path to the SQA data
sqa_data_path = '..\..\..\tests\data\4.predictions\ppg'; % Set the path to the SQA data
sqa_output_list = dir(fullfile(sqa_data_path, '*_meta.json')); % separate for the SQA output files

meta_path_sqa = fullfile(sqa_output_list.folder, sqa_output_list.name); % Get the name of the SQA output file
@@ -106,7 +106,7 @@


t_iso_ppg = metadata_list_ppg{time_idx_ppg}.start_iso8601;
datetime_ppg = datetime(t_iso_ppg, 'InputFormat', 'dd-MMM-yyyy HH:mm:ss z', 'TimeZone', 'UTC');
datetime_ppg = datetime(t_iso_ppg, 'InputFormat', 'yyyy-MM-dd''T''HH:mm:ssZ', 'TimeZone', 'UTC');
ts_ppg = posixtime(datetime_ppg) * unix_ticks_ms;

t_ppg = cumsum(double(data_list_ppg{time_idx_ppg})) + ts_ppg;
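The hunk above switches the MATLAB InputFormat to an ISO 8601 pattern with an explicit timezone before converting to Unix milliseconds. The same parsing can be sketched in Python for comparison; the timestamp value here is hypothetical, purely for illustration:

```python
from datetime import datetime, timezone

unix_ticks_ms = 1000.0

# Example ISO 8601 start time with timezone offset (hypothetical value)
t_iso_ppg = "2024-01-15T09:30:00+0000"

# %z parses the timezone offset, mirroring the 'Z' in the MATLAB format;
# timestamp() on an aware datetime gives Unix seconds, scaled to ms here
dt = datetime.strptime(t_iso_ppg, "%Y-%m-%dT%H:%M:%S%z")
ts_ppg = dt.astimezone(timezone.utc).timestamp() * unix_ticks_ms
print(ts_ppg)  # Unix time in milliseconds
```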
@@ -218,4 +218,4 @@
meta_class{2} = metafile_values_hr;

mat_metadata_file_name = "hr_est_meta.json";
save_tsdf_data(meta_class, data_hr_est, location, mat_metadata_file_name);
% save_tsdf_data(meta_class, data_hr_est, location, mat_metadata_file_name);
43 changes: 29 additions & 14 deletions docs/notebooks/ppg/ppg_analysis.ipynb
@@ -9,23 +9,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Automatically reload modules\n",
"# %load_ext autoreload\n",
"# %autoreload 2\n",
"\n",
"import os\n",
"from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig\n",
"from paradigma.ppg_preprocessing import scan_and_sync_segments, preprocess_ppg_data\n",
"from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification"
"from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"tags": [
"parameters"
@@ -48,22 +44,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape of the original data: (64775, 2) (72947, 4)\n",
"Shape of the overlapping segments: (64775, 2) (64361, 4)\n"
]
}
],
Comment on lines +49 to +58
Contributor: You still have execution counts here and there, which in itself is not a problem, since you may want to show this output to a user without requiring them to run the code. Just checking whether that was your intention.
Contributor Author: Ehm, eventually yes, but not for now actually, so apologies.

"source": [
"ppg_config = PPGConfig()\n",
"imu_config = IMUConfig()\n",
"metadatas_ppg, metadatas_imu = scan_and_sync_segments(os.path.join(path_to_sensor_data, 'ppg'),\n",
" os.path.join(path_to_sensor_data, 'imu'))\n",
"df_ppg_proc, df_imu_proc=preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0],\n",
"df_ppg_proc, df_acc_proc=preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0],\n",
Contributor: Suggested change:
"df_ppg_proc, df_acc_proc=preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0],\n",
"df_ppg_proc, df_acc_proc = preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0],\n",

" path_to_preprocessed_data,\n",
" ppg_config, imu_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -73,18 +78,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"config = SignalQualityClassificationConfig()\n",
"df = signal_quality_classification(df_windowed, config, path_to_classifier)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"config = HeartRateExtractionConfig()\n",
"df_hr = estimate_heart_rate(df, df_ppg_proc, config)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "paradigma-Fn6RLG4_-py3.11",
"display_name": "paradigma-8ps1bg0Z-py3.12",
"language": "python",
"name": "python3"
},
@@ -98,7 +113,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.12.6"
}
},
"nbformat": 4,
25 changes: 14 additions & 11 deletions src/paradigma/config.py
@@ -146,8 +146,6 @@ def __init__(self) -> None:

self.window_length_s: int = 6
self.window_step_size_s: int = 1
self.segment_gap_s = 1.5


# Domain feature extraction configs
class GaitFeatureExtractionConfig(GaitBaseConfig):
@@ -375,9 +373,8 @@ def __init__(self, min_window_length: float = 10) -> None:
super().__init__()

# Parameters for HR analysis
self.window_length_s: int = 6
self.window_step_size_s: int = 1
self.min_hr_samples = min_window_length * self.sampling_frequency
self.window_overlap_s = self.window_length_s - self.window_step_size_s
self.min_hr_samples = int(min_window_length * self.sampling_frequency)
Contributor: E.g., if sampling frequency equals 25 Hz, and window length equals 3.5, then min_window_length * self.sampling_frequency equals 25 * 3.5 = 87.5. Now, inconveniently, int(87.5) equals 87 (int truncates toward the lowest integer). So perhaps, for these edge cases, you want to use int(np.round(min_window_length * self.sampling_frequency)) or something along these lines.
Contributor Author: Good point! I'll adjust it. Do you have any objections to round() instead of np.round, since otherwise numpy has to be imported as a module in the config? @Erikpostt
Contributor: There are some minor differences between the two methods, and I don't think that having to import numpy should restrict us to using round(). I'll let you decide on this matter.
Contributor Author: Alright, but it doesn't seem that np.round is preferred over round(), right? So then we can just use round() @Erikpostt
Contributor: No, they seem to have different methods for rounding, but I expect that, in case the rounding is ever done differently, it should not affect the results much.
self.threshold_sqa = 0.5

# Heart rate estimation parameters
@@ -389,12 +386,18 @@ def __init__(self, min_window_length: float = 10) -> None:
self.kern_type = 'sep'
win_type_doppler = 'hamm'
win_type_lag = 'hamm'
win_length_doppler = 1
win_length_lag = 8
win_length_doppler = 8
win_length_lag = 1
doppler_samples = self.sampling_frequency * win_length_doppler
lag_samples = win_length_lag * self.sampling_frequency
self.kern_params = [
{'doppler_samples': doppler_samples, 'win_type_doppler': win_type_doppler},
{'lag_samples': lag_samples, 'win_type_lag': win_type_lag}
]
self.kern_params = {
'doppler': {
'win_length': doppler_samples,
'win_type': win_type_doppler,
},
'lag': {
'win_length': lag_samples,
'win_type': win_type_lag,
}
}
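The restructured kern_params above addresses kernel parameters by name instead of by list position. A minimal standalone sketch, assuming the 30 Hz PPG sampling frequency set earlier in the scripts:

```python
sampling_frequency = 30  # Hz, assumed for illustration
win_type_doppler = 'hamm'
win_type_lag = 'hamm'
win_length_doppler = 8  # seconds
win_length_lag = 1      # seconds

kern_params = {
    'doppler': {
        'win_length': sampling_frequency * win_length_doppler,
        'win_type': win_type_doppler,
    },
    'lag': {
        'win_length': win_length_lag * sampling_frequency,
        'win_type': win_type_lag,
    },
}

# Named access is self-documenting, unlike indexing into a list of dicts
print(kern_params['doppler']['win_length'])  # 240
print(kern_params['lag']['win_length'])      # 30
```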

68 changes: 63 additions & 5 deletions src/paradigma/heart_rate/heart_rate_analysis.py
@@ -3,16 +3,18 @@

import pandas as pd
import os
import numpy as np

import tsdf
import tsdf.constants
from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig
from paradigma.heart_rate.heart_rate_analysis_config import SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig
from paradigma.util import read_metadata
from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig
from paradigma.segmenting import tabulate_windows_legacy
from paradigma.heart_rate.feature_extraction import extract_temporal_domain_features, extract_spectral_domain_features
from paradigma.heart_rate.heart_rate_estimation import assign_sqa_label, extract_hr_segments, extract_hr_from_segment
from paradigma.constants import DataColumns


def extract_signal_quality_features(df: pd.DataFrame, config: SignalQualityFeatureExtractionConfig) -> pd.DataFrame:
# Group sequences of timestamps into windows
df_windowed = tabulate_windows_legacy(config, df)
@@ -31,7 +33,7 @@ def extract_signal_quality_features_io(input_path: Union[str, Path], output_path
metadata_time, metadata_values = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns)

# Extract gait features
# Extract signal quality features
df_windowed = extract_signal_quality_features(df, config)
return df_windowed

@@ -74,7 +76,7 @@ def signal_quality_classification(df: pd.DataFrame, config: SignalQualityClassif
# Make predictions for PPG signal quality assessment
df[DataColumns.PRED_SQA_PROBA] = lr_clf.predict_proba(X_normalized)[:, 0]
df.drop(columns = lr_clf.feature_names_in_, inplace=True) # Drop the features used for classification since they are no longer needed

return df


@@ -85,4 +87,60 @@ def signal_quality_classification_io(input_path: Union[str, Path], output_path:
metadata_time, metadata_values = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename)
df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns)

df = signal_quality_classification(df, config, path_to_classifier_input)
df = signal_quality_classification(df, config, path_to_classifier_input)


def estimate_heart_rate(df: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config:HeartRateExtractionConfig) -> pd.DataFrame:
Contributor: To me it's a little ambiguous as to the meaning of df. Could you perhaps choose a different naming for df, and expand on the difference between these two dataframes in the docstrings?
Contributor: Suggested change:
def estimate_heart_rate(df: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config:HeartRateExtractionConfig) -> pd.DataFrame:
def estimate_heart_rate(df: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config: HeartRateExtractionConfig) -> pd.DataFrame:
"""
Estimate the heart rate from the PPG signal using the time-frequency domain method.

Parameters
----------
df : pd.DataFrame
The DataFrame containing the PPG signal.
df_ppg_preprocessed : pd.DataFrame
The DataFrame containing the preprocessed PPG signal.
config : HeartRateExtractionConfig
The configuration for the heart rate estimation.

Returns
-------
pd.DataFrame
The DataFrame containing the heart rate estimations.
"""

# Assign window-level probabilities to individual samples
ppg_post_prob = df.loc[:, DataColumns.PRED_SQA_PROBA].to_numpy()
#acc_label = df.loc[:, DataColumns.ACCELEROMETER_LABEL].to_numpy() # Adjust later in data columns to get the correct label, should be first intergrated in feature extraction and classification
Contributor: Is this intentional?
Contributor Author: Do you mean that acc_label is commented out? Because that is intentional for now, yes @Erikpostt
Contributor: Yes, I was just wondering.


sqa_label = assign_sqa_label(ppg_post_prob, config)
v_start_idx, v_end_idx = extract_hr_segments(sqa_label, config.min_hr_samples)
Comment on lines +116 to +117
Contributor (@Erikpostt, Dec 12, 2024): Perhaps add brief comments (#) here to explain what happens, e.g., # Add signal quality value to each window and # Extract HR segments above signal quality threshold.
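For readers following along, here is a hypothetical sketch of what extracting contiguous high-quality segments could look like; the real assign_sqa_label and extract_hr_segments live in paradigma.heart_rate.heart_rate_estimation and may differ in detail:

```python
import numpy as np

# Hypothetical sketch: find runs of 1s (acceptable quality) in a binary SQA
# label array, keeping only runs of at least min_samples. Returns start
# indices and exclusive end indices of the kept runs.
def extract_segments_sketch(sqa_label, min_samples):
    padded = np.concatenate(([0], sqa_label, [0]))
    edges = np.diff(padded)
    starts = np.where(edges == 1)[0]   # rising edges: segment starts
    ends = np.where(edges == -1)[0]    # falling edges: segment ends (exclusive)
    keep = (ends - starts) >= min_samples
    return starts[keep], ends[keep]

label = np.array([0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1])
starts, ends = extract_segments_sketch(label, min_samples=3)
print(starts.tolist(), ends.tolist())  # [1, 7] [4, 11]
```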


fs = config.sampling_frequency

v_hr_rel = np.array([])
t_hr_rel = np.array([])

for start_idx, end_idx in zip(v_start_idx, v_end_idx):
# Skip if the epoch cannot be extended by 2s on both sides
if start_idx < 2 * fs or end_idx > len(df_ppg_preprocessed) - 2 * fs:
KarsVeldkamp marked this conversation as resolved.
continue

# Extract the extended PPG segment for HR estimation
extended_ppg_segment = df_ppg_preprocessed[DataColumns.PPG][start_idx - 2 * fs : end_idx + 2 * fs]

# Perform HR estimation
hr_est = extract_hr_from_segment(extended_ppg_segment, config.tfd_length, fs, config.kern_type, config.kern_params)

# Generate HR estimation time array
rel_segment_time = df_ppg_preprocessed.time[start_idx:end_idx].values
Contributor: Is .time the same as [DataColumns.TIME]?
Contributor Author: No, good point.

n_full_segments = len(rel_segment_time) // config.hr_est_samples
hr_time = rel_segment_time[:n_full_segments * config.hr_est_samples : config.hr_est_samples] # relative time in seconds after the start of the segment
Contributor: Perhaps you could clarify this process a little using comments (#); I'm not sure I understand it as it is now.
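As a concrete illustration of the stride slicing above, with small hypothetical numbers in place of the real sampling parameters: it keeps one timestamp per hr_est_samples and drops any trailing partial segment.

```python
hr_est_samples = 2  # hypothetical: one HR estimate per 2 samples
rel_segment_time = [10, 11, 12, 13, 14, 15, 16]  # 7 timestamps

# Number of complete segments that fit in this quality segment
n_full_segments = len(rel_segment_time) // hr_est_samples  # 3

# Take the first timestamp of each complete segment; the trailing
# partial segment (timestamp 16) is dropped
hr_time = rel_segment_time[:n_full_segments * hr_est_samples : hr_est_samples]
print(hr_time)  # [10, 12, 14]
```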


# Concatenate HR estimations and times
v_hr_rel = np.concatenate((v_hr_rel, hr_est))
t_hr_rel = np.concatenate((t_hr_rel, hr_time))

df_hr = pd.DataFrame({"rel_time": t_hr_rel, "heart_rate": v_hr_rel})

return df_hr