diff --git a/docs/matlab_scripts/ppg/1-2-3_signal_quality.m b/docs/matlab_scripts/ppg/1-2-3_signal_quality.m index eb64505f..193b0ffc 100644 --- a/docs/matlab_scripts/ppg/1-2-3_signal_quality.m +++ b/docs/matlab_scripts/ppg/1-2-3_signal_quality.m @@ -107,7 +107,7 @@ warning('Sample is of insufficient length!') else [v_ppg_pre, tr_ppg_pre] = preprocessing_ppg(tr_ppg, v_ppg, fs_ppg); % 2 matrices, one for ppg (Nx1) and for time (Nx1) - [v_imu_pre, tr_imu_pre] = preprocessing_imu(tr_imu, v_acc_scaled, fs_imu); % 2 matrices, one for accel imu (Nx3) and for time (Nx1) + [v_acc_pre, tr_acc_pre] = preprocessing_imu(tr_imu, v_acc_scaled, fs_imu); % 2 matrices, one for accel imu (Nx3) and for time (Nx1) end %% 5a. Write TSDF PPG preprocessing output diff --git a/docs/matlab_scripts/ppg/1-4_HR_estimation.m b/docs/matlab_scripts/ppg/1-4_HR_estimation.m index 628f2889..e4353a13 100644 --- a/docs/matlab_scripts/ppg/1-4_HR_estimation.m +++ b/docs/matlab_scripts/ppg/1-4_HR_estimation.m @@ -16,7 +16,7 @@ %% Initalization % Setting data paths + extracting metafilenames already -clear all; close all; clc +clearvars; close all; clc addpath(genpath('..\..\..\paradigma-toolbox')) % Add git repository to the path addpath(genpath("..\..\..\\tsdf4matlab")) % Add wrapper to the path warning('off','all') % Turn off warnings to improve speed in spwvd especially @@ -25,12 +25,12 @@ unix_ticks_ms = 1000.0; fs_ppg = 30; % Establish the sampling rate desired for resampling PPG --> now chosen to be fixed on 30 Hz -raw_data_root = '..\..\tests\data\1.sensor_data\'; +raw_data_root = '..\..\..\tests\data\1.sensor_data\'; ppp_data_path_ppg = [raw_data_root 'PPG\']; meta_ppg = tsdf_scan_meta(ppp_data_path_ppg); % tsdf_scan_meta returns metafile struct containing information of all metafiles from all patients in tsdf_dirlist n_files_ppg = length(meta_ppg); -sqa_data_path = '..\..\tests\data\4.predictions\ppg'; % Set the path to the SQA data +sqa_data_path = '..\..\..\tests\data\4.predictions\ppg'; % Set the path to the SQA data sqa_output_list = dir(fullfile(sqa_data_path, '*_meta.json')); % seperate for the SQA output files meta_path_sqa = fullfile(sqa_output_list.folder, sqa_output_list.name); % Get the name of the SQA output file @@ -106,7 +106,7 @@ t_iso_ppg = metadata_list_ppg{time_idx_ppg}.start_iso8601; - datetime_ppg = datetime(t_iso_ppg, 'InputFormat', 'dd-MMM-yyyy HH:mm:ss z', 'TimeZone', 'UTC'); + datetime_ppg = datetime(t_iso_ppg, 'InputFormat', 'yyyy-MM-dd''T''HH:mm:ssZ', 'TimeZone', 'UTC'); ts_ppg = posixtime(datetime_ppg) * unix_ticks_ms; t_ppg = cumsum(double(data_list_ppg{time_idx_ppg})) + ts_ppg; @@ -218,4 +218,4 @@ meta_class{2} = metafile_values_hr; mat_metadata_file_name = "hr_est_meta.json"; -save_tsdf_data(meta_class, data_hr_est, location, mat_metadata_file_name); \ No newline at end of file +% save_tsdf_data(meta_class, data_hr_est, location, mat_metadata_file_name); \ No newline at end of file diff --git a/docs/notebooks/ppg/ppg_analysis.ipynb b/docs/notebooks/ppg/ppg_analysis.ipynb index 4176d1f8..82516b53 100644 --- a/docs/notebooks/ppg/ppg_analysis.ipynb +++ b/docs/notebooks/ppg/ppg_analysis.ipynb @@ -9,23 +9,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# Automatically reload modules\n", - "# %load_ext autoreload\n", - "# %autoreload 2\n", - "\n", "import os\n", "from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig\n", "from paradigma.ppg_preprocessing import scan_and_sync_segments, preprocess_ppg_data\n", - "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification" + "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "tags": [ "parameters" @@ -48,22 +44,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of the original data: (64775, 2) (72947, 4)\n", + "Shape of the overlapping segments: (64775, 2) (64361, 4)\n" + ] + } + ], "source": [ "ppg_config = PPGConfig()\n", "imu_config = IMUConfig()\n", "metadatas_ppg, metadatas_imu = scan_and_sync_segments(os.path.join(path_to_sensor_data, 'ppg'),\n", " os.path.join(path_to_sensor_data, 'imu'))\n", - "df_ppg_proc, df_imu_proc=preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0],\n", + "df_ppg_proc, df_acc_proc=preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0],\n", " path_to_preprocessed_data,\n", " ppg_config, imu_config)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -73,18 +78,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "config = SignalQualityClassificationConfig()\n", "df = signal_quality_classification(df_windowed, config, path_to_classifier)" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "config = HeartRateExtractionConfig()\n", + "df_hr = estimate_heart_rate(df, df_ppg_proc, config)" + ] } ], "metadata": { "kernelspec": { - "display_name": "paradigma-Fn6RLG4_-py3.11", + "display_name": "paradigma-8ps1bg0Z-py3.12", "language": "python", "name": "python3" }, @@ -98,7 +113,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/src/paradigma/config.py b/src/paradigma/config.py index 3317e596..e9e2638c 100644 --- a/src/paradigma/config.py +++ b/src/paradigma/config.py @@ -146,8 +146,6 @@ def __init__(self) -> None: self.window_length_s: int = 6 self.window_step_size_s: int = 1 - self.segment_gap_s = 1.5 - # Domain feature extraction configs class GaitFeatureExtractionConfig(GaitBaseConfig): @@ -375,9 +373,8 @@ def __init__(self, min_window_length: float = 10) -> None: super().__init__() # Parameters for HR analysis - self.window_length_s: int = 6 - self.window_step_size_s: int = 1 - self.min_hr_samples = min_window_length * self.sampling_frequency + self.window_overlap_s = self.window_length_s - self.window_step_size_s + self.min_hr_samples = int(min_window_length * self.sampling_frequency) self.threshold_sqa = 0.5 # Heart rate estimation parameters @@ -389,12 +386,18 @@ def __init__(self, min_window_length: float = 10) -> None: self.kern_type = 'sep' win_type_doppler = 'hamm' win_type_lag = 'hamm' - win_length_doppler = 1 - win_length_lag = 8 + win_length_doppler = 8 + win_length_lag = 1 doppler_samples = self.sampling_frequency * win_length_doppler lag_samples = win_length_lag * self.sampling_frequency - self.kern_params = [ - {'doppler_samples': doppler_samples, 'win_type_doppler': win_type_doppler}, - {'lag_samples': lag_samples, 'win_type_lag': win_type_lag} - ] + self.kern_params = { + 'doppler': { + 'win_length': doppler_samples, + 'win_type': win_type_doppler, + }, + 'lag': { + 'win_length': lag_samples, + 'win_type': win_type_lag, + } + } \ No newline at end of file diff --git a/src/paradigma/heart_rate/heart_rate_analysis.py b/src/paradigma/heart_rate/heart_rate_analysis.py index 8d58c2ec..52f9f1d4 100644 --- a/src/paradigma/heart_rate/heart_rate_analysis.py +++ b/src/paradigma/heart_rate/heart_rate_analysis.py @@ -3,16 +3,18 @@ import pandas as pd import os +import numpy as np import tsdf import tsdf.constants -from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig +from paradigma.heart_rate.heart_rate_analysis_config import SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig from paradigma.util import read_metadata +from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig from paradigma.segmenting import tabulate_windows_legacy from paradigma.heart_rate.feature_extraction import extract_temporal_domain_features, extract_spectral_domain_features +from paradigma.heart_rate.heart_rate_estimation import assign_sqa_label, extract_hr_segments, extract_hr_from_segment from paradigma.constants import DataColumns - def extract_signal_quality_features(df: pd.DataFrame, config: SignalQualityFeatureExtractionConfig) -> pd.DataFrame: # Group sequences of timestamps into windows df_windowed = tabulate_windows_legacy(config, df) @@ -31,7 +33,7 @@ def extract_signal_quality_features_io(input_path: Union[str, Path], output_path metadata_time, metadata_values = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename) df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns) - # Extract gait features + # Extract signal quality features df_windowed = extract_signal_quality_features(df, config) return df_windowed @@ -74,7 +76,7 @@ def signal_quality_classification(df: pd.DataFrame, config: SignalQualityClassif # Make predictions for PPG signal quality assessment df[DataColumns.PRED_SQA_PROBA] = lr_clf.predict_proba(X_normalized)[:, 0] df.drop(columns = lr_clf.feature_names_in_, inplace=True) # Drop the features used for classification since they are no longer needed - + return df @@ -85,4 +87,60 @@ def signal_quality_classification_io(input_path: Union[str, Path], output_path: metadata_time, metadata_values = read_metadata(input_path, config.meta_filename, config.time_filename, config.values_filename) df = tsdf.load_dataframe_from_binaries([metadata_time, metadata_values], tsdf.constants.ConcatenationType.columns) - df = signal_quality_classification(df, config, path_to_classifier_input) \ No newline at end of file + df = signal_quality_classification(df, config, path_to_classifier_input) + + +def estimate_heart_rate(df: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, config:HeartRateExtractionConfig) -> pd.DataFrame: + """ + Estimate the heart rate from the PPG signal using the time-frequency domain method. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame containing the PPG signal. + df_ppg_preprocessed : pd.DataFrame + The DataFrame containing the preprocessed PPG signal. + config : HeartRateExtractionConfig + The configuration for the heart rate estimation. + + Returns + ------- + pd.DataFrame + The DataFrame containing the heart rate estimations. + """ + + # Assign window-level probabilities to individual samples + ppg_post_prob = df.loc[:, DataColumns.PRED_SQA_PROBA].to_numpy() + #acc_label = df.loc[:, DataColumns.ACCELEROMETER_LABEL].to_numpy() # Adjust later in data columns to get the correct label, should be first intergrated in feature extraction and classification + + sqa_label = assign_sqa_label(ppg_post_prob, config) + v_start_idx, v_end_idx = extract_hr_segments(sqa_label, config.min_hr_samples) + + fs = config.sampling_frequency + + v_hr_rel = np.array([]) + t_hr_rel = np.array([]) + + for start_idx, end_idx in zip(v_start_idx, v_end_idx): + # Skip if the epoch cannot be extended by 2s on both sides + if start_idx < 2 * fs or end_idx > len(df_ppg_preprocessed) - 2 * fs: + continue + + # Extract the extended PPG segment for HR estimation + extended_ppg_segment = df_ppg_preprocessed[DataColumns.PPG][start_idx - 2 * fs : end_idx + 2 * fs] + + # Perform HR estimation + hr_est = extract_hr_from_segment(extended_ppg_segment, config.tfd_length, fs, config.kern_type, config.kern_params) + + # Generate HR estimation time array + rel_segment_time = df_ppg_preprocessed.time[start_idx:end_idx].values + n_full_segments = len(rel_segment_time) // config.hr_est_samples + hr_time = rel_segment_time[:n_full_segments * config.hr_est_samples : config.hr_est_samples] # relative time in seconds after the start of the segment + + # Concatenate HR estimations and times + v_hr_rel = np.concatenate((v_hr_rel, hr_est)) + t_hr_rel = np.concatenate((t_hr_rel, hr_time)) + + df_hr = pd.DataFrame({"rel_time": t_hr_rel, "heart_rate": v_hr_rel}) + + return df_hr diff --git a/src/paradigma/heart_rate/heart_rate_analysis_config.py b/src/paradigma/heart_rate/heart_rate_analysis_config.py new file mode 100644 index 00000000..1527b480 --- /dev/null +++ b/src/paradigma/heart_rate/heart_rate_analysis_config.py @@ -0,0 +1,177 @@ +from typing import Dict, List +from paradigma.constants import DataColumns + +class IMUconfig: + """ + Base class for Gait feature extraction and Gait detection configurations, based on the IMU data (accelerometer, gyroscope). + """ + def __init__(self): + + self.time_colname = DataColumns.TIME + self.segment_nr_colname = DataColumns.SEGMENT_NR + + self.accelerometer_cols: List[str] = [ + DataColumns.ACCELEROMETER_X, + DataColumns.ACCELEROMETER_Y, + DataColumns.ACCELEROMETER_Z, + ] + self.gyroscope_cols: List[str] = [ + DataColumns.GYROSCOPE_X, + DataColumns.GYROSCOPE_Y, + DataColumns.GYROSCOPE_Z, + ] + + self.gravity_cols: List[str] = [ + DataColumns.GRAV_ACCELEROMETER_X, + DataColumns.GRAV_ACCELEROMETER_Y, + DataColumns.GRAV_ACCELEROMETER_Z, + ] + + self.sampling_frequency = 100 + + def set_sensor(self, sensor: str) -> None: + """Sets the sensor and derived filenames""" + self.sensor: str = sensor + self.set_filenames(sensor) + + def set_filenames(self, prefix: str) -> None: + """Sets the filenames based on the prefix, + + Parameters + ---------- + prefix : str + The prefix for the filenames. + """ + self.meta_filename = f"{prefix}_meta.json" + self.time_filename = f"{prefix}_time.bin" + self.values_filename = f"{prefix}_samples.bin" + + def set_filenames_values(self, prefix: str) -> None: + """Sets the filenames based on the prefix, + + Parameters + ---------- + prefix : str + The prefix for the filenames. + """ + self.meta_filename = f"{prefix}_meta.json" + self.time_filename = f"{prefix}_time.bin" + self.values_filename = f"{prefix}_values.bin" + +class PPGconfig: + """ + Base class for signal quality feature extraction and heart rate estimation configurations, based on the PPG data. + """ + def __init__(self): + + self.time_colname = DataColumns.TIME + self.segment_nr_colname = DataColumns.SEGMENT_NR + self.ppg_colname = DataColumns.PPG + self.sampling_frequency = 30 + self.unix_ticks_ms = 1000 + + def set_sensor(self, sensor: str) -> None: + """Sets the sensor and derived filenames""" + self.sensor: str = sensor + self.set_filenames(sensor) + + def set_filenames(self, prefix: str) -> None: + """Sets the filenames based on the prefix, + + Parameters + ---------- + prefix : str + The prefix for the filenames. + """ + self.meta_filename = f"{prefix}_meta.json" + self.time_filename = f"{prefix}_time.bin" + self.values_filename = f"{prefix}_samples.bin" + + def set_filenames_values(self, prefix: str) -> None: + """Sets the filenames based on the prefix, + + Parameters + ---------- + prefix : str + The prefix for the filenames. + """ + self.meta_filename = f"{prefix}_meta.json" + self.time_filename = f"{prefix}_time.bin" + self.values_filename = f"{prefix}_values.bin" + +class SignalQualityFeatureExtractionConfig(PPGconfig): + + def __init__(self) -> None: + super().__init__() + self.set_sensor("PPG") + + self.window_length_s: int = 6 + self.window_step_size_s: int = 1 + self.segment_gap_s = 1.5 + self.window_length_welch = 3*self.sampling_frequency + self.overlap_welch_window = self.window_length_welch // 2 + + self.freq_band_physio = [0.75, 3] # Hz + self.bandwidth = 0.2 # Hz + + config_imu = IMUconfig() + self.sampling_frequency_imu = config_imu.sampling_frequency + + self.single_value_cols: List[str] = None + self.list_value_cols: List[str] = [ + self.ppg_colname + ] + + def set_sampling_frequency(self, sampling_frequency: int) -> None: + """Sets the sampling frequency and derived variables""" + self.sampling_frequency: int = sampling_frequency + self.spectrum_low_frequency: int = 0 # Hz + self.spectrum_high_frequency: int = int(self.sampling_frequency / 2) # Hz + self.filter_length: int = self.spectrum_high_frequency - 1 + +class SignalQualityClassificationConfig(PPGconfig): + + def __init__(self) -> None: + super().__init__() + self.classifier_file_name = "ppg_quality_classifier.pkl" + self.thresholds_file_name = "ppg_acc_quality_threshold.txt" + + self.set_filenames_values("ppg") + +class HeartRateExtractionConfig(PPGconfig): + + def __init__(self) -> None: + super().__init__() + # Parameters for HR analysis + self.sqa_window_length_s: int = 6 + self.sqa_window_overlap_s: int = 5 + self.sqa_window_step_size_s: int = 1 + min_window_length = 10 + self.min_hr_samples = min_window_length * self.sampling_frequency + self.threshold_sqa = 0.5 + + # Heart rate estimation parameters + hr_est_length = 2 + self.hr_est_samples = hr_est_length * self.sampling_frequency + + # Time-frequency distribution parameters + self.tfd_length = 10 + self.kern_type = 'sep' + win_type_doppler = 'hamm' + win_type_lag = 'hamm' + win_length_doppler = 8 + win_length_lag = 1 + doppler_samples = self.sampling_frequency * win_length_doppler + lag_samples = win_length_lag * self.sampling_frequency + + self.kern_params = { + 'doppler': { + 'win_length': doppler_samples, + 'win_type': win_type_doppler, + }, + 'lag': { + 'win_length': lag_samples, + 'win_type': win_type_lag, + } + } + \ No newline at end of file diff --git a/src/paradigma/heart_rate/heart_rate_estimation.py b/src/paradigma/heart_rate/heart_rate_estimation.py new file mode 100644 index 00000000..7a3b25dc --- /dev/null +++ b/src/paradigma/heart_rate/heart_rate_estimation.py @@ -0,0 +1,185 @@ +import numpy as np +from typing import Tuple + +from paradigma.config import HeartRateExtractionConfig +from paradigma.heart_rate.tfd import nonsep_gdtfd + +def assign_sqa_label(ppg_prob, config: HeartRateExtractionConfig, acc_label=None) -> np.ndarray: + """ + Assigns a signal quality label to every individual data point. + + Parameters: + - ppg_prob: numpy array of probabilities for PPG + - acc_label: numpy array of labels for accelerometer (optional) + - fs: Sampling frequency + + Returns: + - sqa_label: numpy array of signal quality assessment labels + """ + # Default _label to ones if not provided + if acc_label is None: + acc_label = np.ones(len(ppg_prob)) + + + # Number of samples in an epoch + fs = config.sampling_frequency + samples_per_epoch = config.window_length_s * fs + + # Calculate number of samples to shift for each epoch + samples_shift = config.window_step_size_s * fs + n_samples = int((len(ppg_prob) + config.window_overlap_s) * fs) + data_prob = np.zeros(n_samples) + data_label_imu = np.zeros(n_samples, dtype=np.int8) + + for i in range(n_samples): + # Start and end indices for current epoch + start_idx = int(np.floor((i - (samples_per_epoch - samples_shift)) / fs)) + end_idx = int(np.floor(i / fs)) + + # Correct for first and last epochs + if start_idx < 0: + start_idx = 0 + if end_idx > len(ppg_prob): + end_idx = len(ppg_prob) + + # Extract probabilities and labels for the current epoch + prob = ppg_prob[start_idx:end_idx+1] + label_imu = acc_label[start_idx:end_idx+1] + + # Calculate mean probability and majority voting for labels + data_prob[i] = np.mean(prob) + data_label_imu[i] = int(np.mean(label_imu) >= 0.5) + + # Set probability to zero if majority IMU label is 0 + data_prob[data_label_imu == 0] = 0 + sqa_label = (data_prob > config.threshold_sqa).astype(np.int8) + + return sqa_label + +def extract_hr_segments(sqa_label: np.ndarray, min_hr_samples: int) -> Tuple[np.ndarray, np.ndarray]: + """Extracts heart rate segments based on the SQA label. + + Parameters + ---------- + sqa_label : np.ndarray + The signal quality assessment label. + min_hr_samples : int + The minimum number of samples required for a heart rate segment. + + Returns + ------- + Tuple[v_start_idx_long, v_end_idx_long] + The start and end indices of the heart rate segments. + """ + # Find the start and end indices of the heart rate segments + v_start_idx = np.where(np.diff(sqa_label) == 1)[0] + 1 + v_end_idx = np.where(np.diff(sqa_label) == -1)[0] + 1 + + # Check if the first segment is a start or end + if sqa_label[0] == 1: + v_start_idx = np.insert(v_start_idx, 0, 0) + if sqa_label[-1] == 1: + v_end_idx = np.append(v_end_idx, len(sqa_label)) + + # Check if the segments are long enough + v_start_idx_long = v_start_idx[(v_end_idx - v_start_idx) >= min_hr_samples] + v_end_idx_long = v_end_idx[(v_end_idx - v_start_idx) >= min_hr_samples] + + return v_start_idx_long, v_end_idx_long + +def extract_hr_from_segment(ppg: np.ndarray, tfd_length: int, fs: int, kern_type: str, kern_params: dict) -> np.ndarray: + """Extracts heart rate from the time-frequency distribution of the PPG signal. + + Parameters + ---------- + ppg : np.ndarray + The preprocessed PPG segment with 2 seconds of padding on both sides to reduce boundary effects. + tfd_length : int + Length of each segment (in seconds) to calculate the time-frequency distribution. + fs : int + The sampling frequency of the PPG signal. + MA : int + The moving average window length. + kern_type : str + Type of TFD kernel to use (e.g., 'wvd' for Wigner-Ville distribution). + kern_params : dict + Parameters for the specified kernel. Not required for 'wvd', but relevant for other + kernels like 'spwvd' or 'swvd'. Default is None. + + Returns + ------- + np.ndarray + The estimated heart rate. + """ + + # Constants to handle boundary effects + edge_padding = 4 # Additional 4 seconds (2 seconds on both sides) + extended_epoch_length = tfd_length + edge_padding + + # Calculate the actual segment length without padding + original_segment_length = (len(ppg) - edge_padding * fs) / fs + + # Determine the number of tfd_length-second segments + if original_segment_length > extended_epoch_length: + num_segments = int(original_segment_length // tfd_length) + else: + num_segments = 1 # For shorter segments (< 30s) + + # Split the PPG signal into segments + ppg_segments = [] + for i in range(num_segments): + if i != num_segments - 1: # For all segments except the last + start_idx = int(i * tfd_length * fs) + end_idx = int((i + 1) * tfd_length * fs + edge_padding * fs) + else: # For the last segment + start_idx = int(i * tfd_length * fs) + end_idx = len(ppg) + ppg_segments.append(ppg[start_idx:end_idx]) + + hr_est_from_ppg = np.array([]) + for segment in ppg_segments: + # Calculate the time-frequency distribution + hr_tfd = extract_hr_with_tfd(segment, fs, kern_type, kern_params) + hr_est_from_ppg = np.concatenate((hr_est_from_ppg, hr_tfd)) # Append the HR estimates + + return hr_est_from_ppg + +def extract_hr_with_tfd(ppg: np.ndarray, fs: int, kern_type: str, kern_params: dict) -> np.ndarray: + """ + Estimate heart rate (HR) from a PPG segment using a TFD method with optional + moving average filtering. + + Parameters + ---------- + ppg_segment : array-like + Segment of the PPG signal to process. + fs : int + Sampling frequency in Hz. + kern_type : str + Type of TFD kernel to use. + kern_params : dict + Parameters for the specified kernel. + + Returns + ------- + hr_smooth_tfd : list + Estimated HR values (in beats per minute) for each 2-second segment of the PPG signal. + """ + # Generate the TFD matrix using the specified kernel + tfd = nonsep_gdtfd(ppg, kern_type, kern_params) # Returns an NxN matrix + + # Get time and frequency axes for the TFD + num_time_samples, num_freq_bins = tfd.shape + time_axis = np.arange(num_time_samples) / fs + freq_axis = np.linspace(0, 0.5, num_freq_bins) * fs + + # Estimate HR by identifying the max frequency in the TFD + max_freq_indices = np.argmax(tfd, axis=0) + + hr_smooth_tfd = np.array([]) + for i in range(2, int(len(ppg) / fs) - 4, 2): # Skip the first and last 2 seconds + relevant_indices = (time_axis >= i) & (time_axis < i + 2) + avg_frequency = np.mean(freq_axis[max_freq_indices[relevant_indices]]) + hr_smooth_tfd = np.concatenate((hr_smooth_tfd, [60 * avg_frequency])) # Convert frequency to BPM + + return hr_smooth_tfd diff --git a/src/paradigma/heart_rate/tfd.py b/src/paradigma/heart_rate/tfd.py new file mode 100644 index 00000000..427ce21b --- /dev/null +++ b/src/paradigma/heart_rate/tfd.py @@ -0,0 +1,553 @@ +import numpy as np +from scipy import signal + +""" + +This module contains the implementation of the Generalized Time-Frequency Distribution (TFD) computation using non-separable kernels. +This is a Python implementation of the MATLAB code provided by John O Toole in the following repository: https://github.com/otoolej/memeff_TFDs + +The following functions are implemented for the computation of the TFD: + - nonsep_gdtfd: Computes the generalized time-frequency distribution using a non-separable kernel. + - get_analytic_signal: Generates the analytic signal of the input signal. + - gen_analytic: Generates the analytic signal by zero-padding and performing FFT. + - gen_time_lag: Generates the time-lag distribution of the analytic signal. + - multiply_kernel_signal: Multiplies the TFD by the Doppler-lag kernel. + - gen_doppler_lag_kern: Generates the Doppler-lag kernel based on kernel type and parameters. + - get_kern: Gets the kernel based on the provided kernel type. + - get_window: General function to calculate a window function. + - get_win: Helper function to create the specified window type. + - shift_window: Shifts the window so that positive indices appear first. + - pad_window: Zero-pads the window to a specified length. + - compute_tfd: Finalizes the time-frequency distribution computation. + + +""" + +def nonsep_gdtfd(x, kern_type=None, kern_params=None): + """ + Computes the generalized time-frequency distribution (TFD) using a non-separable kernel. + + Parameters: + ----------- + x : ndarray + Input signal to be analyzed. + kern_type : str, optional + Type of kernel to be used for TFD computation. Default is None. + Currently supported kernels are: + wvd - kernel for Wigner-Ville distribution + swvd - kernel for smoothed Wigner-Ville distribution + (lag-independent kernel) + pwvd - kernel for pseudo Wigner-Ville distribution + (Doppler-independent kernel) + sep - kernel for separable kernel (combintation of SWVD and PWVD) + +kern_params : dict, optional + Dictionary of parameters specific to the kernel type. Default is None. + The structure of the dictionary depends on the selected kernel type: + - wvd: + An empty dictionary, as no additional parameters are required. + - swvd: + Dictionary with the following keys: + 'win_length': Length of the smoothing window. + 'win_type': Type of window function (e.g., 'hamm', 'hann'). + 'win_param' (optional): Additional parameters for the window. + 'win_param2' (optional): 0 for time-domain window or 1 for Doppler-domain window. + + Example: + ```python + kern_params = { + 'win_length': 11, + 'win_type': 'hamm', + 'win_param': 0, + 'domain': 1 + } + ``` + - pwvd: + Dictionary with the following keys: + 'win_length': Length of the smoothing window. + 'win_type': Type of window function (e.g., 'cosh'). + 'win_param' (optional): Additional parameters for the window. + 'win_param2' (optional): 0 for time-domain window or 1 for Doppler-domain window. + Example: + ```python + kern_params = { + 'win_length': 200, + 'win_type': 'cosh', + 'win_param': 0.1 + } + ``` + - sep: + Dictionary containing two nested dictionaries, one for the Doppler window and one for the lag window: + 'doppler': { + 'win_length': Length of the Doppler-domain window. + 'win_type': Type of Doppler-domain window function. + 'win_param' (optional): Additional parameters for the Doppler window. + 'win_param2' (optional): 0 for time-domain window or 1 for Doppler-domain window. + } + 'lag': { + 'win_length': Length of the lag-domain window. + 'win_type': Type of lag-domain window function. + 'win_param' (optional): Additional parameters for the lag window. + 'win_param2' (optional): 0 for time-domain window or 1 for Doppler-domain window. + } + Example: + ```python + kern_params = { + 'doppler': { + 'win_length': doppler_samples, + 'win_type': win_type_doppler, + }, + 'lag': { + 'win_length': lag_samples, + 'win_type': win_type_lag, + } + } + ``` + Returns: + -------- + tfd : ndarray + The computed time-frequency distribution. + """ + z = get_analytic_signal(x) + N = len(z) // 2 # Since z is a signal of length 2N + Nh = int(np.ceil(N / 2)) + + # Generate the time-lag distribution of the analytic signal + tfd = gen_time_lag(z) + + # Multiply the TFD by the Doppler-lag kernel + tfd = multiply_kernel_signal(tfd, kern_type, kern_params, N, Nh) + + # Finalize the TFD computation + tfd = compute_tfd(N, Nh, tfd) + + return tfd + +def get_analytic_signal(x): + """ + Generates the signals analytic version. + + Parameters: + ----------- + z : ndarray + Input real-valued signal. + + Returns: + -------- + z : ndarray + Analytic signal with zero-padded imaginary part. + """ + N = len(x) + + # Ensure the signal length is even by trimming one sample if odd, since the gen_time_lag function requires an even-length signal + if N % 2 != 0: + x = x[:-1] + + # Make the analytical signal of the real-valued signal z (preprocessed PPG signal) + # doesn't work for input of complex numbers + z = gen_analytic(x) + + return z + +def gen_analytic(x): + """ + Generates an analytic signal by zero-padding and performing FFT. + + Parameters: + ----------- + x : ndarray + Input real-valued signal. + + Returns: + -------- + z : ndarray + Analytic signal in the time domain with zeroed second half. + """ + N = len(x) + + # Zero-pad the signal to double its length + x = np.concatenate((np.real(x), np.zeros(N))) + x_fft = np.fft.fft(x) + + # Generate the analytic signal in the frequency domain + H = [1] + list(np.repeat(2, N-1)) + [1] + list(np.repeat(0, N-1)) + z_cb = np.fft.ifft(x_fft * H) + + # Force the second half of the time-domain signal to zero + z = np.concatenate((z_cb[:N], np.zeros(N))) + + return z + +def gen_time_lag(z): + """ + Generate the time-lag distribution of the analytic signal z. + + Parameters: + ----------- + z : ndarray + Analytic signal of the input signal x. + + Returns: + -------- + tfd : ndarray + Time-lag distribution of the analytic signal z. + + """ + N = len(z) // 2 # Assuming z is a signal of length 2N + Nh = int(np.ceil(N / 2)) + + # Initialize the time-frequency distribution (TFD) matrix + tfd = np.zeros((N, N), dtype=complex) + + m = np.arange(Nh) + + # Loop over time indices + for n in range(N): + inp = np.mod(n + m, 2 * N) + inn = np.mod(n - m, 2 * N) + + # Extract the time slice from the analytic signal + K_time_slice = z[inp] * np.conj(z[inn]) + + # Store real and imaginary parts + tfd[n, :Nh] = np.real(K_time_slice) + tfd[n, Nh:] = np.imag(K_time_slice) + + return tfd + +def multiply_kernel_signal(tfd, kern_type, kern_params, N, Nh): + """ + Multiplies the TFD by the Doppler-lag kernel. + + Parameters: + ----------- + tfd : ndarray + Time-frequency distribution. + kern_type : str + Kernel type to be applied. + kern_params : dict + Kernel parameters specific to the kernel type. + N : int + Length of the signal. + Nh : int + Half length of the signal. + + Returns: + -------- + tfd : ndarray + Modified TFD after kernel multiplication. + """ + # Loop over lag indices + for m in range(Nh): + # Generate the Doppler-lag kernel for each lag index + g_lag_slice = gen_doppler_lag_kern(kern_type, kern_params, N, m) + + # Extract and transform the TFD slice for this lag + tfd_slice = np.fft.fft(tfd[:, m]) + 1j * np.fft.fft(tfd[:, Nh + m]) + + # Multiply by the kernel and perform inverse FFT + R_lag_slice = np.fft.ifft(tfd_slice * g_lag_slice) + + # Store real and imaginary parts back into the TFD + tfd[:, m] = np.real(R_lag_slice) + tfd[:, Nh + m] = np.imag(R_lag_slice) + + return tfd + +def gen_doppler_lag_kern(kern_type, kern_params, N, lag_index): + """ + Generate the Doppler-lag kernel based on kernel type and parameters. + + Parameters: + ----------- + kern_type : str + Type of kernel (e.g., 'wvd', 'swvd', 'pwvd', etc.). + kern_params : dict + Parameters for the kernel. + N : int + Signal length. + lag_index : int + Current lag index. + + Returns: + -------- + g : ndarray + Doppler-lag kernel for the given lag. + """ + g = np.zeros(N, dtype=complex) # Initialize the kernel + + # Get kernel based on the type + g = get_kern(g, lag_index, kern_type, kern_params, N) + + return np.real(g) # All kernels are real valued + + +def get_kern(g, lag_index, kern_type, kern_params, N): + """ + Get the kernel based on the provided kernel type. + + Parameters: + ----------- + g : ndarray + Kernel to be filled. + lag_index : int + Lag index. + kern_type : str + Type of kernel ('wvd', 'swvd', 'pwvd', 'sep'). + kern_params : dict + Parameters for the kernel. + N : int + Signal length. + + Returns: + -------- + g : ndarray + Kernel function at the current lag. + """ + l = len(kern_params) + + if kern_type == 'wvd': + g[:] = 1 # WVD kernel is the equal to 1 for all lags + + elif kern_type == 'swvd': + key = 'lag' # Key for the lag-independent kernel + # Smoothed Wigner-Ville Distribution (Lag Independent kernel) + if l < 2: + raise ValueError("Need at least two window parameters for SWVD") + win_length = kern_params['win_length'] + win_type = kern_params['win_type'] + win_param = kern_params['win_param'] if l >= 3 else 0 + win_param2 = kern_params['win_param2'] if l >= 4 else 1 + + G1 = get_window(win_length, win_type, win_param) + G1 = pad_window(G1, N) + + # Define window in the time domain or Doppler domain + if win_param2 == 0: + G1 = np.fft.fft(G1) + G1 /= G1[0] + + g[:] = G1 # Assign the window to the kernel + + elif kern_type == 'pwvd': + key = 'doppler' + # Pseudo-Wigner-Ville Distribution (Doppler Independent kernel) + if l < 2: + raise ValueError("Need at least two window parameters for PWVD") + win_length = kern_params['win_length'] + win_type = kern_params['win_type'] + win_param = kern_params['win_param'] if l >= 3 else 0 + win_param2 = kern_params['win_param2'] if l >= 4 else 0 + + G2 = get_window(win_length, win_type, win_param) # Generate the window, same per lag iteration + G2 = pad_window(G2, N) # Zero-pad the window to the length of the signal + G2 = G2[lag_index] # Extract the lag_index-th element of the window --> can this be done more efficiently? Since we only need one element of the window and the padding is similar for every iteration + + g[:] = G2 # Assign the window to the kernel + + elif kern_type == 'sep': + # Separable Kernel + g1 = np.copy(g) # Create a new array for g1 + g2 = np.copy(g) # Create a new array for g2 + + # Call recursively to obtain g1 and g2 kernels (no in-place modification of g) + g1 = get_kern(g1, lag_index, 'swvd', kern_params['lag'], N) # Generate the first kernel + g2 = get_kern(g2, lag_index, 'pwvd', kern_params['doppler'], N) # Generate the second kernel + g = g1 * g2 # Multiply the two kernels to obtain the separable kernel + + else: + raise ValueError(f"Unknown kernel type: {kern_type}") + + return g + +def get_window(win_length, win_type, win_param=None, dft_window=False, Npad=0): + """ + General function to calculate a window function. + + Parameters: + ----------- + win_length : int + Length of the window. + win_type : str + Type of window. Options are: + {'delta', 'rect', 'hamm'|'hamming', 'hann'|'hanning', 'gauss', 'cosh'}. + win_param : float, optional + Window parameter (e.g., alpha for Gaussian window). Default is None. + dft_window : bool, optional + If True, returns the DFT of the window. Default is False. + Npad : int, optional + If greater than 0, zero-pads the window to length Npad. Default is 0. + + Returns: + -------- + win : ndarray + The calculated window (or its DFT if dft_window is True). + """ + # Handle optional arguments + if win_param is None: + win_param = [] + + # Get the window + win = get_win(win_length, win_type, win_param, dft_window) + + # Shift the window so that positive indices are first + win = shift_window(win) + + # Zero-pad the window to length Npad if necessary + if Npad > 0: + win = pad_window(win, Npad) + + return win + +def get_win(win_length, win_type, win_param=None, dft_window=False): + """ + Helper function to create the specified window type. + + Parameters: + ----------- + win_length : int + Length of the window. + win_type : str + Type of window. + win_param : float, optional + Additional parameter for certain window types (e.g., Gaussian alpha). Default is None. + dft_window : bool, optional + If True, returns the DFT of the window. Default is False. + + Returns: + -------- + win : ndarray + The created window (or its DFT if dft_window is True). + """ + if win_type == 'delta': + win = np.zeros(win_length) + win[win_length // 2] = 1 + elif win_type == 'rect': + win = np.ones(win_length) + elif win_type in ['hamm', 'hamming']: + win = signal.windows.hamming(win_length) + elif win_type in ['hann', 'hanning']: + win = signal.windows.hann(win_length) + elif win_type == 'gauss': + win = signal.windows.gaussian(win_length, std=win_param if win_param else 0.4) + elif win_type == 'cosh': + win_hlf = win_length // 2 + if not win_param: + win_param = 0.01 + win = np.array([np.cosh(m) ** (-2 * win_param) for m in range(-win_hlf, win_hlf+1)]) + win = np.fft.fftshift(win) + else: + raise ValueError(f"Unknown window type {win_type}") + + # If dft_window is True, return the DFT of the window + if dft_window: + win = np.fft.fft(np.roll(win, win_length // 2)) + win = np.roll(win, -win_length // 2) + + return win + +def shift_window(w): + """ + Shift the window so that positive indices appear first. + + Parameters: + ----------- + w : ndarray + Window to be shifted. + + Returns: + -------- + w_shifted : ndarray + Shifted window with positive indices first. + """ + N = len(w) + return np.roll(w, N // 2) + +def pad_window(w, Npad): + """ + Zero-pad the window to a specified length. + + Parameters: + ----------- + w : ndarray + The original window. + Npad : int + Length to zero-pad the window to. + + Returns: + -------- + w_pad : ndarray + Zero-padded window of length Npad. + + Raises: + ------- + ValueError: + If Npad is less than the original window length. + """ + N = len(w) + w_pad = np.zeros(Npad) + Nh = N // 2 + + if Npad < N: + raise ValueError("Npad must be greater than or equal to the window length") + + if N == Npad: + return w + + if N % 2 == 1: # For odd N + w_pad[:Nh+1] = w[:Nh+1] + w_pad[-Nh:] = w[-Nh:] + else: # For even N + w_pad[:Nh] = w[:Nh] + w_pad[Nh] = w[Nh] / 2 + w_pad[-Nh:] = w[-Nh:] + w_pad[-Nh] = w[Nh] / 2 + + return w_pad + +def compute_tfd(N, Nh, tfd): + """ + Finalizes the time-frequency distribution computation. + + Parameters: + ----------- + N : int + Size of the TFD. + Nh : int + Half-length parameter. + tfd : ndarray + Time-frequency distribution to be finalized. + + Returns: + -------- + tfd : ndarray + Final computed TFD (N,N). + """ + m = np.arange(0, Nh) # m = 0:(Nh-1) + mb = np.arange(1, Nh) # mb = 1:(Nh-1) + + m_real = np.arange(Nh) # m_real = 0:(Nh-1) --> is this the same as m? In matlab it is different because of 1-based indexing + m_imag = np.arange(Nh, N) # m_imag = Nh:(N-1) + + for n in range(0, N-1, 2): # for n=0:2:N-2 + R_even_half = np.complex128(tfd[n, :Nh]) + 1j * np.complex128(tfd[n, Nh:]) + R_odd_half = np.complex128(tfd[n+1, :Nh]) + 1j * np.complex128(tfd[n+1, Nh:]) + + R_tslice_even = np.zeros(N, dtype=np.complex128) + R_tslice_odd = np.zeros(N, dtype=np.complex128) + + R_tslice_even[m] = R_even_half + R_tslice_odd[m] = R_odd_half + + R_tslice_even[N-mb] = np.conj(R_even_half[mb]) + R_tslice_odd[N-mb] = np.conj(R_odd_half[mb]) + + # Perform FFT to compute time slices + tfd_time_slice = np.fft.fft(R_tslice_even + 1j * R_tslice_odd) + + tfd[n, :] = np.real(tfd_time_slice) + tfd[n+1, :] = np.imag(tfd_time_slice) + + tfd = tfd / N # Normalize the TFD + tfd = tfd.transpose() # Transpose the TFD to have the time on the x-axis and frequency on the y-axis + return tfd diff --git a/src/paradigma/ppg_preprocessing.py b/src/paradigma/ppg_preprocessing.py index e4a599c4..1b195408 100644 --- a/src/paradigma/ppg_preprocessing.py +++ b/src/paradigma/ppg_preprocessing.py @@ -71,12 +71,16 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS # Drop the gyroscope columns from the IMU data cols_to_drop = df_imu.filter(regex='^rotation_').columns df_imu.drop(cols_to_drop, axis=1, inplace=True) - df_imu = df_imu.rename(columns={f'acceleration_{a}': f'accelerometer_{a}' for a in ['x', 'y', 'z']}) + df_acc = df_imu.rename(columns={f'acceleration_{a}': f'accelerometer_{a}' for a in ['x', 'y', 'z']}) + + # Apply scale factors to accelerometer data only + acc_columns = list(imu_config.d_channels_accelerometer.keys()) + df_acc[acc_columns] *= metadata_values_imu.scale_factors[:3] # Transform the time arrays to absolute milliseconds start_time_ppg = parse_iso8601_to_datetime(metadata_time_ppg.start_iso8601).timestamp() - df_imu[DataColumns.TIME] = paradigma.imu_preprocessing.transform_time_array( - time_array=df_imu[DataColumns.TIME], + df_acc[DataColumns.TIME] = paradigma.imu_preprocessing.transform_time_array( + time_array=df_acc[DataColumns.TIME], scale_factor=1000, input_unit_type = TimeUnit.DIFFERENCE_MS, output_unit_type = TimeUnit.RELATIVE_MS, @@ -91,16 +95,13 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS start_time = start_time_imu) # Extract overlapping segments - print("Shape of the original data:", df_ppg.shape, df_imu.shape) - df_ppg_overlapping, df_imu_overlapping = extract_overlapping_segments(df_ppg, df_imu) - print("Shape of the overlapping segments:", df_ppg_overlapping.shape, df_imu_overlapping.shape) - - # Apply scale factors - df_imu_overlapping[list(imu_config.d_channels_imu.keys())] *= metadata_values_imu.scale_factors - + print("Shape of the original data:", df_ppg.shape, df_acc.shape) + df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(df_ppg, df_acc) + print("Shape of the overlapping segments:", df_ppg_overlapping.shape, df_acc_overlapping.shape) + # Resample accelerometer data - df_imu_proc = paradigma.imu_preprocessing.resample_data( - df=df_imu_overlapping, + df_acc_proc = paradigma.imu_preprocessing.resample_data( + df=df_acc_overlapping, time_column=DataColumns.TIME, time_unit_type=TimeUnit.RELATIVE_MS, values_column_names = list(imu_config.d_channels_accelerometer.keys()), @@ -119,19 +120,19 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS # change to correct units [g] if imu_config.acceleration_units == DataUnits.ACCELERATION: - df_imu_proc[col] /= 9.81 + df_acc_proc[col] /= 9.81 for result, side_pass in zip(['filt', 'grav'], ['hp', 'lp']): - df_imu_proc[f'{result}_{col}'] = paradigma.imu_preprocessing.butterworth_filter( - data=np.array(df_imu_proc[col]), + df_acc_proc[f'{result}_{col}'] = paradigma.imu_preprocessing.butterworth_filter( + data=np.array(df_acc_proc[col]), order=imu_config.filter_order, cutoff_frequency=imu_config.lower_cutoff_frequency, passband=side_pass, sampling_frequency=imu_config.sampling_frequency, ) - df_imu_proc = df_imu_proc.drop(columns=[col]) - df_imu_proc = df_imu_proc.rename(columns={f'filt_{col}': col}) + df_acc_proc = df_acc_proc.drop(columns=[col]) + df_acc_proc = df_acc_proc.rename(columns={f'filt_{col}': col}) for col in ppg_config.d_channels_ppg.keys(): df_ppg_proc[f'filt_{col}'] = paradigma.imu_preprocessing.butterworth_filter( @@ -152,7 +153,7 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS metadata_values_imu.file_name = 'accelerometer_values.bin' metadata_time_imu.units = [TimeUnit.ABSOLUTE_MS] metadata_time_imu.file_name = 'accelerometer_time.bin' - write_df_data(metadata_time_imu, metadata_values_imu, output_path, 'accelerometer_meta.json', df_imu_proc) + write_df_data(metadata_time_imu, metadata_values_imu, output_path, 'accelerometer_meta.json', df_acc_proc) metadata_values_ppg.channels = list(ppg_config.d_channels_ppg.keys()) metadata_values_ppg.units = list(ppg_config.d_channels_ppg.values()) @@ -161,7 +162,7 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS metadata_time_ppg.file_name = 'PPG_time.bin' write_df_data(metadata_time_ppg, metadata_values_ppg, output_path, 'PPG_meta.json', df_ppg_proc) - return df_ppg_proc, df_imu_proc + return df_ppg_proc, df_acc_proc # TODO: ideally something like this should be possible directly in the tsdf library @@ -288,23 +289,23 @@ def synchronization(ppg_meta, imu_meta): return segment_ppg_total, segment_imu_total -def extract_overlapping_segments(df_ppg, df_imu, time_column_ppg='time', time_column_imu='time'): +def extract_overlapping_segments(df_ppg, df_acc, time_column_ppg='time', time_column_imu='time'): """ Extract DataFrames with overlapping data segments between IMU and PPG datasets based on their timestamps. Parameters: df_ppg (pd.DataFrame): DataFrame containing PPG data with a time column in UNIX milliseconds. - df_imu (pd.DataFrame): DataFrame containing IMU data with a time column in UNIX milliseconds. + df_acc (pd.DataFrame): DataFrame containing IMU accelerometer data with a time column in UNIX milliseconds. time_column_ppg (str): Column name of the timestamp in the PPG DataFrame. time_column_imu (str): Column name of the timestamp in the IMU DataFrame. Returns: - tuple: Tuple containing two DataFrames (df_ppg_overlapping, df_imu_overlapping) that contain only the data + tuple: Tuple containing two DataFrames (df_ppg_overlapping, df_acc_overlapping) that contain only the data within the overlapping time segments. """ # Convert UNIX milliseconds to seconds ppg_time = df_ppg[time_column_ppg] / 1000 # Convert milliseconds to seconds - imu_time = df_imu[time_column_imu] / 1000 # Convert milliseconds to seconds + imu_time = df_acc[time_column_imu] / 1000 # Convert milliseconds to seconds # Determine the overlapping time interval start_time = max(ppg_time.iloc[0], imu_time.iloc[0]) @@ -318,6 +319,6 @@ def extract_overlapping_segments(df_ppg, df_imu, time_column_ppg='time', time_co # Extract overlapping segments from DataFrames df_ppg_overlapping = df_ppg.iloc[ppg_start_index:ppg_end_index + 1] - df_imu_overlapping = df_imu.iloc[imu_start_index:imu_end_index + 1] + df_acc_overlapping = df_acc.iloc[imu_start_index:imu_end_index + 1] - return df_ppg_overlapping, df_imu_overlapping + return df_ppg_overlapping, df_acc_overlapping