-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'refs/heads/pep_benchmarking'
# Conflicts: # poetry.lock # pyproject.toml # src/biopsykit/io/tfm.py # src/biopsykit/utils/array_handling.py # src/biopsykit/utils/exceptions.py
- Loading branch information
Showing
64 changed files
with
24,388 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
"""Module for processing different types of biosignals.""" | ||
from biopsykit.signals import ecg, eeg, imu, rsp | ||
from biopsykit.signals import ecg, eeg, icg, imu, rsp | ||
|
||
__all__ = ["ecg", "eeg", "imu", "rsp"] | ||
__all__ = ["ecg", "eeg", "imu", "rsp", "icg"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from typing import Literal, get_args | ||
|
||
import pandas as pd | ||
from tpcp import Algorithm | ||
|
||
HANDLE_MISSING_EVENTS = Literal["raise", "warn", "ignore"]


class CanHandleMissingEventsMixin(Algorithm):
    """Mixin that stores and validates the ``handle_missing_events`` option of an algorithm."""

    def __init__(self, handle_missing_events: HANDLE_MISSING_EVENTS = "warn"):
        """Store the strategy used when events are missing.

        Parameters
        ----------
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            How to handle missing data in the input dataframes. If "warn", a warning is raised if missing data is
            found. If "raise", an exception is raised if missing data is found. If "ignore", missing data is ignored.
            Default: "warn"

        """
        self.handle_missing_events = handle_missing_events

    def _check_valid_missing_handling(self):
        """Raise a :exc:`ValueError` if ``handle_missing_events`` is not one of the allowed literals."""
        allowed_values = get_args(HANDLE_MISSING_EVENTS)
        if self.handle_missing_events in allowed_values:
            return
        raise ValueError(
            f"Invalid value '{self.handle_missing_events}' for 'handle_missing_events'. "
            f"Must be one of {allowed_values}."
        )
|
||
|
||
class BaseExtraction(Algorithm):
    """Common interface shared by all fiducial point extraction algorithms.

    Attributes
    ----------
    points_ : :class:`~pandas.DataFrame`
        Result attribute holding the positions of the extracted points.

    """

    # NOTE(review): presumably the tpcp convention for naming the action method -- confirm against tpcp docs
    _action_methods = "extract"

    # results
    points_: pd.DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import pandas as pd | ||
from biopsykit.utils.exceptions import ValidationError | ||
|
||
__all__ = ["assert_sample_columns_int"] | ||
|
||
|
||
def assert_sample_columns_int(data: pd.DataFrame) -> None:
    """Assert that the columns of a DataFrame that have "_sample" in their name are of type int.

    Column labels that are not strings (e.g. integer labels) are skipped instead of breaking the check.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        DataFrame to validate

    Raises
    ------
    :exc:`~biopsykit.utils.exceptions.ValidationError`
        If no column name contains "_sample" or if any such column does not have an integer dtype

    """
    # collect the relevant columns once; the isinstance guard keeps non-string labels from raising
    sample_columns = [col for col in data.columns if isinstance(col, str) and "_sample" in col]
    if not sample_columns:
        raise ValidationError("DataFrame does not contain any columns with '_sample' in their name!")
    for col in sample_columns:
        if not pd.api.types.is_integer_dtype(data[col]):
            raise ValidationError(f"Column '{col}' is not of type 'int'!")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
"""Module for ECG data analysis and visualization.""" | ||
from biopsykit.signals.ecg import plotting | ||
from biopsykit.signals.ecg import event_extraction, plotting, preprocessing, segmentation | ||
from biopsykit.signals.ecg.ecg import EcgProcessor | ||
|
||
__all__ = ["EcgProcessor", "plotting"] | ||
__all__ = ["EcgProcessor", "plotting", "preprocessing", "segmentation", "event_extraction"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Module for ECG event extraction.""" | ||
from biopsykit.signals.ecg.event_extraction._base_ecg_extraction import BaseEcgExtraction | ||
from biopsykit.signals.ecg.event_extraction._q_peak_forounzafar2018 import QPeakExtractionForouzanfar2018 | ||
from biopsykit.signals.ecg.event_extraction._q_peak_martinez2004_neurokit import QPeakExtractionMartinez2004Neurokit | ||
from biopsykit.signals.ecg.event_extraction._q_peak_scipy_findpeaks_neurokit import ( | ||
QPeakExtractionSciPyFindPeaksNeurokit, | ||
) | ||
from biopsykit.signals.ecg.event_extraction._q_peak_vanlien2013 import QPeakExtractionVanLien2013 | ||
|
||
__all__ = [ | ||
"BaseEcgExtraction", | ||
"QPeakExtractionVanLien2013", | ||
"QPeakExtractionMartinez2004Neurokit", | ||
"QPeakExtractionSciPyFindPeaksNeurokit", | ||
"QPeakExtractionForouzanfar2018", | ||
] |
16 changes: 16 additions & 0 deletions
16
src/biopsykit/signals/ecg/event_extraction/_base_ecg_extraction.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import pandas as pd | ||
|
||
__all__ = ["BaseEcgExtraction"] | ||
|
||
from biopsykit.signals._base_extraction import BaseExtraction | ||
|
||
|
||
class BaseEcgExtraction(BaseExtraction):
    """Base class defining the ``extract`` interface of ECG event extraction algorithms."""

    def extract(
        self,
        *,
        ecg: pd.Series,
        heartbeats: pd.DataFrame,
        sampling_rate_hz: float,
    ):
        """Extract events from an ECG signal; must be overridden by subclasses.

        Parameters
        ----------
        ecg : :class:`~pandas.Series`
            ECG signal
        heartbeats : :class:`~pandas.DataFrame`
            Segmented heartbeats
        sampling_rate_hz : float
            Sampling rate of the ECG signal in Hz

        Raises
        ------
        NotImplementedError
            Always, since this base implementation is abstract

        """
        message = "This is an abstract method and needs to be implemented in a subclass."
        raise NotImplementedError(message)
94 changes: 94 additions & 0 deletions
94
src/biopsykit/signals/ecg/event_extraction/_q_peak_forounzafar2018.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from biopsykit.signals._base_extraction import HANDLE_MISSING_EVENTS, CanHandleMissingEventsMixin | ||
from biopsykit.signals._dtypes import assert_sample_columns_int | ||
from biopsykit.signals.ecg.event_extraction._base_ecg_extraction import BaseEcgExtraction | ||
from biopsykit.utils._datatype_validation_helper import _assert_has_columns, _assert_is_dtype | ||
from biopsykit.utils.array_handling import sanitize_input_series | ||
from tpcp import Parameter | ||
|
||
|
||
class QPeakExtractionForouzanfar2018(BaseEcgExtraction, CanHandleMissingEventsMixin):
    """Algorithm by Forouzanfar et al. (2018) for Q-peak extraction.

    For each heartbeat, the Q-peak is the last sample between the heartbeat start and the R-peak whose amplitude is
    below ``-1.2 * <R-peak amplitude> / scaling_factor``.
    """

    scaling_factor: Parameter[float]

    def __init__(self, scaling_factor: float = 2000, handle_missing_events: HANDLE_MISSING_EVENTS = "warn"):
        """Initialize new QPeakExtractionForouzanfar2018 algorithm instance.

        Parameters
        ----------
        scaling_factor : float, optional
            Scaling factor for the threshold used to detect the Q-peak. Default: 2000
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            How to handle missing data in the input dataframes. Default: "warn"

        """
        super().__init__(handle_missing_events=handle_missing_events)
        self.scaling_factor = scaling_factor

    # @make_action_safe
    def extract(
        self,
        *,
        ecg: pd.DataFrame,
        heartbeats: pd.DataFrame,
        sampling_rate_hz: int,  # noqa: ARG002
    ):
        """Extract Q-peaks from given ECG cleaned signal.

        The results are saved in the ``points_`` attribute of the super class.

        Parameters
        ----------
        ecg: :class:`~pandas.DataFrame`
            ECG signal
        heartbeats: :class:`~pandas.DataFrame`
            DataFrame containing one row per segmented heartbeat, each row contains start, end, and R-peak
            location (in samples from beginning of signal) of that heartbeat, index functions as id of heartbeat
        sampling_rate_hz: int
            Sampling rate of ECG signal in hz (not used by this algorithm)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``handle_missing_events`` is not one of the allowed values
        :exc:`~biopsykit.utils.exceptions.EventExtractionError`
            If missing data is found and ``handle_missing_events`` is set to "raise"

        """
        # function-scope imports: only needed for reporting heartbeats without a Q-peak
        import warnings

        from biopsykit.utils.exceptions import EventExtractionError

        self._check_valid_missing_handling()
        ecg = sanitize_input_series(ecg, name="ecg")
        ecg = ecg.squeeze()

        q_peak_samples = []
        nan_reasons = []

        # search Q-peak for each heartbeat of the given signal; iterating over the columns (instead of ``iterrows``)
        # keeps the sample positions from being upcast to a common row dtype
        for start, r_peak in zip(heartbeats["start_sample"], heartbeats["r_peak_sample"]):
            heartbeat_start = int(start)
            r_peak_sample = int(r_peak)

            # set an individual threshold for detecting the Q-peaks based on the R-peak
            threshold = (-1.2 * ecg.iloc[r_peak_sample]) / self.scaling_factor

            # search for the Q-peak as the last sample before the R-peak that is below the threshold;
            # ``iloc`` guarantees positional slicing regardless of the type of the ECG index
            ecg_before_r_peak = ecg.iloc[heartbeat_start:r_peak_sample].reset_index(drop=True)
            ecg_below = np.where(ecg_before_r_peak < threshold)[0]

            if len(ecg_below) == 0:
                q_peak_samples.append(np.nan)
                nan_reasons.append("no_value_below_threshold")
                continue

            q_peak_samples.append(heartbeat_start + ecg_below[-1])
            nan_reasons.append(np.nan)

        # result df (object dtype first, cast to the final dtypes below)
        q_peaks = pd.DataFrame(
            {"q_peak_sample": q_peak_samples, "nan_reason": nan_reasons},
            index=heartbeats.index,
            dtype=object,
        )

        _assert_is_dtype(q_peaks, pd.DataFrame)
        _assert_has_columns(q_peaks, [["q_peak_sample", "nan_reason"]])
        q_peaks = q_peaks.astype({"q_peak_sample": "Int64", "nan_reason": "object"})
        assert_sample_columns_int(q_peaks)

        # inform user about missing Q-peaks according to ``handle_missing_events``
        missing_mask = q_peaks["q_peak_sample"].isna()
        num_missing = int(missing_mask.sum())
        if num_missing > 0:
            missing_str = (
                f"No Q-peak detected in {num_missing} heartbeats:\n"
                f"- for heartbeats {q_peaks.index[missing_mask].to_numpy()} no ECG sample before the R-peak "
                f"was below the threshold"
            )
            if self.handle_missing_events == "warn":
                warnings.warn(missing_str)
            elif self.handle_missing_events == "raise":
                raise EventExtractionError(missing_str)

        self.points_ = q_peaks
        return self
134 changes: 134 additions & 0 deletions
134
src/biopsykit/signals/ecg/event_extraction/_q_peak_martinez2004_neurokit.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
import warnings | ||
|
||
import neurokit2 as nk | ||
import numpy as np | ||
import pandas as pd | ||
from biopsykit.signals._base_extraction import HANDLE_MISSING_EVENTS, CanHandleMissingEventsMixin | ||
from biopsykit.signals._dtypes import assert_sample_columns_int | ||
from biopsykit.signals.ecg.event_extraction._base_ecg_extraction import BaseEcgExtraction | ||
from biopsykit.utils._datatype_validation_helper import _assert_has_columns, _assert_is_dtype | ||
from biopsykit.utils.array_handling import sanitize_input_series | ||
from biopsykit.utils.exceptions import EventExtractionError | ||
|
||
|
||
class QPeakExtractionMartinez2004Neurokit(BaseEcgExtraction, CanHandleMissingEventsMixin):
    """Algorithm by Martinez et al. (2004) for Q-peak extraction using the DWT method implemented in NeuroKit2."""

    def __init__(self, handle_missing_events: HANDLE_MISSING_EVENTS = "warn"):
        """Initialize new QPeakExtractionMartinez2004Neurokit algorithm instance.

        Parameters
        ----------
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            How to handle missing data in the input dataframes. Default: "warn"

        """
        super().__init__(handle_missing_events=handle_missing_events)

    # @make_action_safe
    def extract(
        self,
        *,
        ecg: pd.DataFrame,
        heartbeats: pd.DataFrame,
        sampling_rate_hz: int,
    ):
        """Extract Q-peaks from given ECG cleaned signal.

        The results are saved in the ``points_`` attribute of the super class.

        Parameters
        ----------
        ecg: :class:`~pandas.DataFrame`
            ECG signal
        heartbeats: :class:`~pandas.DataFrame`
            DataFrame containing one row per segmented heartbeat, each row contains start, end, and R-peak
            location (in samples from beginning of signal) of that heartbeat, index functions as id of heartbeat
        sampling_rate_hz: int
            Sampling rate of ECG signal in hz

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``handle_missing_events`` is not one of the allowed values
        :exc:`~biopsykit.utils.exceptions.EventExtractionError`
            If missing data is found and ``handle_missing_events`` is set to "raise"

        """
        self._check_valid_missing_handling()

        ecg = sanitize_input_series(ecg, name="ecg")
        ecg = ecg.squeeze()

        # result df
        q_peaks = pd.DataFrame(index=heartbeats.index, columns=["q_peak_sample", "nan_reason"])

        # used subsequently to store ids (index labels) of heartbeats for which no valid Q-peak could be detected
        heartbeats_no_q = []
        heartbeats_q_after_r = []

        # some neurokit functions (for example ecg_delineate()) don't work with r-peaks input as Series, so list instead
        r_peaks = list(heartbeats["r_peak_sample"])

        _, waves = nk.ecg_delineate(ecg, rpeaks=r_peaks, sampling_rate=sampling_rate_hz, method="dwt", show=False)

        extracted_q_peaks = waves["ECG_Q_Peaks"]

        # find heartbeat to which Q-peak belongs and save Q-peak position in corresponding row
        for pos, q in enumerate(extracted_q_peaks):
            # for some heartbeats, no Q can be detected, will be NaN in resulting df
            if np.isnan(q):
                # NOTE(review): assumes neurokit returns one entry per passed R-peak, in order, so that the position
                # can be translated to the heartbeat id -- confirm against the neurokit documentation
                heartbeats_no_q.append(heartbeats.index[pos])
                continue

            containing = heartbeats.index[(heartbeats["start_sample"] < q) & (q < heartbeats["end_sample"])]
            if len(containing) == 0:
                # Q-peak lies in none of the heartbeats; affected rows stay NaN and are reported below
                continue
            heartbeat_idx = containing[0]

            # Q occurs after R, which is not valid
            if heartbeats.loc[heartbeat_idx, "r_peak_sample"] < q:
                heartbeats_q_after_r.append(heartbeat_idx)
                q_peaks.loc[heartbeat_idx, "q_peak_sample"] = np.nan
            # valid Q-peak found
            else:
                q_peaks.loc[heartbeat_idx, "q_peak_sample"] = q

        # inform user about missing Q-values
        missing_mask = q_peaks["q_peak_sample"].isna()
        num_missing = int(missing_mask.sum())
        if num_missing > 0:
            # only rows that are still missing get a reason assigned
            missing_index = q_peaks.index[missing_mask]
            is_no_q = missing_index.isin(heartbeats_no_q)
            is_q_after_r = missing_index.isin(heartbeats_q_after_r)
            no_q = missing_index[is_no_q]
            q_after_r = missing_index[is_q_after_r]
            unexplained = missing_index[~is_no_q & ~is_q_after_r]

            missing_str = f"No Q-peak detected in {num_missing} heartbeats:\n"
            if len(no_q) > 0:
                q_peaks.loc[no_q, "nan_reason"] = "no_q_peak"
                missing_str += (
                    f"- for heartbeats {no_q.tolist()} the neurokit algorithm was not able to detect a Q-peak\n"
                )
            if len(q_after_r) > 0:
                q_peaks.loc[q_after_r, "nan_reason"] = "q_after_r_peak"
                missing_str += (
                    f"- for heartbeats {q_after_r.tolist()} the detected Q is invalid "
                    f"because it occurs after the R-peak\n"
                )
            if len(unexplained) > 0:
                q_peaks.loc[unexplained, "nan_reason"] = "no_q_peak_within_heartbeats"
                missing_str += (
                    f"- for {unexplained.to_numpy()} apparently none of the found Q-peaks "
                    f"were within these heartbeats"
                )

            if self.handle_missing_events == "warn":
                warnings.warn(missing_str)
            elif self.handle_missing_events == "raise":
                raise EventExtractionError(missing_str)

        _assert_is_dtype(q_peaks, pd.DataFrame)
        _assert_has_columns(q_peaks, [["q_peak_sample", "nan_reason"]])
        q_peaks = q_peaks.astype({"q_peak_sample": "Int64", "nan_reason": "object"})
        assert_sample_columns_int(q_peaks)

        self.points_ = q_peaks
        return self
Oops, something went wrong.