Skip to content

Commit

Permalink
Merge branch 'refs/heads/pep_benchmarking'
Browse files Browse the repository at this point in the history
# Conflicts:
#	poetry.lock
#	pyproject.toml
#	src/biopsykit/io/tfm.py
#	src/biopsykit/utils/array_handling.py
#	src/biopsykit/utils/exceptions.py
  • Loading branch information
richrobe committed Jan 3, 2025
2 parents 2117b2c + 17d33fe commit f996f32
Show file tree
Hide file tree
Showing 64 changed files with 24,388 additions and 27 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ mne = {version = "^1.2.1", optional = true}
IPython = {version = "^7", optional = true}
ipywidgets = {version = "^8", optional = true}
ipympl = {version = "^0.9", optional = true}
tpcp = ">=1"
ts2vg = "^1.2.4"
statannotations = "^0.7.1"

[tool.poetry.extras]
Expand Down
22 changes: 8 additions & 14 deletions src/biopsykit/io/tfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
from typing import ClassVar, Optional

import pandas as pd
from scipy.io import loadmat

from biopsykit.utils._types import path_t
from scipy.io import loadmat


class TFMDataset:
Expand Down Expand Up @@ -43,20 +42,15 @@ def from_mat_file(
# channel_mapping: Optional[Dict[str, str]] = None,
tz: Optional[str] = "Europe/Berlin",
):
"""Load TFM data from a .mat file.
"""Load a TFM dataset from a .mat file.
Parameters
----------
path : str
Path to the .mat file containing the TFM data.
path : str or :class:`~pathlib.Path`
Path to the .mat file.
tz : str, optional
Timezone of the data. Default: "Europe/Berlin"
Returns
-------
:class:`~biopsykit.io.tfm.TFMDataset`
TFM dataset object.
"""
data = loadmat(path, struct_as_record=False, squeeze_me=True)
data_raw = data["RAW_SIGNALS"]
Expand All @@ -65,13 +59,13 @@ def from_mat_file(
data_dict = {key: getattr(data_raw, value) for key, value in cls.CHANNEL_MAPPING.items()}
return cls(data_dict=data_dict, tz=tz, sampling_rate_dict={})

def data_as_df(self):
"""Return data as a single pandas DataFrame.
def data_as_df(self) -> dict[str, pd.DataFrame]:
"""Return the TFM data as a dictionary of pandas DataFrames.
Returns
-------
:class:`~pandas.DataFrame`
Data as a single pandas DataFrame.
dict
Dictionary containing the TFM data as pandas DataFrames. Keys are channel names, values are the dataframes.
"""
return self._data
4 changes: 2 additions & 2 deletions src/biopsykit/signals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Module for processing different types of biosignals."""
from biopsykit.signals import ecg, eeg, imu, rsp
from biopsykit.signals import ecg, eeg, icg, imu, rsp

__all__ = ["ecg", "eeg", "imu", "rsp"]
__all__ = ["ecg", "eeg", "imu", "rsp", "icg"]
41 changes: 41 additions & 0 deletions src/biopsykit/signals/_base_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import Literal, get_args

import pandas as pd
from tpcp import Algorithm

HANDLE_MISSING_EVENTS = Literal["raise", "warn", "ignore"]


class CanHandleMissingEventsMixin(Algorithm):
    """Mixin that stores and validates the ``handle_missing_events`` setting of an algorithm."""

    def __init__(self, handle_missing_events: HANDLE_MISSING_EVENTS = "warn"):
        """Store how missing events should be treated.

        Parameters
        ----------
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            Strategy for missing events in the input dataframes: "warn" is meant to issue a warning, "raise" to
            raise an exception, and "ignore" to silently skip them. The value is only stored here; it is checked
            by :meth:`_check_valid_missing_handling`.
            Default: "warn"

        """
        self.handle_missing_events = handle_missing_events

    def _check_valid_missing_handling(self):
        """Raise a :exc:`ValueError` if ``handle_missing_events`` is not one of the allowed literals."""
        allowed_values = get_args(HANDLE_MISSING_EVENTS)
        if self.handle_missing_events in allowed_values:
            return
        raise ValueError(
            f"Invalid value '{self.handle_missing_events}' for 'handle_missing_events'. "
            f"Must be one of {allowed_values}."
        )


class BaseExtraction(Algorithm):
    """Base class which defines the interface for all fiducial point extraction algorithms.

    Subclasses are expected to implement an ``extract`` method that stores its output in ``points_``.

    Results
    -------
    points_ : :class:`~pandas.DataFrame`
        Positions of the extracted points.

    """

    # name of the action method; presumably read by tpcp's ``Algorithm`` machinery -- confirm against the tpcp docs
    _action_methods = "extract"

    # results
    points_: pd.DataFrame
13 changes: 13 additions & 0 deletions src/biopsykit/signals/_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd
from biopsykit.utils.exceptions import ValidationError

__all__ = ["assert_sample_columns_int"]


def assert_sample_columns_int(data: pd.DataFrame) -> None:
    """Check that every column whose name contains ``"_sample"`` has an integer dtype.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        Dataframe to validate.

    Raises
    ------
    :exc:`~biopsykit.utils.exceptions.ValidationError`
        If no column name contains ``"_sample"`` or if any such column is not of an integer dtype.

    """
    has_sample_column = any(data.columns.str.contains("_sample"))
    if not has_sample_column:
        raise ValidationError("DataFrame does not contain any columns with '_sample' in their name!")

    for column_name in data.columns:
        if "_sample" not in column_name:
            continue
        if pd.api.types.is_integer_dtype(data[column_name]):
            continue
        raise ValidationError(f"Column '{column_name}' is not of type 'int'!")
4 changes: 2 additions & 2 deletions src/biopsykit/signals/ecg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Module for ECG data analysis and visualization."""
from biopsykit.signals.ecg import plotting
from biopsykit.signals.ecg import event_extraction, plotting, preprocessing, segmentation
from biopsykit.signals.ecg.ecg import EcgProcessor

__all__ = ["EcgProcessor", "plotting"]
__all__ = ["EcgProcessor", "plotting", "preprocessing", "segmentation", "event_extraction"]
16 changes: 16 additions & 0 deletions src/biopsykit/signals/ecg/event_extraction/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for ECG event extraction."""
from biopsykit.signals.ecg.event_extraction._base_ecg_extraction import BaseEcgExtraction
from biopsykit.signals.ecg.event_extraction._q_peak_forounzafar2018 import QPeakExtractionForouzanfar2018
from biopsykit.signals.ecg.event_extraction._q_peak_martinez2004_neurokit import QPeakExtractionMartinez2004Neurokit
from biopsykit.signals.ecg.event_extraction._q_peak_scipy_findpeaks_neurokit import (
QPeakExtractionSciPyFindPeaksNeurokit,
)
from biopsykit.signals.ecg.event_extraction._q_peak_vanlien2013 import QPeakExtractionVanLien2013

__all__ = [
"BaseEcgExtraction",
"QPeakExtractionVanLien2013",
"QPeakExtractionMartinez2004Neurokit",
"QPeakExtractionSciPyFindPeaksNeurokit",
"QPeakExtractionForouzanfar2018",
]
16 changes: 16 additions & 0 deletions src/biopsykit/signals/ecg/event_extraction/_base_ecg_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pandas as pd

__all__ = ["BaseEcgExtraction"]

from biopsykit.signals._base_extraction import BaseExtraction


class BaseEcgExtraction(BaseExtraction):
    """Interface for algorithms that extract events from an ECG signal."""

    def extract(
        self,
        *,
        ecg: pd.Series,
        heartbeats: pd.DataFrame,
        sampling_rate_hz: float,
    ):
        """Extract events from the ECG signal; must be overridden by subclasses.

        Parameters
        ----------
        ecg : :class:`~pandas.Series`
            ECG signal
        heartbeats : :class:`~pandas.DataFrame`
            Segmented heartbeats
        sampling_rate_hz : float
            Sampling rate of the ECG signal in Hz

        Raises
        ------
        NotImplementedError
            Always, because this base implementation is abstract.

        """
        message = "This is an abstract method and needs to be implemented in a subclass."
        raise NotImplementedError(message)
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import numpy as np
import pandas as pd
from biopsykit.signals._base_extraction import HANDLE_MISSING_EVENTS, CanHandleMissingEventsMixin
from biopsykit.signals._dtypes import assert_sample_columns_int
from biopsykit.signals.ecg.event_extraction._base_ecg_extraction import BaseEcgExtraction
from biopsykit.utils._datatype_validation_helper import _assert_has_columns, _assert_is_dtype
from biopsykit.utils.array_handling import sanitize_input_series
from tpcp import Parameter


class QPeakExtractionForouzanfar2018(BaseEcgExtraction, CanHandleMissingEventsMixin):
    """Algorithm by Forouzanfar et al. (2018) for Q-peak extraction.

    For each heartbeat, the Q-peak is the last sample between the heartbeat start and the R-peak whose amplitude
    is below ``-1.2 * ecg[r_peak] / scaling_factor``.
    """

    scaling_factor: Parameter[float]

    def __init__(self, scaling_factor: float = 2000, handle_missing_events: HANDLE_MISSING_EVENTS = "warn"):
        """Initialize new QPeakExtractionForouzanfar2018 algorithm instance.

        Parameters
        ----------
        scaling_factor : float, optional
            Scaling factor for the threshold used to detect the Q-peak. Default: 2000
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            How to handle missing data in the input dataframes. Default: "warn"

        """
        super().__init__(handle_missing_events=handle_missing_events)
        self.scaling_factor = scaling_factor

    # @make_action_safe
    def extract(
        self,
        *,
        ecg: pd.DataFrame,
        heartbeats: pd.DataFrame,
        sampling_rate_hz: int,  # noqa: ARG002
    ):
        """Extract Q-peaks from given ECG cleaned signal.

        The results are saved in the ``points_`` attribute of the super class. Heartbeats without a Q-peak get a
        missing value in ``q_peak_sample`` and the reason ``"no_value_below_threshold"`` in ``nan_reason``.

        Parameters
        ----------
        ecg: :class:`~pandas.DataFrame`
            ECG signal
        heartbeats: :class:`~pandas.DataFrame`
            DataFrame containing one row per segmented heartbeat, each row contains start, end, and R-peak
            location (in samples from beginning of signal) of that heartbeat, index functions as id of heartbeat
        sampling_rate_hz: int
            Sampling rate of ECG signal in hz (not used by this algorithm)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``handle_missing_events`` is not one of the allowed values

        Notes
        -----
        ``handle_missing_events`` is validated but otherwise not used here: heartbeats without a Q-peak are only
        recorded in ``nan_reason``; no warning or exception is issued for them.

        """
        self._check_valid_missing_handling()
        ecg = sanitize_input_series(ecg, name="ecg")
        ecg = ecg.squeeze()

        # result df
        q_peaks = pd.DataFrame(index=heartbeats.index, columns=["q_peak_sample", "nan_reason"])

        # Iterate over the two needed columns directly instead of ``iterrows()``: ``iterrows`` builds one Series per
        # row and does not preserve dtypes, so integer sample positions can be upcast to float when the dataframe
        # also holds float columns, which breaks positional indexing below.
        heartbeat_iter = zip(heartbeats.index, heartbeats["start_sample"], heartbeats["r_peak_sample"])

        # search Q-peak for each heartbeat of the given signal
        for idx, heartbeat_start, r_peak_sample in heartbeat_iter:
            # set an individual threshold for detecting the Q-peaks based on the R-peak
            threshold = (-1.2 * ecg.iloc[r_peak_sample]) / self.scaling_factor

            # search for the Q-peak as the last sample before the R-peak that is below the threshold;
            # ``.iloc`` makes the slice explicitly positional, consistent with the R-peak lookup above
            ecg_before_r_peak = ecg.iloc[heartbeat_start:r_peak_sample].to_numpy()
            ecg_below = np.flatnonzero(ecg_before_r_peak < threshold)

            if len(ecg_below) == 0:
                q_peaks.loc[idx, "q_peak_sample"] = np.nan
                q_peaks.loc[idx, "nan_reason"] = "no_value_below_threshold"
                continue

            # positions in ``ecg_below`` are relative to the heartbeat start
            q_peaks.loc[idx, "q_peak_sample"] = heartbeat_start + ecg_below[-1]

        _assert_is_dtype(q_peaks, pd.DataFrame)
        _assert_has_columns(q_peaks, [["q_peak_sample", "nan_reason"]])
        q_peaks = q_peaks.astype({"q_peak_sample": "Int64", "nan_reason": "object"})
        assert_sample_columns_int(q_peaks)

        self.points_ = q_peaks
        return self
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import warnings

import neurokit2 as nk
import numpy as np
import pandas as pd
from biopsykit.signals._base_extraction import HANDLE_MISSING_EVENTS, CanHandleMissingEventsMixin
from biopsykit.signals._dtypes import assert_sample_columns_int
from biopsykit.signals.ecg.event_extraction._base_ecg_extraction import BaseEcgExtraction
from biopsykit.utils._datatype_validation_helper import _assert_has_columns, _assert_is_dtype
from biopsykit.utils.array_handling import sanitize_input_series
from biopsykit.utils.exceptions import EventExtractionError


class QPeakExtractionMartinez2004Neurokit(BaseEcgExtraction, CanHandleMissingEventsMixin):
    """Algorithm by Martinez et al. (2004) for Q-peak extraction using the DWT method implemented in NeuroKit2."""

    def __init__(self, handle_missing_events: HANDLE_MISSING_EVENTS = "warn"):
        """Initialize new QPeakExtractionMartinez2004Neurokit algorithm instance.

        Parameters
        ----------
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            How to handle missing data in the input dataframes. Default: "warn"

        """
        super().__init__(handle_missing_events=handle_missing_events)

    # @make_action_safe
    def extract(
        self,
        *,
        ecg: pd.DataFrame,
        heartbeats: pd.DataFrame,
        sampling_rate_hz: int,
    ):
        """Extract Q-peaks from given ECG cleaned signal.

        The results are saved in the ``points_`` attribute of the super class. Heartbeats without a valid Q-peak
        get a missing value in ``q_peak_sample`` and one of the reasons ``"no_q_peak"``, ``"q_after_r_peak"``, or
        ``"no_q_peak_within_heartbeats"`` in ``nan_reason``.

        Parameters
        ----------
        ecg: :class:`~pandas.DataFrame`
            ECG signal
        heartbeats: :class:`~pandas.DataFrame`
            DataFrame containing one row per segmented heartbeat, each row contains start, end, and R-peak
            location (in samples from beginning of signal) of that heartbeat, index functions as id of heartbeat
        sampling_rate_hz: int
            Sampling rate of ECG signal in hz

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``handle_missing_events`` is not one of the allowed values
        :exc:`~biopsykit.utils.exceptions.EventExtractionError`
            If missing data is found and ``handle_missing_events`` is set to "raise"

        """
        self._check_valid_missing_handling()

        ecg = sanitize_input_series(ecg, name="ecg")
        ecg = ecg.squeeze()

        # result df
        q_peaks = pd.DataFrame(index=heartbeats.index, columns=["q_peak", "nan_reason"])

        # used subsequently to store ids of heartbeats for which no (valid) Q-peak could be detected
        heartbeats_no_q = []
        heartbeats_q_after_r = []

        # some neurokit functions (for example ecg_delineate()) don't work with r-peaks input as Series, so list instead
        r_peaks = list(heartbeats["r_peak_sample"])

        _, waves = nk.ecg_delineate(ecg, rpeaks=r_peaks, sampling_rate=sampling_rate_hz, method="dwt", show=False)

        extracted_q_peaks = waves["ECG_Q_Peaks"]

        # find heartbeat to which Q-peak belongs and save Q-peak position in corresponding row
        for pos, q in enumerate(extracted_q_peaks):
            # for some heartbeats, no Q can be detected, will be NaN in resulting df
            if np.isnan(q):
                # ``pos`` is a position in the R-peak list, not a heartbeat id, so translate it to the index label
                # (assumes neurokit returns one entry per R-peak, in input order -- TODO confirm)
                heartbeats_no_q.append(heartbeats.index[pos])
                continue

            in_heartbeat = (heartbeats["start_sample"] < q) & (q < heartbeats["end_sample"])
            matching_heartbeats = heartbeats.loc[in_heartbeat].index
            if len(matching_heartbeats) == 0:
                # Q-peak lies in no heartbeat window; affected heartbeats stay NaN and are reported below
                continue
            heartbeat_idx = matching_heartbeats[0]

            # Q occurs after R, which is not valid
            if heartbeats["r_peak_sample"].loc[heartbeat_idx].item() < q:
                heartbeats_q_after_r.append(heartbeat_idx)
                q_peaks.loc[heartbeat_idx, "q_peak"] = np.nan
            # valid Q-peak found
            else:
                q_peaks.loc[heartbeat_idx, "q_peak"] = q

        # inform user about missing Q-values
        missing_mask = q_peaks["q_peak"].isna()
        num_missing = missing_mask.sum()
        if num_missing > 0:
            # rows that are missing for neither of the two known reasons
            nan_rows = q_peaks[missing_mask]
            nan_rows = nan_rows.drop(index=heartbeats_q_after_r, errors="ignore")
            nan_rows = nan_rows.drop(index=heartbeats_no_q, errors="ignore")

            missing_str = f"No Q-peak detected in {num_missing} heartbeats:\n"
            if len(heartbeats_no_q) > 0:
                q_peaks.loc[heartbeats_no_q, "nan_reason"] = "no_q_peak"
                missing_str += (
                    f"- for heartbeats {heartbeats_no_q} the neurokit algorithm was not able to detect a Q-peak\n"
                )
            if len(heartbeats_q_after_r) > 0:
                q_peaks.loc[heartbeats_q_after_r, "nan_reason"] = "q_after_r_peak"
                missing_str += (
                    f"- for heartbeats {heartbeats_q_after_r} the detected Q is invalid "
                    f"because it occurs after the R-peak\n"
                )
            if len(nan_rows.index.values) > 0:
                q_peaks.loc[nan_rows.index, "nan_reason"] = "no_q_peak_within_heartbeats"
                missing_str += (
                    f"- for {nan_rows.index.to_numpy()} apparently none of the found Q-peaks "
                    f"were within these heartbeats"
                )

            if self.handle_missing_events == "warn":
                warnings.warn(missing_str)
            elif self.handle_missing_events == "raise":
                raise EventExtractionError(missing_str)

        q_peaks.columns = ["q_peak_sample", "nan_reason"]
        _assert_is_dtype(q_peaks, pd.DataFrame)
        _assert_has_columns(q_peaks, [["q_peak_sample", "nan_reason"]])
        q_peaks = q_peaks.astype({"q_peak_sample": "Int64", "nan_reason": "object"})
        assert_sample_columns_int(q_peaks)

        self.points_ = q_peaks
        return self
Loading

0 comments on commit f996f32

Please sign in to comment.