Skip to content

Commit

Permalink
Merge pull request #4 from Zeutschler/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Zeutschler authored Sep 15, 2024
2 parents be87b4c + 3f8e133 commit 1547924
Show file tree
Hide file tree
Showing 16 changed files with 1,015 additions and 352 deletions.
11 changes: 6 additions & 5 deletions datespanlib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations
# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license

from __future__ import annotations
from dateutil.parser import parserinfo

from datespanlib.date_span import DateSpan
from datespanlib.date_span_set import DateSpanSet

__author__ = "Thomas Zeutschler"
__version__ = "0.1.0"
__version__ = "0.1.6"
__license__ = "MIT"
VERSION = __version__

Expand All @@ -17,7 +18,8 @@
"VERSION"
]

def parse(datespan_text: str, language:str | None = "en", parser_info: parserinfo | None = None) -> DateSpanSet:

def parse(datespan_text: str, language: str | None = "en", parser_info: parserinfo | None = None) -> DateSpanSet:
"""
Creates a new DateSpanSet instance and parses the given text into a set of DateSpan objects.
Expand All @@ -31,8 +33,7 @@ def parse(datespan_text: str, language:str | None = "en", parser_info: parserinf
The DateSpanSet instance contains 0 to N DateSpan objects derived from the given text.
Examples:
>>> DateSpanSet.parse('last month') # if today would be in February 2024
>>> DateSpanSet.evaluate('last month') # if today would be in February 2024
DateSpanSet([DateSpan(datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2024, 1, 31, 23, 59, 59, 999999))])
"""
return DateSpanSet(datespan_text, language, parser_info)

8 changes: 5 additions & 3 deletions datespanlib/date_methods.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from datetime import datetime
# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license
# ----------------------------------------------------------------------------
# Language agnostic date time functions to resolve date- and time-related text

import pandas as pd
from datetime import datetime
from dateutil.relativedelta import MO
from dateutil.relativedelta import relativedelta

from datespanlib.date_span import DateSpan


# DateText Methods to resolve date- and time-related text
# shared methods to be used by all languages


# region helper methods

Expand Down
46 changes: 40 additions & 6 deletions datespanlib/date_span.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license

from __future__ import annotations
from datetime import datetime, time, timedelta
from dateutil.relativedelta import relativedelta
Expand All @@ -12,9 +14,11 @@ class DateSpan:
"""
TIME_EPSILON_MICROSECONDS = 1000 # 1 millisecond

def __init__(self, start: datetime | None = None, end: datetime | None = None, message: str | None = None):
    """
    Initializes a new DateSpan.

    Arguments:
        start: (optional) The start of the span.
        end: (optional) The end of the span. If None, the start value is also used as the end.
        message: (optional) A message to keep alongside the span.
    """
    self._start: datetime | None = start
    # A missing end falls back to the start value.
    self._end: datetime | None = end if end is not None else start
    self._message: str | None = message


@classmethod
def now(cls) -> DateSpan:
Expand All @@ -32,6 +36,11 @@ def undefined(cls) -> DateSpan:
"""Returns an empty / undefined DateSpan."""
return DateSpan(None, None)

@property
def message(self) -> str | None:
    """Message stored on this DateSpan, or None if no message was set."""
    note = self._message
    return note

@property
def is_undefined(self) -> bool:
"""Returns True if the DateSpan is undefined."""
Expand Down Expand Up @@ -164,14 +173,36 @@ def subtract(self, other: DateSpan, allow_split: bool = False) -> DateSpan | (Da
# overlap at the end
return DateSpan(self._start, other._start - timedelta(microseconds=1))

def with_time(self, time: datetime | time, text: str | None = None) -> DateSpan:
    """
    Returns a new DateSpan with the time of day of start and end set to the given time.

    Arguments:
        time: A datetime or time object. Its hour, minute, second and microsecond
            are copied onto both the start and the end of this DateSpan.
        text: (optional) The time text to derive the resolution from. The number of
            colon separated parts decides how the result is widened:
            "10:23:45.123" (fractional seconds) -> returned unchanged,
            "10:23:45" -> `full_second()`, "10:23" -> `full_minute()`, "10" -> `full_hour()`.

    Returns:
        A new DateSpan. This DateSpan is not modified.
    """
    start = self._start.replace(hour=time.hour, minute=time.minute, second=time.second,
                                microsecond=time.microsecond)
    end = self._end.replace(hour=time.hour, minute=time.minute, second=time.second,
                            microsecond=time.microsecond)
    ds = DateSpan(start, end)
    if text is None:
        return ds

    parts = text.split(":")
    if len(parts) == 3:
        # Fractional seconds were given explicitly, so the exact point in time is kept.
        return ds if "." in parts[2] else ds.full_second()
    if len(parts) == 2:
        return ds.full_minute()
    if len(parts) == 1:
        return ds.full_hour()
    # More than three colon separated parts: no known resolution, return as is.
    return ds

def full_millisecond(self) -> DateSpan:
    """
    Returns a new DateSpan with the start and end date set to the beginning and end of the respective millisecond(s).

    The start is rounded down to the beginning of its millisecond. The end is moved to the
    last microsecond of its own millisecond.
    """
    start_musec = self._start.microsecond // 1000 * 1000
    # The end must be derived from the end's own microsecond value, not from the start's.
    # Otherwise a span crossing several milliseconds would get a wrong end.
    end_musec = self._end.microsecond // 1000 * 1000 + 999
    return DateSpan(self._start.replace(microsecond=start_musec),
                    self._end.replace(microsecond=end_musec))

def full_second(self) -> DateSpan:
"""
Expand Down Expand Up @@ -223,9 +254,9 @@ def full_quarter(self) -> DateSpan:
def full_quarter(self) -> DateSpan:
    """
    Returns a new DateSpan with the start and end date set to the beginning and end of the respective quarter(s).
    """
    # Map month 1..12 onto the first month of its quarter (1, 4, 7 or 10).
    # `(month - 1) // 3` is the zero-based quarter index. The former expressions
    # `month // 3 * 3` and `(month // 3 - 1) * 3 + 1` produce month 0 or -2 for January.
    start_month = ((self._start.month - 1) // 3) * 3 + 1
    end_month = ((self._end.month - 1) // 3) * 3 + 1

    start = self._start.replace(month=start_month, day=1, hour=0, minute=0, second=0, microsecond=0)
    # First day of the end's quarter, plus three months, minus one day = last day of that quarter.
    end = self._end.replace(month=end_month, day=1, hour=23, minute=59, second=59,
                            microsecond=999999) + relativedelta(months=3, days=-1)
    return DateSpan(start, end)
Expand Down Expand Up @@ -486,7 +517,10 @@ def __bool__(self):
return not (self._start is None and self._end is None)

def __str__(self):
    """
    Returns a human readable text for the DateSpan.

    An undefined DateSpan yields "DateSpan(undefined)". Otherwise start and end are
    shown with abbreviated weekday, date and time down to the second.
    """
    if self.is_undefined:
        return "DateSpan(undefined)"
    fmt = '%a %Y-%m-%d %H:%M:%S'
    return f"DateSpan({self._start.strftime(fmt)} <-> {self._end.strftime(fmt)})"

def __repr__(self):
if self.is_undefined:
Expand Down
50 changes: 36 additions & 14 deletions datespanlib/date_span_set.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations
# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license

from __future__ import annotations
from typing import Any

import uuid
from datetime import datetime
from dateutil.parser import parserinfo
Expand Down Expand Up @@ -35,7 +35,7 @@ def __init__(self, definition: Any | None = None, language: str | None = "en", p
ValueError: If the language is not supported or the text cannot be parsed.
"""
# super().__init__()
self._spans: list[DateSpan] = [] # The internal list of date spans objects.
self._spans: list[DateSpan] = []
self._definition: str | None = definition
self._parser_info: parserinfo | None = parser_info

Expand All @@ -53,11 +53,11 @@ def __init__(self, definition: Any | None = None, language: str | None = "en", p
self._parse(definition, parser_info)

# Magic Methods
def __iter__(self) -> DateSpanSet:
    """
    Resets the iteration cursor and returns the set itself as the iterator.

    NOTE: the cursor (`_iter_index`) is stored on the set, so nested or interleaved
    loops over the same DateSpanSet instance share a single cursor.
    """
    # Start one position before the first element.
    self._iter_index = -1
    return self

def __next__(self): # Python 2: def next(self)
def __next__(self) -> DateSpan: # Python 2: def next(self)
self._iter_index += 1
if self._iter_index < len(self._spans):
return self._spans[self._iter_index]
Expand All @@ -66,7 +66,7 @@ def __next__(self): # Python 2: def next(self)
def __len__(self):
    """Returns the number of DateSpan objects contained in the set."""
    spans = self._spans
    return len(spans)

def __getitem__(self, item) -> DateSpan:
    """
    Returns the element of the internal span list selected by `item`.

    `item` is passed on unchanged to the internal list of spans.
    """
    return self._spans[item]

def __str__(self):
Expand All @@ -89,6 +89,13 @@ def __eq__(self, other) -> bool:
if span != other._spans[i]:
return False
return True
if isinstance(other, DateSpan):
if len(self._spans) == 1:
return self._spans[0] == other
return False
if isinstance(other, str):
return self == DateSpanSet(other)

return False

def __ne__(self, other) -> bool:
Expand Down Expand Up @@ -209,7 +216,7 @@ def parse(cls, datespan_text: str, language: str | None = "en", parser_info: par
The DateSpanSet instance contains 0 to N DateSpan objects derived from the given text.
Examples:
>>> DateSpanSet.parse('last month') # if today would be in February 2024
>>> DateSpanSet.evaluate('last month') # if today would be in February 2024
DateSpanSet([DateSpan(datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2024, 1, 31, 23, 59, 59, 999999))])
"""
return cls(definition=datespan_text, language=language, parser_info=parser_info)
Expand All @@ -229,7 +236,7 @@ def try_parse(cls, datespan_text: str, language: str | None = "en", parser_info:
The DateSpanSet instance contains 0 to N DateSpan objects derived from the given text or None.
Examples:
>>> DateSpanSet.parse('last month') # if today would be in February 2024
>>> DateSpanSet.evaluate('last month') # if today would be in February 2024
DateSpanSet([DateSpan(datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2024, 1, 31, 23, 59, 59, 999999))])
"""
try:
Expand Down Expand Up @@ -405,12 +412,14 @@ def to_tuples(self) -> list[tuple[datetime, datetime]]:
""" Returns a list of tuples with start and end dates of all DateSpan objects in the DateSpanSet."""
return [(ds.start, ds.end) for ds in self._spans]

def filter(self, data: Any, return_mask:bool = False, return_index:bool=False) -> Any:
def filter(self, data: Any, column:str | None = None, return_mask:bool = False, return_index:bool=False) -> Any:
"""
Filters the given data object, e.g. a Pandas DataFrame or Series, based on the date spans of the DateSpanSet.
Arguments:
data: The data object to filter, e.g. a Pandas DataFrame or Series.
column: (optional) The name of the column in the DataFrame to filter.
If None, the data object itself will be filtered.
return_mask: (optional) If True, a boolean mask will be returned instead of the filtered data.
return_index: (optional) If True, the index of the filtered data will be returned.
Expand All @@ -433,11 +442,24 @@ def filter(self, data: Any, return_mask:bool = False, return_index:bool=False) -
class_name = f"{data.__class__.__module__}.{data.__class__.__qualname__}"
match class_name:
case "pandas.core.frame.DataFrame":
return self.to_df_lambda(data)
case "pandas.core.series.Series":
return self.to_df_lambda()(data)
if column is None:
raise ValueError("A column name must be provided to filter a Pandas DataFrame.")
mask = self.to_df_lambda()(data[column])
if return_mask:
return mask
elif return_index:
return data[mask].index.to_numpy()
return data[mask]

return data
case "pandas.core.series.Series":
mask = self.to_df_lambda()(data)
if return_mask:
return mask
elif return_index:
return data[mask].index.to_numpy()
return data[mask]
case _:
raise ValueError(f"Objects of type '{class_name}' are not yet supported for filtering.")
# endregion


Expand Down Expand Up @@ -486,7 +508,7 @@ def _parse(self, text: str, parser_info: parserinfo | None = None) -> bool:
self._message = None
self._spans.clear()
try:
ds: DateSpan | list[DateSpan] = self._parser.parse(text, parser_info)
ds: DateSpan | list[DateSpan] = self._parser.evaluate(text, parser_info)
if isinstance(ds, DateSpan):
self._spans.append(ds)
else:
Expand Down
2 changes: 1 addition & 1 deletion datespanlib/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license

26 changes: 23 additions & 3 deletions datespanlib/parser/base_parser.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,37 @@
# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license

from abc import ABC, abstractmethod
from datetime import datetime
from dateutil.parser import parserinfo

from datespanlib.date_span import DateSpan


class DateTextLanguageParser(ABC):
    """Base class for language specific date text parsing."""

    @property
    @abstractmethod
    def language(self) -> str:
        """Returns the ISO 639-1 language code of the parser."""
        pass

    @abstractmethod
    def evaluate(self, text: str, parser_info: None | parserinfo = None) -> DateSpan | list[DateSpan]:
        """
        Parses a date text string into one or more DateSpan objects.

        Arguments:
            text: The date text string to parse.
            parser_info: (optional) A dateutil.parser.parserinfo instance to use for parsing
                dates contained in the text. If not defined, the default parser of the
                dateutil library will be used.

        Returns:
            A single DateSpan or a list of DateSpan objects.
            NOTE(review): earlier documentation stated that None is returned when the text
            cannot be parsed, which the return annotation does not allow. Confirm against
            the concrete parser implementations.
        """
        pass

    @property
    @abstractmethod
    def message(self) -> str:
        """Returns information about the last failing parsing operation."""
        pass
2 changes: 2 additions & 0 deletions datespanlib/parser/en/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license

from datespanlib.parser.en.parser import DateTextParser

__all__ = [
Expand Down
Loading

0 comments on commit 1547924

Please sign in to comment.