From fcb3103fad805911babb2527d008500352932aca Mon Sep 17 00:00:00 2001 From: Thomas Zeutschler Date: Sun, 22 Sep 2024 19:09:16 +0200 Subject: [PATCH] v0.2 --- README.md | 152 ++++------------- {datespanlib => datespan}/__init__.py | 12 +- {datespanlib => datespan}/date_span.py | 127 ++++++++++---- {datespanlib => datespan}/date_span_set.py | 157 ++++++++++++++---- {datespanlib => datespan}/parser/__init__.py | 1 + .../parser/datespanparser.py | 10 +- {datespanlib => datespan}/parser/errors.py | 2 + {datespanlib => datespan}/parser/evaluator.py | 70 ++++++-- {datespanlib => datespan}/parser/lexer.py | 5 +- {datespanlib => datespan}/parser/parser.py | 40 ++++- requirements.txt | 9 +- samples/basic_usage.py | 16 ++ samples/using_with_pandas.py | 6 +- setup.py | 21 +-- tests/test_class_DateSpan.py | 12 +- tests/test_class_DateSpanParser.py | 8 +- tests/test_class_DateSpanSet.py | 21 +-- tests/test_datespan_basics.py | 7 +- tests/test_datespan_methods.py | 4 +- tests/test_datespanset.py | 11 +- tests/test_debugging.py | 9 + 21 files changed, 450 insertions(+), 250 deletions(-) rename {datespanlib => datespan}/__init__.py (76%) rename {datespanlib => datespan}/date_span.py (91%) rename {datespanlib => datespan}/date_span_set.py (81%) rename {datespanlib => datespan}/parser/__init__.py (71%) rename {datespanlib => datespan}/parser/datespanparser.py (85%) rename {datespanlib => datespan}/parser/errors.py (94%) rename {datespanlib => datespan}/parser/evaluator.py (92%) rename {datespanlib => datespan}/parser/lexer.py (98%) rename {datespanlib => datespan}/parser/parser.py (91%) create mode 100644 samples/basic_usage.py create mode 100644 tests/test_debugging.py diff --git a/README.md b/README.md index 19958b6..ed0f8e3 100644 --- a/README.md +++ b/README.md @@ -1,133 +1,53 @@ -# DateSpanLib -![GitHub license](https://img.shields.io/github/license/Zeutschler/datespanlib?color=A1C547) -![PyPI 
version](https://img.shields.io/pypi/v/datespanlib?logo=pypi&logoColor=979DA4&color=A1C547) -![Python versions](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fraw.githubusercontent.com%2FZeutschler%2Fdatespanlib%2Fmaster%2Fpyproject.toml&query=%24%5B'project'%5D%5B'requires-python'%5D&color=A1C547) -![PyPI Downloads](https://img.shields.io/pypi/dm/datespanlib.svg?logo=pypi&logoColor=979DA4&label=PyPI%20downloads&color=A1C547) -![GitHub last commit](https://img.shields.io/github/last-commit/Zeutschler/datespanlib?logo=github&logoColor=979DA4&color=A1C547) -![unit tests](https://img.shields.io/github/actions/workflow/status/zeutschler/datespanlib/python-package.yml?logo=GitHub&logoColor=979DA4&label=unit%20tests&color=A1C547) -![build](https://img.shields.io/github/actions/workflow/status/zeutschler/datespanlib/python-package.yml?logo=GitHub&logoColor=979DA4&color=A1C547) -![documentation](https://img.shields.io/github/actions/workflow/status/zeutschler/datespanlib/static-site-upload.yml?logo=GitHub&logoColor=979DA4&label=docs&color=A1C547&link=https%3A%2F%2Fzeutschler.github.io%2Fcubedpandas%2F) -![codecov](https://codecov.io/github/Zeutschler/datespanlib/graph/badge.svg?token=B12O0B6F10) +# datespan - convenient date span parsing & handling -**UNDER CONSTRUCTION** - The DateSpanLib library is under active development and in a pre-alpha state, not -suitable for production use and even testing. The library is expected to be released in a first alpha version -in the next weeks. 
+![GitHub license](https://img.shields.io/github/license/Zeutschler/datespan?color=A1C547) +![PyPI version](https://img.shields.io/pypi/v/datespan?logo=pypi&logoColor=979DA4&color=A1C547) +![PyPI Downloads](https://img.shields.io/pypi/dm/datespan.svg?logo=pypi&logoColor=979DA4&label=PyPI%20downloads&color=A1C547) +![GitHub last commit](https://img.shields.io/github/last-commit/Zeutschler/datespan?logo=github&logoColor=979DA4&color=A1C547) +![unit tests](https://img.shields.io/github/actions/workflow/status/zeutschler/datespan/python-package.yml?logo=GitHub&logoColor=979DA4&label=unit%20tests&color=A1C547) +![build](https://img.shields.io/github/actions/workflow/status/zeutschler/datespan/python-package.yml?logo=GitHub&logoColor=979DA4&color=A1C547) +![codecov](https://codecov.io/github/Zeutschler/datespan/graph/badge.svg?token=B12O0B6F10) ----------------- -A Python library for handling and using data and time spans. +A Python package for convenient **date span** parsing and handling. +Aimed at data analysis and processing, useful in any context requiring date & time spans. -```python -from datespanlib import DateSpan - -ds = DateSpan("January to March 2024") -print("2024-04-15" in ds + "1 month") # returns True -``` - -The DateSpanLib library is designed to be used for data analysis and data processing, -where date and time spans are often used to filter, aggregate or join data. But it -should also be valuable in any other context where date and time spans are used. - -It provides dependency free integrations with Pandas, Numpy, Spark and others, can -generate Python code artefacts, either as source text or as precompiled (lambda) -functions and can also generate SQL fragments for filtering in SQL WHERE clauses. 
- -#### Background -The DataSpanLib library has been carved out from the -[CubedPandas](https://github.com/Zeutschler/cubedpandas) project - a library for -intuitive data analysis with Pandas dataframes - as it serves a broader purpose and -can be used independently of CubedPandas. - -For internal DateTime parsing and manipulation, -the great [dateutil](https://github.com/dateutil/dateutil) library is used. The -DataSpanLib library has no other dependencies (like Pandas, Numpy Spark etc.), -so it is lightweight and easy to install. - -## Installation -The library can be installed via pip or is available as a download on [PyPi.org](https://pypi.org/datespanlib/). ```bash -pip install datespanlib +pip install datespan ``` -## Usage - -The library provides the following methods and classes: - -### Method parse() -The `parse` method converts an arbitrary string into a `DateSpanSet` object. The string can be a simple date -like '2021-01-01' or a complex date span expression like 'Mondays to Wednesday last month'. - -### Class DateSpan -`DateSpan` objects represent a single span of time, typically represented by a `start` and `end` datetime. -The `DateSpan` object provides methods to compare, merge, split, shift, expand, intersect etc. with other -`DateSpan` or Python datetime objects. - -`DateSpan` objects are 'expansive' in the sense that they resolve the widest possible time span -for the -, e.g. if a `DateSpan` object is created with a start date of '2021-01-01' and an end date of '2021-01-31', - - - - -### DateSpanSet - represents an ordered set of DateSpan objects -`DateSpanSet` is an ordered and redundancy free collection of `DateSpan` objects. If e.g. two `DateSpan` -objects in the set would overlap or are contiguous, they are merged into one `DateSpan` object. 
Aside -set related operations the `DateSpanSet` comes with two special capabilities worth mentioning: - -* A build in **interpreter for arbitrary date, time and date span strings**, ranging from simple dates - like '2021-01-01' up to complex date span expressions like 'Mondays to Wednesday last month'. - -* Provides methods and can create **artefacts and callables for data processing** with Python, SQL, Pandas - Numpy, Spark and other compatible libraries. - - - - -## Basic Usage ```python -from datespanlib import parse, DateSpanSet, DateSpan - -# Create a DateSpan object -jan = DateSpan(start='2024-01-01', end='2024-01-31') -feb = DateSpan("February 2024") - -jan_feb = DateSpanSet([jan, feb]) # Create a DateSpanSet object -assert(len(jan_feb) == 1) # returns 1, as the consecutive or overlapping DateSpan objects get merged. - -assert (jan_feb == parse("January, February 2024")) # Compare DateSpan objects - -# Set operations -jan_feb_mar = jan_feb + "1 month" -assert(jan_feb_mar == parse("first 3 month of 2024")) -jan_mar = jan_feb_mar - "Januray 2024" -assert(len(jan_mar)) # returns 2, as the one DateSpans gets split into two DataSpans. 
-assert(jan_mar.contains("2024-01-15")) - -# Use DateSpanSet to filter Pandas DataFrame import pandas as pd +from datespan import parse, DateSpan df = pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")}) -result = df[df["date"].apply(jan_mar.contains)] # don't use this, slow -result = jan_mar.filter(df, "date") # fast vectorized operation - -# Use DateSpanSet to filter Spark DataFrame -from pyspark.sql import SparkSession -spark = SparkSession.builder.getOrCreate() -df = spark.createDataFrame(pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")})) -result = jan_mar.filter(df, "date") # fast vectorized/distributed operation -# Use DateSpanSet to filter Numpy array -import numpy as np -arr = np.arange(np.datetime64("2024-01-01"), np.datetime64("2024-12-31")) -result = jan_mar.filter(arr) # fast vectorized operation +dss = parse("April 2024 ytd") # Create a DateSpanSet object +dss.add("May") # Add a full month of the current year (e.g. 2024 in 2024) +dss.add("today") # Add the current day from 00:00:00 to 23:59:59 +dss += "previous week" # Add a full week from Monday 00:00:00 to Sunday 23:59 +dss -= "January" # Remove the full month of January 2024 -# Use DateSpanSet to create an SQL WHERE statement -sql = f"SELECT * FROM table WHERE {jan_mar.to_sql('date')}" +print(len(dss)) # returns the number of nonconsecutive DateSpans +print(dss.to_sql("date")) # returns an SQL WHERE clause fragment +print(dss.filter(df, "date")) # returns filtered DataFrame # vectorized filtering of column 'date' of a DataFrame ``` +### Classes +`DateSpan` represents a single date or time span, defined by a start and an end datetime. +Provides methods to create, compare, merge, parse, split, shift, expand & intersect +`DateSpan` objects and/or `datetime`, `date` or `time` objects. 
+`DateSpanSet` represents an ordered and redundancy free collection of `DateSpan` objects, +where consecutive or overlapping `DateSpan` objects get automatically merged into a single `DateSpan` +object. Required for fragmented date span expressions like `every 2nd Friday of next month`. +`DateSpanParser` provides parsing for arbitrary date, time and date span strings in English, +ranging from simple dates like '2021-01-01' up to complex date span expressions like +'Mondays to Wednesday last month'. For internal DateTime parsing and manipulation, the +[dateutil](https://github.com/dateutil/dateutil) library is used. - - - - - +### Background +The 'datespan' package has been carved out from the +[CubedPandas](https://github.com/Zeutschler/cubedpandas) project - a library for + data analysis with Pandas dataframes - as datespan serves a broader purpose and +can be used independently of CubedPandas. diff --git a/datespanlib/__init__.py b/datespan/__init__.py similarity index 76% rename from datespanlib/__init__.py rename to datespan/__init__.py index 721a4c0..30e23f5 100644 --- a/datespanlib/__init__.py +++ b/datespan/__init__.py @@ -1,13 +1,13 @@ -# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license from __future__ import annotations from dateutil.parser import parserinfo -from datespanlib.date_span import DateSpan -from datespanlib.date_span_set import DateSpanSet +from datespan.date_span import DateSpan +from datespan.date_span_set import DateSpanSet __author__ = "Thomas Zeutschler" -__version__ = "0.1.8" +__version__ = "0.2.0" __license__ = "MIT" VERSION = __version__ @@ -20,7 +20,7 @@ ] -def parse(datespan_text: str, language: str | None = "en", parser_info: parserinfo | None = None) -> DateSpanSet: +def parse(datespan_text: str, parser_info: parserinfo | None = None) -> DateSpanSet: """ Creates a new DateSpanSet instance and parses the given text into a set of DateSpan objects. 
@@ -37,4 +37,4 @@ def parse(datespan_text: str, language: str | None = "en", parser_info: parserin >>> DateSpanSet.evaluate('last month') # if today would be in February 2024 DateSpanSet([DateSpan(datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2024, 1, 31, 23, 59, 59, 999999))]) """ - return DateSpanSet(datespan_text, language, parser_info) + return DateSpanSet(definition=datespan_text, parser_info=parser_info) diff --git a/datespanlib/date_span.py b/datespan/date_span.py similarity index 91% rename from datespanlib/date_span.py rename to datespan/date_span.py index 31095ab..8235b1c 100644 --- a/datespanlib/date_span.py +++ b/datespan/date_span.py @@ -1,11 +1,10 @@ -# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license from __future__ import annotations from datetime import datetime, time, timedelta from dateutil.relativedelta import relativedelta from dateutil.relativedelta import MO - class DateSpan: """ Represents a time span with a start and end date. The DateSpan can be used to compare, merge, intersect, subtract @@ -14,18 +13,44 @@ class DateSpan: The DateSpan is immutable, all methods that change the DateSpan will return a new DateSpan. 
""" TIME_EPSILON_MICROSECONDS = 100_000 # 0.1 seconds - """The time epsilon in microseconds used for comparison of time deltas.""" - MIN_YEAR = 1700 + """The time epsilon in microseconds used for detecting overlapping or consecutive date time spans.""" + MIN_YEAR = datetime.min.year """The minimum year that can be represented by the DateSpan.""" - MAX_YEAR = 2300 + MAX_YEAR = datetime.max.year """The maximum year that can be represented by the DateSpan.""" - def __init__(self, start: datetime | None = None, end: datetime | None = None, message: str | None = None): - self._start: datetime | None = start - self._end: datetime | None = end if end is not None else start - self._start, self._end = self._swap() + def __init__(self, start = None, end = None, message: str | None = None): + """ + Initializes a new DateSpan with the given start and end date. If only one date is given, the DateSpan will + represent a single point in time. If no date is given, the DateSpan will be undefined. + + If `start` and `end` are datetime objects, the DateSpan will be initialized with these datetimes. + If `start` is larger than `end`, the dates will be automatically swapped. + + If `start` and/or `end` contains arbitrary date span text, the text will be parsed into a DateSpan. + If both `start` and `end` contain text that refer/resolve to distinct date span, then the resulting + DateSpan will start at the beginning of the first date span defined by `start` and the end at the end of the + second date span defined by `end`. + + Raises: + ValueError: If arguments of the DateSpan are invalid, the DateSpan could not be parsed or the + parsing of the DateSpan would result in more than one DateSpan. For such cases use the DateSpanSet + class to parse multipart date spans. 
+ """ + self._arg_start = start + self._arg_end = end self._message: str | None = message + if isinstance(start, datetime | None) and isinstance(end, datetime | None): + self._start: datetime | None = start + self._end: datetime | None = end if end is not None else start + self._start, self._end = self._swap() + else: + try: + self._start, self._end = self._parse(start, end) + except ValueError as e: + raise e + @property def message(self) -> str | None: """Returns the message of the DateSpan.""" @@ -77,9 +102,7 @@ def overlaps_with(self, other: DateSpan) -> bool: """ if self.is_undefined or other.is_undefined: return False - if self._start >= other._start: - return self._start <= other._end - return self._end >= other._start + return max(self._start, other._start) <= min(self._end, other._end) def consecutive_with(self, other: DateSpan) -> bool: """ @@ -118,6 +141,17 @@ def merge(self, other: DateSpan) -> DateSpan: return DateSpan(min(self._start, other._start), max(self._end, other._end)) raise ValueError("Cannot merge DateSpans that do not overlap or are not consecutive.") + def can_merge(self, other: DateSpan) -> bool: + """ + Returns True if the DateSpan can be merged with the given DateSpan. + """ + if self.is_undefined or other.is_undefined: + return True + return self.overlaps_with(other) or self.consecutive_with(other) + + + + def intersect(self, other: DateSpan) -> DateSpan: """ Returns a new DateSpan that is the intersection of the DateSpan with the given DateSpan. 
@@ -170,7 +204,7 @@ def subtract(self, other: DateSpan, allow_split: bool = False) -> DateSpan | (Da return self.clone() if other._start < self._start: - # overalap at the start + # overlap at the start return DateSpan(other._end + timedelta(microseconds=1), self._end) # overlap at the end return DateSpan(self._start, other._start - timedelta(microseconds=1)) @@ -449,18 +483,6 @@ def is_full_day(self) -> bool: return (self._start == self._begin_of_day(self._start) and self._end == self._end_of_day(self._end)) - - def _swap(self) -> DateSpan: - """Swap start and end date if start is greater than end.""" - if self._start is None or self._end is None: - return self - - if self._start > self._end: - tmp = self._start - self._start = self._end - self._end = tmp - return self - def replace(self, year: int | None = None, month: int | None = None, day: int | None = None, hour: int | None = None, minute: int | None = None, second: int | None = None, microsecond: int | None = None) -> DateSpan: @@ -935,16 +957,9 @@ def __str__(self): if self.is_undefined: return "DateSpan(undefined)" - if self._start.microsecond != 0: - start = f"{self._start.strftime('%a %Y-%m-%d %H:%M:%S.%f')}" - else: - start = f"{self._start.strftime('%a %Y-%m-%d %H:%M:%S')}" - - if self._end.microsecond != 0: - end = f"{self._end.strftime('%a %Y-%m-%d %H:%M:%S.%f')})" - else: - end = f"{self._end.strftime('%a %Y-%m-%d %H:%M:%S')})" - return (f"DateSpan({start} <-> {end})") + start = f"'{self._arg_start}'" if isinstance(self._arg_start, str) else str(self._arg_start) + end = f"'{self._arg_end}'" if isinstance(self._arg_end, str) else str(self._arg_end) + return f"DateSpan({start}, {end})" # -> ('start': {self._start}, 'end': {self._end})" def __repr__(self): return self.__str__() @@ -1024,3 +1039,45 @@ def __le__(self, other): def __hash__(self): return hash((self._start, self._end)) # endregion + + # region private methods + def _swap(self) -> DateSpan: + """Swap start and end date if start is greater 
than end.""" + if self._start is None or self._end is None: + return self + + if self._start > self._end: + tmp = self._start + self._start = self._end + self._end = tmp + return self + + def _parse(self, start, end = None) -> (datetime, datetime): + """Parse a date span string.""" + if end is None: + expected_spans = 1 + text = start + else: + expected_spans = 2 + text = f"{start}; {end}" # merge start and end into a single date span statement + + self._message = None + try: + from datespan.parser.datespanparser import DateSpanParser # overcome circular import + date_span_parser: DateSpanParser = DateSpanParser(text) + expressions = date_span_parser.parse() # todo: inject self.parser_info + if len(expressions) != expected_spans: + raise ValueError(f"The date span expression '{text}' resolves to " + f"more than just a single date span. " + f"Use 'DateSpanSet('{text}')' to parse multi-part date spans.") + if expected_spans == 2: + start = expressions[0][0][0] + end = expressions[1][0][1] + else: + start = expressions[0][0][0] + end = expressions[0][0][1] + + return start, end + except Exception as e: + self._message = str(e) + raise ValueError(str(e)) \ No newline at end of file diff --git a/datespanlib/date_span_set.py b/datespan/date_span_set.py similarity index 81% rename from datespanlib/date_span_set.py rename to datespan/date_span_set.py index 2a968b6..e24cdf3 100644 --- a/datespanlib/date_span_set.py +++ b/datespan/date_span_set.py @@ -1,4 +1,4 @@ -# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license from __future__ import annotations from typing import Any @@ -6,8 +6,8 @@ from datetime import datetime, date, time from dateutil.parser import parserinfo -from datespanlib.parser.datespanparser import DateSpanParser -from datespanlib.date_span import DateSpan +from datespan.parser.datespanparser import DateSpanParser +from datespan.date_span import DateSpan class DateSpanSet: @@ -35,16 
+35,33 @@ def __init__(self, definition: Any | None = None, parser_info: parserinfo | None self._definition = definition self._parser_info: parserinfo | None = parser_info self._iter_index = 0 + if definition is not None: expressions = [] if isinstance(definition, DateSpan | str | datetime | time | date): expressions.append(definition) + elif isinstance(definition, DateSpanSet): + self._definition = definition._definition + expressions.extend(definition._spans) elif isinstance(definition, list | tuple): + definitions = [] for item in definition: - if isinstance(item, DateSpan | str | datetime | time | date): + if isinstance(item, DateSpan): + definitions.append(str(item._arg_start)) + expressions.append(item) + elif isinstance(item, DateSpanSet): + definitions.append(str(item._definition)) + expressions.extend(item._spans) + elif isinstance(item, datetime | time | date): + definitions.append(str(item)) expressions.append(item) + elif isinstance(item, str): + dss = DateSpanSet(item) + definitions.append(str(dss._definition)) + definitions.append(dss._spans) else: raise ValueError(f"Objects of type '{type(item)}' are not supported for DateSpanSet.") + self._definition = " + ".join(definitions) try: for exp in expressions: if isinstance(exp, DateSpan): @@ -86,7 +103,7 @@ def __add__(self, other) -> DateSpanSet: return self.merge(other) def __sub__(self, other) -> DateSpanSet: - return self.intersect(other) + return self.subtract(other) def __eq__(self, other) -> bool: if isinstance(other, DateSpanSet): @@ -128,17 +145,24 @@ def __ge__(self, other) -> bool: return False def __contains__(self, item) -> bool: + test_spans = [] if isinstance(item, DateSpan): - for span in self._spans: - if span == item: - return True - return False + test_spans.append(item) elif isinstance(item, datetime): - for span in self._spans: - if span.contains(item): - return True - return False - return False + test_spans.append(DateSpan(item)) + elif isinstance(item, str): + 
test_spans.extend(DateSpanSet(item)._spans) + elif isinstance(item, DateSpanSet): + test_spans.extend(item._spans) + else: + return False # unsupported type + + # todo: implement more efficient algorithm, check for start and end dates + for span in self._spans: + for test_span in test_spans: + if test_span not in span: + return False + return True def __bool__(self) -> bool: return len(self._spans) > 0 @@ -150,6 +174,11 @@ def __copy__(self) -> DateSpanSet: return self.clone() # endregion + @property + def spans(self) -> list[DateSpan]: + """Returns the list of DateSpan objects in the DateSpanSet.""" + return self._spans + @property def start(self) -> datetime | None: """Returns the start datetime of the first DateSpan object in the set.""" @@ -174,11 +203,13 @@ def clone(self) -> DateSpanSet: def add(self, other:DateSpanSet | DateSpan | str): """ Adds a new DateSpan object to the DateSpanSet.""" - self.merge(other) + merged = self.merge(other) + self._spans = merged._spans + self._definition = merged._definition def remove(self, other:DateSpanSet | DateSpan | str): """ Removes a DateSpan object from the DateSpanSet.""" - self.intersect(other) + self._spans = self.intersect(other)._spans def shift(self, years: int = 0, months: int = 0, days: int = 0, hours: int = 0, minutes: int = 0, seconds: int = 0, microseconds: int = 0, weeks: int = 0) -> DateSpanSet: @@ -472,10 +503,14 @@ def merge(self, other:DateSpanSet | DateSpan | str) -> DateSpanSet: Returns: A new DateSpanSet instance containing the merged date spans. 
""" - raise NotImplementedError() - if isinstance(other, DateSpan | DateSpanSet | str): + if isinstance(other, DateSpan): return DateSpanSet([self, other]) - return self.clone() + if isinstance(other, DateSpanSet): + return DateSpanSet([self, other]) + if isinstance(other, str): + return DateSpanSet([self, DateSpanSet(other)]) + raise ValueError(f"Objects of type '{type(other)}' are not supported for DateSpanSet merging.") + def intersect(self, other:DateSpanSet | DateSpan | str) -> DateSpanSet: """ @@ -491,6 +526,55 @@ def intersect(self, other:DateSpanSet | DateSpan | str) -> DateSpanSet: """ raise NotImplementedError() + def subtract(self, other:DateSpanSet | DateSpan | str) -> DateSpanSet: + """ + Subtracts a DateSpanSet, DateSpan or a string representing a data span from the current DateSpanSet. + So, the resulting DateSpanSet will contain data spans that represent the current DataSpanSet minus + the date spans that are contained in the other DateSpanSet. + + If there is no overlap between the current and the other DateSpanSet, a copy of the current DateSpanSet + will be returned. + + Arguments: + other: The other DateSpanSet, DateSpan or string to subtract. + + Returns: + A new DateSpanSet instance containing reduced DateSpanSet. 
+ """ + definitions = [str(self._definition)] + subtracts: list[DateSpan] = [] + if isinstance(other, DateSpan): + definitions.append(f"({other._arg_start}, {other._arg_end})") + subtracts.append(other) + elif isinstance(other, DateSpanSet): + definitions.append(str(other._definition)) + subtracts.extend(other._spans) + elif isinstance(other, str): + dss = DateSpanSet(other) + definitions.append(str(dss._definition)) + subtracts.extend(dss._spans) + else: + raise ValueError(f"Objects of type '{type(other)}' are not supported for DateSpanSet subtraction.") + + result = self.clone() + final = [] + for sub in subtracts: + + for i, span in enumerate(result._spans): + if span.overlaps_with(sub): + result = span.subtract(sub, allow_split=True) + if isinstance(result, DateSpan): + if not result.is_undefined: + final.append(result) + else: + final.extend(result) + else: + final.append(span) + dss = DateSpanSet(final) + dss._definition = " - ".join(definitions) + return dss + + # end region @@ -498,22 +582,30 @@ def intersect(self, other:DateSpanSet | DateSpan | str) -> DateSpanSet: # region Internal Methods def _merge_all(self): """ - Merges all overlapping DateSpan objects in the set. + Merges all overlapping DateSpan objects if applicable. 
""" if len(self._spans) < 2: - return - new_spans: list[DateSpan] = [] - for span in self._spans: - if not new_spans: - new_spans.append(span) + return # special case, just one span = nothing to merge + + self._spans.sort() + + current:DateSpan = self._spans[0] + stack = self._spans[1:] + stack.reverse() + merged: list[DateSpan] = [] + + while True: + next: DateSpan = stack.pop() + if current.can_merge(next): + current = current.merge(next) else: - last = new_spans[-1] - if last.overlaps_with(span): - new_spans[-1] = last.merge(span) - else: - new_spans.append(span) - new_spans.sort() - self._spans = new_spans + merged.append(current) + current = next + if not stack: + merged.append(current) + break + + self._spans = merged def _parse(self, text: str | None = None): """ @@ -527,5 +619,6 @@ def _parse(self, text: str | None = None): for expr in expressions: self._spans.extend([DateSpan(span[0], span[1]) for span in expr]) except Exception as e: + self._message = str(e) raise ValueError(str(e)) # endregion diff --git a/datespanlib/parser/__init__.py b/datespan/parser/__init__.py similarity index 71% rename from datespanlib/parser/__init__.py rename to datespan/parser/__init__.py index c6ef507..42ae13f 100644 --- a/datespanlib/parser/__init__.py +++ b/datespan/parser/__init__.py @@ -1,3 +1,4 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license MIN_YEAR = 1700 """The minimum year that can be represented by the DateSpan.""" diff --git a/datespanlib/parser/datespanparser.py b/datespan/parser/datespanparser.py similarity index 85% rename from datespanlib/parser/datespanparser.py rename to datespan/parser/datespanparser.py index 04a8c9f..fe70027 100644 --- a/datespanlib/parser/datespanparser.py +++ b/datespan/parser/datespanparser.py @@ -1,9 +1,9 @@ -from datespanlib.parser.errors import ParsingError, EvaluationError -from datespanlib.parser.evaluator import Evaluator -from datespanlib.parser.lexer import Lexer -from datespanlib.parser.parser import Parser - 
+# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license +from datespan.parser.errors import ParsingError, EvaluationError +from datespan.parser.evaluator import Evaluator +from datespan.parser.lexer import Lexer +from datespan.parser.parser import Parser class DateSpanParser: """ diff --git a/datespanlib/parser/errors.py b/datespan/parser/errors.py similarity index 94% rename from datespanlib/parser/errors.py rename to datespan/parser/errors.py index fd71e81..83f469f 100644 --- a/datespanlib/parser/errors.py +++ b/datespan/parser/errors.py @@ -1,3 +1,5 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + class ParsingError(Exception): """ Exception raised when a parsing error occurs, including position and token information. diff --git a/datespanlib/parser/evaluator.py b/datespan/parser/evaluator.py similarity index 92% rename from datespanlib/parser/evaluator.py rename to datespan/parser/evaluator.py index 7d48b19..4654acd 100644 --- a/datespanlib/parser/evaluator.py +++ b/datespan/parser/evaluator.py @@ -1,14 +1,16 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + import re from datetime import datetime, time, timedelta import dateutil.parser from dateutil.relativedelta import relativedelta -from datespanlib.parser import MIN_YEAR, MAX_YEAR -from datespanlib.parser.errors import EvaluationError, ParsingError -from datespanlib.parser.lexer import Token, TokenType, Lexer -from datespanlib.parser.parser import Parser -from datespanlib.date_span import DateSpan +from datespan.parser import MIN_YEAR, MAX_YEAR +from datespan.parser.errors import EvaluationError, ParsingError +from datespan.parser.lexer import Token, TokenType, Lexer +from datespan.parser.parser import Parser +from datespan.date_span import DateSpan class Evaluator: @@ -59,11 +61,8 @@ def evaluate_node(self, node): elif node_type == 'range': return self.evaluate_range(node.value['start_tokens'], node.value['end_tokens']) - # elif node_type == 'since': - # 
return self.evaluate_since(node.value['tokens']) elif node_type == 'half_bound': return self.evaluate_half_bound(node.value['tokens'], node.value['value']) - elif node_type == 'iterative': return self.evaluate_iterative(node.value['tokens'], node.value['period_tokens']) else: @@ -376,7 +375,7 @@ def evaluate_relative(self, tokens): return self.calculate_this(unit) return [] - def evaluate_special(self, value): + def evaluate_special(self, value, date_spans: list = None): """ Evaluates a special date expression and returns the corresponding date span. """ @@ -388,15 +387,41 @@ def evaluate_special(self, value): return DateSpan.tomorrow().to_tuple_list() elif value == 'now': return DateSpan.now().to_tuple_list() - elif value == 'ltm': + + elif value == 'ltm': # last 12 month + if date_spans: + date_spans.sort() + base = date_spans[-1][1] # latest end date + span = DateSpan(base).shift_start(years=-1) + return span.to_tuple_list() return DateSpan().ltm.to_tuple_list() elif value == 'ytd': + if date_spans: + date_spans.sort() + base = date_spans[-1][1] # latest end date + span = DateSpan(DateSpan(base).full_year.start, base) + return span.to_tuple_list() return DateSpan().ytd.to_tuple_list() elif value == 'qtd': + if date_spans: + date_spans.sort() + base = date_spans[-1][1] # latest end date + span = DateSpan(DateSpan(base).full_quarter.start, base) + return span.to_tuple_list() return DateSpan().qtd.to_tuple_list() elif value == 'mtd': + if date_spans: + date_spans.sort() + base = date_spans[-1][1] # latest end date + span = DateSpan(DateSpan(base).full_month.start, base) + return span.to_tuple_list() return DateSpan().mtd.to_tuple_list() elif value == 'wtd': + if date_spans: + date_spans.sort() + base = date_spans[-1][1] # latest end date + span = DateSpan(DateSpan(base).full_week.start, base) + return span.to_tuple_list() return DateSpan().wtd.to_tuple_list() # catch the following single words as specials @@ -454,7 +479,19 @@ def evaluate_triplet(self, 
triplet:str): elif relative == 'n': return self.calculate_future(number, unit) - + def _extract_special_token(self, tokens) -> (list[Token], Token): + """ + Extracts a special token from the list of tokens if it exists. + """ + if tokens[-1].type == TokenType.SPECIAL: + special_token = tokens[-1] + tokens = tokens[:-1] + return tokens, special_token + elif tokens[0].type == TokenType.SPECIAL: + special_token = tokens[0] + tokens = tokens[1:] + return tokens, special_token + return tokens, None def evaluate_months(self, tokens): """ @@ -463,6 +500,13 @@ def evaluate_months(self, tokens): months = [] year = self.today.year # Default to current year idx = 0 + if not tokens: + return [] + + # check if the last token is a special like 'ytd' + tokens, special_token = self._extract_special_token(tokens) + + # Check if the last token is a number (year) if tokens and tokens[-1].type == TokenType.NUMBER: year = tokens[-1].value @@ -482,6 +526,10 @@ def evaluate_months(self, tokens): start = datetime.combine(from_date.date(), time.min) end = datetime.combine(to_date.date(), time.max) date_spans.append((start, end)) + + if special_token is not None: + date_spans = self.evaluate_special(special_token.value, date_spans) + return date_spans def evaluate_days(self, tokens): diff --git a/datespanlib/parser/lexer.py b/datespan/parser/lexer.py similarity index 98% rename from datespanlib/parser/lexer.py rename to datespan/parser/lexer.py index db290fa..a921883 100644 --- a/datespanlib/parser/lexer.py +++ b/datespan/parser/lexer.py @@ -1,7 +1,9 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + import re from dateutil import parser as dateutil_parser -from datespanlib.parser.errors import ParsingError +from datespan.parser.errors import ParsingError class Lexer: @@ -392,6 +394,7 @@ class TokenType: TRIPLET = 'ROLLING' SEMICOLON = 'SEMICOLON' EOF = 'EOF' + START = 'START' UNKNOWN = 'UNKNOWN' # For any unrecognized tokens diff --git a/datespanlib/parser/parser.py 
b/datespan/parser/parser.py similarity index 91% rename from datespanlib/parser/parser.py rename to datespan/parser/parser.py index c7dac48..d28b392 100644 --- a/datespanlib/parser/parser.py +++ b/datespan/parser/parser.py @@ -1,7 +1,7 @@ -from anyio.lowlevel import current_token +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license -from datespanlib.parser.errors import ParsingError -from datespanlib.parser.lexer import Token, TokenType, Lexer +from datespan.parser.errors import ParsingError +from datespan.parser.lexer import Token, TokenType, Lexer class ASTNode: @@ -18,6 +18,8 @@ class DateSpanNode(ASTNode): def __init__(self, value): self.value = value # Dictionary containing details about the date span + def __str__(self): + return f"DateSpanNode({self.value})" class Parser: """ @@ -29,6 +31,7 @@ def __init__(self, tokens, text = None): self.text = text self.pos = 0 # Current position in the token list self.current_token = self.tokens[self.pos] + self.statements = [] if text is None else text.split(';') self.ast = None # Store the abstract syntax tree def __str__(self): @@ -36,6 +39,18 @@ def __str__(self): def __repr__(self): return f"Parser('{self.text}')" + @property + def previous_token(self): + """ + Returns the previous token in the list. + """ + return self.tokens[self.pos - 1] if self.pos > 0 else Token(TokenType.START, line=0, column=0) + + @property + def next_token(self): + """ Returns the next token in the list. 
""" + return self.tokens[self.pos + 1] if self.pos < len(self.tokens) - 1 else Token(TokenType.EOF, line=0, column=0) + def eat(self, token_type): """ @@ -74,6 +89,14 @@ def parse(self): self.eat(TokenType.SEMICOLON) else: break # No more statements + + # add remaining tokens + if self.pos < len(self.tokens) - 1: + node = statements[-1][-1].value + if 'tokens' not in node: + node['tokens'] = [] + node["tokens"].extend(self.tokens[self.pos:-1]) + self.ast = statements return statements except Exception as e: @@ -92,6 +115,9 @@ def parse_statement(self): (self.current_token.type == TokenType.IDENTIFIER and self.current_token.value == 'and'): self.eat(self.current_token.type) # Consume ',' or 'and' else: + if self.current_token.type == TokenType.TIME: + node = self.date_span() + date_spans.append(node) break # End of date spans in this statement return date_spans @@ -104,8 +130,6 @@ def date_span(self): return self.iterative_date_span() elif self.current_token.value in ['last', 'next', 'past', 'previous', 'rolling', 'this']: return self.relative_date_span() - # elif self.current_token.value == 'since': - # return self.since_date_span() elif self.current_token.value in ['after', 'before', 'since', 'until']: return self.half_bound_date_span() elif self.current_token.value in ['between', 'from']: @@ -322,6 +346,12 @@ def month_date_span(self): if self.current_token.type == TokenType.NUMBER: tokens.append(self.current_token) # Append the year self.eat(TokenType.NUMBER) + + # optional eat trailing time tokens + if self.current_token.type == TokenType.TIME_UNIT: + tokens.append(self.current_token) + self.eat(TokenType.TIME_UNIT) + return DateSpanNode({'type': 'months', 'tokens': tokens}) def day_date_span(self): diff --git a/requirements.txt b/requirements.txt index 4ea05ed..e274238 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,8 @@ -python-dateutil \ No newline at end of file +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + +# required for 
the datespan package +python-dateutil + +# for testing purposes +pandas +numpy diff --git a/samples/basic_usage.py b/samples/basic_usage.py new file mode 100644 index 0000000..7d73c44 --- /dev/null +++ b/samples/basic_usage.py @@ -0,0 +1,16 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + +import pandas as pd +from datespan import parse, DateSpan +df = pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")}) + +dss = parse("April 2024 ytd")# Create a DateSpanSet object +dss.add("May") # Add a full month of the current year (e.g. 2024 in 2024) +dss.add("today") # Add the current day from 00:00:00 to 23:59:59 +dss += "previous week" # Add a full week from Monday 00:00:00 to Sunday 23:59 +dss -= "January" # Remove the full month of January 2024 + +print(len(dss)) # returns the number of nonconsecutive DateSpans +print(dss.to_sql("date")) # returns an SQL WHERE clause fragment +df = dss.filter(df, "date") # vectorized filtering of column 'date' of a DataFrame# ) +print(df) # returns filtered DataFrame \ No newline at end of file diff --git a/samples/using_with_pandas.py b/samples/using_with_pandas.py index 582cb9a..df67617 100644 --- a/samples/using_with_pandas.py +++ b/samples/using_with_pandas.py @@ -1,10 +1,8 @@ -# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license -# ---------------------------------------------------------------------------- -# A simple example using DateSpanLib with pandas +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license from datetime import datetime import pandas as pd -from datespanlib import DateSpanSet, DateSpan, parse +from datespan import DateSpanSet, DateSpan, parse df = pd.DataFrame.from_dict({ "product": ["A", "B", "C", "A", "B", "C"], diff --git a/setup.py b/setup.py index b24585f..441364d 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license # setup.py for cubedpandas from setuptools import 
setup from setuptools import find_packages -from datespanlib import VERSION as DATESPANLIB_VERSION +from datespan import VERSION as DATESPANLIB_VERSION # ...to run the build and deploy process to pypi.org manually: @@ -16,12 +17,12 @@ # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ VERSION = DATESPANLIB_VERSION -DESCRIPTION = "DateSpanLib - A date and time span parsing and utilization library for data analysis and processing" +DESCRIPTION = "DateSpan - A date and time span parsing and utilization library for data analysis and processing" LONG_DESCRIPTION = """ A Python library for handling and using data and time spans. ```python -from datespanlib import DateSpan +from datespan import DateSpan ds = DateSpan("January to March 2024") print("2024-04-15" in ds + "1 month") # returns True @@ -38,7 +39,7 @@ setup( - name="DataSpanLib", + name="dataspan", version=VERSION, description=DESCRIPTION, long_description=LONG_DESCRIPTION, @@ -67,7 +68,7 @@ author="Thomas Zeutschler", keywords=['python', 'datetime', 'timespan', 'pandas', 'numpy', 'spark', 'data analysis', 'sql', 'dataframe', 'data'], author_email="cubedpandas@gmail.com", - url="https://github.com/Zeutschler/datespanlib", + url="https://github.com/Zeutschler/datespan", license='MIT', platforms=['any'], zip_safe=True, @@ -75,11 +76,11 @@ install_requires=[ 'python-dateutil', ], - test_suite="datespanlib.tests", - packages=['datespanlib', 'datespanlib.parser', 'tests'], + test_suite="datespan.tests", + packages=['datespan', 'datespan.parser', 'tests'], project_urls={ - 'Homepage': 'https://github.com/Zeutschler/datespanlib', - 'Documentation': 'https://github.com/Zeutschler/datespanlib', - 'GitHub': 'https://github.com/Zeutschler/datespanlib', + 'Homepage': 'https://github.com/Zeutschler/datespan', + 'Documentation': 'https://github.com/Zeutschler/datespan', + 'GitHub': 'https://github.com/Zeutschler/datespan', }, ) \ No newline at end of 
file diff --git a/tests/test_class_DateSpan.py b/tests/test_class_DateSpan.py index 6e6dd8e..4abc5a9 100644 --- a/tests/test_class_DateSpan.py +++ b/tests/test_class_DateSpan.py @@ -1,6 +1,8 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + import unittest from datetime import datetime, timedelta, time -from datespanlib.date_span import DateSpan +from datespan.date_span import DateSpan class TestDateSpan(unittest.TestCase): @@ -204,5 +206,13 @@ def test_le(self): def test_hash(self): self.assertEqual(hash(self.jan), hash((self.jan.start, self.jan.end))) + def test_parse_start_end(self): + result = DateSpan('2023-01-01', '2023-01-31') + self.assertEqual(result, self.jan) + + def test_parse_start_end_text(self): + result = DateSpan('January 2023', 'March 2023') + self.assertEqual(result, self.jan_feb_mar) + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tests/test_class_DateSpanParser.py b/tests/test_class_DateSpanParser.py index ebb9a55..589c024 100644 --- a/tests/test_class_DateSpanParser.py +++ b/tests/test_class_DateSpanParser.py @@ -1,11 +1,13 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + import unittest from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta import random -from datespanlib.parser.datespanparser import DateSpanParser -from datespanlib.parser.errors import ParsingError, EvaluationError -from datespanlib import DateSpan +from datespan.parser.datespanparser import DateSpanParser +from datespan.parser.errors import ParsingError, EvaluationError +from datespan.date_span import DateSpan diff --git a/tests/test_class_DateSpanSet.py b/tests/test_class_DateSpanSet.py index 7bb8a90..111cfe3 100644 --- a/tests/test_class_DateSpanSet.py +++ b/tests/test_class_DateSpanSet.py @@ -1,8 +1,9 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license import unittest from datetime import datetime -from datespanlib.date_span import DateSpan -from 
datespanlib.date_span_set import DateSpanSet +from datespan.date_span import DateSpan +from datespan.date_span_set import DateSpanSet class TestDateSpanSet(unittest.TestCase): @@ -20,19 +21,19 @@ def test_init(self): def test_iter(self): spans = list(self.jan_feb) - self.assertEqual(spans, [self.jan, self.feb]) + self.assertEqual(len(spans), 1) def test_len(self): - self.assertEqual(len(self.jan_feb), 2) + self.assertEqual(len(self.jan_feb), 1) def test_getitem(self): - self.assertEqual(self.jan_feb[0], self.jan) + self.assertEqual(self.jan_feb[0], self.jan_feb._spans[0]) def test_str(self): self.assertEqual(str(self.jan_feb), repr(self.jan_feb)) def test_repr(self): - self.assertEqual(repr(self.jan_feb), f"DateSpanSet('[{self.jan}, {self.feb}]') := [{self.jan}, {self.feb}]") + self.assertTrue(repr(self.jan_feb).startswith("DateSpanSet(")) @unittest.skip("Not implemented") def test_add(self): @@ -72,7 +73,7 @@ def test_bool(self): self.assertFalse(self.empty_set) def test_hash(self): - self.assertEqual(hash(self.jan_feb), hash(tuple([self.jan, self.feb]))) + self.assertEqual(hash(self.jan_feb), hash(DateSpanSet("Jan, Feb 2023"))) def test_copy(self): clone = self.jan_feb.__copy__() @@ -131,7 +132,7 @@ def test_to_df_lambda(self): def test_to_tuples(self): tuples = self.jan_feb.to_tuples() - self.assertEqual(tuples, [(self.jan.start, self.jan.end), (self.feb.start, self.feb.end)]) + self.assertEqual(tuples, [(self.jan.start, self.feb.end)]) def test_filter(self): import pandas as pd @@ -142,8 +143,8 @@ def test_filter(self): self.assertEqual(len(filtered), 2) def test_merge(self): - with self.assertRaises(NotImplementedError): - self.jan_feb.merge(self.mar) + test = self.jan_feb.merge(self.mar) + self.assertEqual(test, self.jan_feb_mar) def test_intersect(self): with self.assertRaises(NotImplementedError): diff --git a/tests/test_datespan_basics.py b/tests/test_datespan_basics.py index 46ddd47..3236c06 100644 --- a/tests/test_datespan_basics.py +++ 
b/tests/test_datespan_basics.py @@ -1,10 +1,9 @@ -# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + import sys from unittest import TestCase from datetime import date, datetime, time, timedelta -from dateutil.parser import parse - -from datespanlib import DateSpan, DateSpanSet, parse +from datespan.date_span import DateSpan class TestDateSpan(TestCase): diff --git a/tests/test_datespan_methods.py b/tests/test_datespan_methods.py index 822ecc5..a39a10f 100644 --- a/tests/test_datespan_methods.py +++ b/tests/test_datespan_methods.py @@ -1,6 +1,8 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + import unittest from datetime import datetime, timedelta, time -from datespanlib.date_span import DateSpan +from datespan.date_span import DateSpan class TestDateSpan(unittest.TestCase): diff --git a/tests/test_datespanset.py b/tests/test_datespanset.py index 792579e..aa94207 100644 --- a/tests/test_datespanset.py +++ b/tests/test_datespanset.py @@ -1,4 +1,4 @@ -# DateSpanLib - Copyright (c)2024, Thomas Zeutschler, MIT license +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license import sys from unittest import TestCase @@ -6,11 +6,11 @@ import numpy as np import pandas as pd -from datespanlib.date_span_set import DateSpanSet -from datespanlib.date_span import DateSpan +from datespan.date_span_set import DateSpanSet +from datespan.date_span import DateSpan -# from datespanlib.parser_old.en.tokenizer import Tokenizer +# from datespan.parser_old.en.tokenizer import Tokenizer class TestDateTextParser(TestCase): @@ -108,6 +108,7 @@ def test_datespans(self): def test_advanced(self): samples = [ + "from 2024-09-01 to 2024-09-10", "ly", "py", "ny", @@ -155,7 +156,7 @@ def test_advanced(self): "from 2024-09-10 14:00:00.123 to 2024-09-10 15:00:00.789", "10/09/2024 14:00:00.123456", - "from 2024-09-01 to 2024-09-10", + "between 09/01/2024 and 09/10/2024", "from 09.01.2024 to 
09.10.2024", "between 2024-09-01 and 2024-09-10", diff --git a/tests/test_debugging.py b/tests/test_debugging.py new file mode 100644 index 0000000..fa11490 --- /dev/null +++ b/tests/test_debugging.py @@ -0,0 +1,9 @@ +# datespan - Copyright (c)2024, Thomas Zeutschler, MIT license + +from unittest import TestCase +from datespan.date_span import DateSpan + +class TestDateTextParser(TestCase): + def test_datespan_parsing(self): + result = DateSpan('2023-01-01', '2023-01-31') + self.assertEqual(DateSpan('January 2023'), result) \ No newline at end of file