-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from Zeutschler/dev
initial commit
- Loading branch information
Showing
20 changed files
with
2,581 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# This workflow uses actions that are not certified by GitHub. | ||
# They are provided by a third-party and are governed by | ||
# separate terms of service, privacy policy, and support | ||
# documentation. | ||
|
||
# GitHub recommends pinning actions to a commit SHA. | ||
# To get a newer version, you will need to update the SHA. | ||
# You can also reference a tag or branch, but the action may change without warning. | ||
|
||
name: Upload Python Package | ||
|
||
on: | ||
release: | ||
types: [published] | ||
workflow_dispatch: | ||
|
||
permissions: | ||
contents: read | ||
|
||
jobs: | ||
release-build: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- uses: actions/checkout@v4 | ||
|
||
- uses: actions/setup-python@v5 | ||
with: | ||
python-version: "3.x" | ||
|
||
- name: Build release distributions | ||
run: | | ||
# NOTE: put your own distribution build steps here. | ||
python -m pip install build | ||
python -m pip install numpy | ||
python -m pip install pandas | ||
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi | ||
python -m build | ||
- name: Upload distributions | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: release-dists | ||
path: dist/ | ||
|
||
pypi-publish: | ||
runs-on: ubuntu-latest | ||
|
||
needs: | ||
- release-build | ||
|
||
permissions: | ||
# IMPORTANT: this permission is mandatory for trusted publishing | ||
id-token: write | ||
|
||
# Dedicated environments with protections for publishing are strongly recommended. | ||
environment: | ||
name: pypi | ||
# PyPI project URL shown in the deployment status: | ||
url: https://pypi.org/p/datespanlib | ||
|
||
steps: | ||
- name: Retrieve release distributions | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: release-dists | ||
path: dist/ | ||
|
||
- name: Publish release distributions to PyPI | ||
uses: pypa/gh-action-pypi-publish@release/v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions | ||
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python | ||
|
||
name: Python package | ||
|
||
on: | ||
push: | ||
branches: [ "main" ] | ||
pull_request: | ||
branches: [ "main" ] | ||
workflow_dispatch: | ||
|
||
jobs: | ||
build: | ||
|
||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
os: [ubuntu-latest] # [ubuntu-latest, macos-latest, windows-latest] | ||
python-version: ["3.11"] # ["3.10", "3.11"] | ||
|
||
steps: | ||
- uses: actions/checkout@v4 | ||
# Same setup-python major version as the release workflow; v3 targets the retired Node 16 runtime. | ||
- name: Set up Python ${{ matrix.python-version }} | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: ${{ matrix.python-version }} | ||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
python -m pip install flake8 pytest | ||
python -m pip install build | ||
python -m pip install numpy | ||
python -m pip install pandas | ||
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi | ||
- name: Lint with flake8 | ||
run: | | ||
# stop the build if there are Python syntax errors or undefined names | ||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||
- name: Install with pytest | ||
run: pip install pytest pytest-cov | ||
|
||
# Write a JUnit XML report so the "Upload pytest test results" step has a file to publish. | ||
- name: Run tests | ||
run: pytest --cov --junitxml=junit/test-results-${{ matrix.python-version }}.xml # python -m pytest --cov | ||
|
||
- name: Upload results to Codecov | ||
uses: codecov/codecov-action@v4 | ||
with: | ||
fail_ci_if_error: false | ||
token: ${{ secrets.CODECOV_TOKEN }} | ||
|
||
- name: Upload pytest test results | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: pytest-results-${{ matrix.python-version }} | ||
path: junit/test-results-${{ matrix.python-version }}.xml | ||
# Use always() to always run this step to publish test results when there are test failures | ||
if: ${{ always() }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Changelog | ||
|
||
All notable changes to the DateSpanLib project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), | ||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||
Categories: Added, Changed, Deprecated, Removed, Fixed, Security | ||
|
||
## [0.1.01] - in progress | ||
|
||
### Added | ||
- Documentation and examples for the DateSpan and DateSpanSet classes. | ||
### Changed | ||
### Fixed | ||
|
||
|
||
## [0.1.0] - 2024-09-14 | ||
|
||
### Added | ||
- Initial release, carved out from [CubedPandas](https://github.com/Zeutschler/cubedpandas) project. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,130 @@ | ||
# DateSpanLib | ||
Python library for handling date and time spans.
 | ||
 | ||
 | ||
 | ||
 | ||
 | ||
 | ||
 | ||
 | ||
|
||
|
||
----------------- | ||
A Python library for handling and using date and time spans. | ||
|
||
```python | ||
from datespanlib import DateSpan | ||
|
||
ds = DateSpan("January to March 2024") | ||
print("2024-04-15" in ds + "1 month") # returns True | ||
``` | ||
|
||
The DateSpanLib library is designed to be used for data analysis and data processing, | ||
where date and time spans are often used to filter, aggregate or join data. But it | ||
should also be valuable in any other context where date and time spans are used. | ||
|
||
It provides dependency free integrations with Pandas, Numpy, Spark and others, can | ||
generate Python code artefacts, either as source text or as precompiled (lambda) | ||
functions and can also generate SQL fragments for filtering in SQL WHERE clauses. | ||
|
||
#### Background | ||
The DateSpanLib library has been carved out from the | ||
[CubedPandas](https://github.com/Zeutschler/cubedpandas) project - a library for | ||
intuitive data analysis with Pandas dataframes - as it serves a broader purpose and | ||
can be used independently of CubedPandas. | ||
|
||
For internal DateTime parsing and manipulation, | ||
the great [dateutil](https://github.com/dateutil/dateutil) library is used. The | ||
DateSpanLib library has no other dependencies (like Pandas, Numpy, Spark etc.), | ||
so it is lightweight and easy to install. | ||
|
||
## Installation | ||
The library can be installed via pip or is available as a download on [PyPI.org](https://pypi.org/project/datespanlib/). | ||
```bash | ||
pip install datespanlib | ||
``` | ||
|
||
## Usage | ||
|
||
The library provides the following methods and classes: | ||
|
||
### Method parse() | ||
The `parse` method converts an arbitrary string into a `DateSpanSet` object. The string can be a simple date | ||
like '2021-01-01' or a complex date span expression like 'Mondays to Wednesday last month'. | ||
|
||
### Class DateSpan | ||
`DateSpan` objects represent a single span of time, typically represented by a `start` and `end` datetime. | ||
The `DateSpan` object provides methods to compare, merge, split, shift, expand, intersect etc. with other | ||
`DateSpan` or Python datetime objects. | ||
|
||
`DateSpan` objects are 'expansive' in the sense that they resolve the widest possible time span | ||
for the given input, e.g. if a `DateSpan` object is created with a start date of '2021-01-01' and | ||
an end date of '2021-01-31', the span covers everything from the start of the first day to the end of the last day. | ||
|
||
|
||
|
||
|
||
### DateSpanSet - represents an ordered set of DateSpan objects | ||
`DateSpanSet` is an ordered and redundancy-free collection of `DateSpan` objects. If e.g. two `DateSpan` | ||
objects in the set would overlap or are contiguous, they are merged into one `DateSpan` object. Aside | ||
from set-related operations, the `DateSpanSet` comes with two special capabilities worth mentioning: | ||
|
||
* A built-in **interpreter for arbitrary date, time and date span strings**, ranging from simple dates | ||
like '2021-01-01' up to complex date span expressions like 'Mondays to Wednesday last month'. | ||
|
||
* Provides methods and can create **artefacts and callables for data processing** with Python, SQL, Pandas, | ||
Numpy, Spark and other compatible libraries. | ||
|
||
|
||
|
||
|
||
## Basic Usage | ||
```python | ||
from datespanlib import parse, DateSpanSet, DateSpan | ||
|
||
# Create a DateSpan object | ||
jan = DateSpan(start='2024-01-01', end='2024-01-31') | ||
feb = DateSpan("February 2024") | ||
|
||
jan_feb = DateSpanSet([jan, feb]) # Create a DateSpanSet object | ||
assert(len(jan_feb) == 1) # returns 1, as the consecutive or overlapping DateSpan objects get merged. | ||
|
||
assert (jan_feb == parse("January, February 2024")) # Compare DateSpan objects | ||
|
||
# Set operations | ||
jan_feb_mar = jan_feb + "1 month" | ||
assert(jan_feb_mar == parse("first 3 month of 2024")) | ||
jan_mar = jan_feb_mar - "February 2024" | ||
assert(len(jan_mar) == 2) # returns 2, as the single DateSpan gets split into two DateSpans. | ||
assert(jan_mar.contains("2024-01-15")) | ||
|
||
# Use DateSpanSet to filter Pandas DataFrame | ||
import pandas as pd | ||
df = pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")}) | ||
result = df[df["date"].apply(jan_mar.contains)] # don't use this, slow | ||
result = jan_mar.filter(df, "date") # fast vectorized operation | ||
|
||
# Use DateSpanSet to filter Spark DataFrame | ||
from pyspark.sql import SparkSession | ||
spark = SparkSession.builder.getOrCreate() | ||
df = spark.createDataFrame(pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")})) | ||
result = jan_mar.filter(df, "date") # fast vectorized/distributed operation | ||
|
||
# Use DateSpanSet to filter Numpy array | ||
import numpy as np | ||
arr = np.arange(np.datetime64("2024-01-01"), np.datetime64("2024-12-31")) | ||
result = jan_mar.filter(arr) # fast vectorized operation | ||
|
||
# Use DateSpanSet to create an SQL WHERE statement | ||
sql = f"SELECT * FROM table WHERE {jan_mar.to_sql('date')}" | ||
``` | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
[build-system] | ||
requires = ["setuptools >= 61.0"] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[project] | ||
name = "datespanlib" | ||
description = "A library for handling date spans." | ||
keywords = ['python', 'datetime', 'timespan', 'pandas', 'numpy', 'spark', 'data analysis', 'sql', 'dataframe', ] | ||
classifiers = [ | ||
"Intended Audience :: Science/Research", | ||
"Intended Audience :: Developers", | ||
"License :: OSI Approved :: MIT License", | ||
"Development Status :: 2 - Pre-Alpha", | ||
"Programming Language :: Python :: 3.10", | ||
"Programming Language :: Python :: 3.11", | ||
"Programming Language :: Python :: 3.12", | ||
"Topic :: Software Development", | ||
"Topic :: Scientific/Engineering", | ||
"Operating System :: Microsoft :: Windows", | ||
"Operating System :: POSIX", | ||
"Operating System :: Unix", | ||
"Operating System :: MacOS", | ||
] | ||
readme = "README.md" | ||
dynamic = ["version"] | ||
license = {file = "LICENSE"} | ||
requires-python = ">= 3.10" | ||
# One table per person: separate name-only and email-only tables are read as two distinct authors. | ||
authors = [ | ||
{name = "Thomas Zeutschler", email = "cubedpandas@gmail.com"}, | ||
] | ||
maintainers = [ | ||
{name = "Thomas Zeutschler", email="cubedpandas@gmail.com"}, | ||
] | ||
dependencies = [ | ||
"python-dateutil" | ||
] | ||
|
||
[project.urls] | ||
Homepage = "https://github.com/Zeutschler/DateSpanLib" | ||
Documentation = "https://github.com/Zeutschler/DateSpanLib" | ||
Repository = "https://github.com/Zeutschler/DateSpanLib.git" | ||
Issues = "https://github.com/Zeutschler/DateSpanLib/issues" | ||
# Repository files are served under /blob/<branch>/; the bare repository path does not resolve to the file. | ||
Changelog = "https://github.com/Zeutschler/DateSpanLib/blob/main/CHANGELOG.md" | ||
pypi = "https://pypi.org/project/datespanlib/" | ||
|
||
[tool.setuptools.dynamic] | ||
version = {attr = "datespanlib.__version__"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
python-dateutil |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from datetime import datetime | ||
import pandas as pd | ||
from datespanlib import DateSpanSet, DateSpan, parse | ||
|
||
df = pd.DataFrame.from_dict({ | ||
"product": ["A", "B", "C", "A", "B", "C"], | ||
"date": [datetime(2024, 6, 1), datetime(2024, 6, 2), | ||
datetime(2024, 7, 1), datetime(2024, 7, 2), | ||
datetime(2024, 12, 1), datetime(2023, 12, 2)], | ||
"sales": [100, 150, 300, 200, 250, 350] | ||
}) | ||
|
||
# create a DateSpanSet | ||
spans = DateSpanSet("June") | ||
print(spans) | ||
|
||
# filter the DataFrame using the DateSpanSet | ||
filtered_df = spans.filter(df["date"], return_mask=False) | ||
print(filtered_df) | ||
|
||
|
||
|
||
|
||
|
Oops, something went wrong.