Merge pull request #1 from Zeutschler/dev

initial commit
Zeutschler · Sep 14, 2024 · 503dd42 · 503dd42
2 parents c9fe918 + 1a44d95
commit 503dd42
Show file tree

Hide file tree

Showing 20 changed files with 2,581 additions and 1 deletion.
diff --git a/.github/workflows/pypi-upload.yml b/.github/workflows/pypi-upload.yml
@@ -0,0 +1,71 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# GitHub recommends pinning actions to a commit SHA.
+# To get a newer version, you will need to update the SHA.
+# You can also reference a tag or branch, but the action may change without warning.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  # Builds the release distributions with `python -m build` and stores the
+  # resulting dist/ directory as the `release-dists` artifact.
+  release-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Fetch the repository contents onto the runner.
+      - uses: actions/checkout@v4
+
+      # Provide a Python interpreter matching the "3.x" version spec.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      # Install the build tooling plus the packages listed in
+      # requirements.txt (if that file exists), then build the distributions.
+      - name: Build release distributions
+        run: |
+          # NOTE: put your own distribution build steps here.
+          python -m pip install build
+          python -m pip install numpy
+          python -m pip install pandas
+          if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
+          
+          python -m build
+
+      # Keep the contents of dist/ so that the pypi-publish job can download them.
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+  # Downloads the `release-dists` artifact and publishes its contents to PyPI.
+  pypi-publish:
+    runs-on: ubuntu-latest
+
+    # Run only after the release-build job has finished.
+    needs:
+      - release-build
+
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+
+    # Dedicated environments with protections for publishing are strongly recommended.
+    environment:
+      name: pypi
+      # OPTIONAL: PyPI project URL that is shown in the deployment status.
+      url: https://pypi.org/p/datespanlib
+
+    steps:
+      # Fetch the distributions uploaded by the release-build job into dist/.
+      - name: Retrieve release distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+      # No credentials are passed to this step.
+      # NOTE(review): presumably relies on trusted publishing via the
+      # id-token permission above - confirm the publisher is configured on PyPI.
+      - name: Publish release distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -0,0 +1,63 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+  workflow_dispatch:
+
+jobs:
+  # Lints the code base with flake8, runs the test suite with coverage and
+  # uploads the coverage and JUnit reports for every matrix combination.
+  build:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]   # [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ["3.11"]  # ["3.10", "3.11"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      # Same major version of the action as used by the PyPI upload workflow.
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest pytest-cov
+        python -m pip install build
+        python -m pip install numpy
+        python -m pip install pandas
+        if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
+
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+    - name: Run tests
+      # --junitxml writes the report file that the artifact upload step below expects.
+      run: pytest --cov --junitxml=junit/test-results-${{ matrix.python-version }}.xml
+
+    - name: Upload results to Codecov
+      uses: codecov/codecov-action@v4
+      with:
+        fail_ci_if_error: false
+        token: ${{ secrets.CODECOV_TOKEN }}
+
+    - name: Upload pytest test results
+      uses: actions/upload-artifact@v4
+      with:
+        name: pytest-results-${{ matrix.python-version }}
+        path: junit/test-results-${{ matrix.python-version }}.xml
+      # Use always() to always run this step to publish test results when there are test failures
+      if: ${{ always() }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,21 @@
+# Changelog
+
+All notable changes to the DateSpanLib project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+Categories: Added, Changed, Fixed, Deprecated, Removed, Security
+
+## [0.1.01] - in progress
+
+### Added
+- Documentation and examples for the DateSpan and DateSpanSet classes.
+### Changed
+### Fixed
+
+
+## [0.1.0] - 2024-09-14
+
+### Added
+- Initial release, carved out from [CubedPandas](https://github.com/Zeutschler/cubedpandas) project.
+
diff --git a/README.md b/README.md
@@ -1,2 +1,130 @@
 # DateSpanLib
-Python library for handling data and time spans.
+![GitHub license](https://img.shields.io/github/license/Zeutschler/datespanlib?color=A1C547)
+![PyPI version](https://img.shields.io/pypi/v/datespanlib?logo=pypi&logoColor=979DA4&color=A1C547)
+![Python versions](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fraw.githubusercontent.com%2FZeutschler%2Fdatespanlib%2Fmaster%2Fpyproject.toml&query=%24%5B'project'%5D%5B'requires-python'%5D&color=A1C547)
+![PyPI Downloads](https://img.shields.io/pypi/dm/datespanlib.svg?logo=pypi&logoColor=979DA4&label=PyPI%20downloads&color=A1C547)
+![GitHub last commit](https://img.shields.io/github/last-commit/Zeutschler/datespanlib?logo=github&logoColor=979DA4&color=A1C547)
+![unit tests](https://img.shields.io/github/actions/workflow/status/zeutschler/datespanlib/python-package.yml?logo=GitHub&logoColor=979DA4&label=unit%20tests&color=A1C547)
+![build](https://img.shields.io/github/actions/workflow/status/zeutschler/datespanlib/python-package.yml?logo=GitHub&logoColor=979DA4&color=A1C547)
+![documentation](https://img.shields.io/github/actions/workflow/status/zeutschler/datespanlib/static-site-upload.yml?logo=GitHub&logoColor=979DA4&label=docs&color=A1C547&link=https%3A%2F%2Fzeutschler.github.io%2Fcubedpandas%2F)
+![codecov](https://codecov.io/github/Zeutschler/datespanlib/graph/badge.svg?token=B12O0B6F10)
+
+
+-----------------
+A Python library for handling and using date and time spans.
+
+```python
+from datespanlib import DateSpan
+
+ds = DateSpan("January to March 2024")
+print("2024-04-15" in ds + "1 month")  # returns True  
+```
+
+The DateSpanLib library is designed to be used for data analysis and data processing, 
+where date and time spans are often used to filter, aggregate or join data. But it 
+should also be valuable in any other context where date and time spans are used.
+
+It provides dependency free integrations with Pandas, Numpy, Spark and others, can 
+generate Python code artefacts, either as source text or as precompiled (lambda) 
+functions and can also generate SQL fragments for filtering in SQL WHERE clauses.
+
+#### Background
+The DateSpanLib library has been carved out from the 
+[CubedPandas](https://github.com/Zeutschler/cubedpandas) project - a library for 
+intuitive data analysis with Pandas dataframes - as it serves a broader purpose and 
+can be used independently of CubedPandas. 
+
+For internal DateTime parsing and manipulation, 
+the great [dateutil](https://github.com/dateutil/dateutil) library is used. The
+DateSpanLib library has no other dependencies (like Pandas, Numpy, Spark etc.), 
+so it is lightweight and easy to install.
+
+## Installation
+The library can be installed via pip; it is also available for download on [PyPI](https://pypi.org/project/datespanlib/).
+```bash
+pip install datespanlib
+```
+
+## Usage
+
+The library provides the following methods and classes:
+
+### Method parse() 
+The `parse` method converts an arbitrary string into a `DateSpanSet` object. The string can be a simple date
+like '2021-01-01' or a complex date span expression like 'Mondays to Wednesday last month'.
+
+### Class DateSpan
+`DateSpan` objects represent a single span of time, typically represented by a `start` and `end` datetime.
+The `DateSpan` object provides methods to compare, merge, split, shift, expand, intersect etc. with other
+`DateSpan` or Python datetime objects.
+
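+`DateSpan` objects are 'expansive' in the sense that they resolve the widest possible time span
+for the given arguments, e.g. if a `DateSpan` object is created with a start date of '2021-01-01'
+and an end date of '2021-01-31', it covers the full period from the very beginning of January 1st
+to the very end of January 31st.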
+
+
+
+
+###  DateSpanSet - represents an ordered set of DateSpan objects
+`DateSpanSet` is an ordered and redundancy free collection of `DateSpan` objects. If e.g. two `DateSpan` 
+objects in the set overlap or are contiguous, they are merged into one `DateSpan` object. Aside from
+set-related operations, the `DateSpanSet` comes with two special capabilities worth mentioning:
+
+* A built-in **interpreter for arbitrary date, time and date span strings**, ranging from simple dates
+  like '2021-01-01' up to complex date span expressions like 'Mondays to Wednesday last month'.
+
+* Provides methods and can create **artefacts and callables for data processing** with Python, SQL, Pandas,
+  Numpy, Spark and other compatible libraries.
+
+
+
+
+## Basic Usage
+```python
+from datespanlib import parse, DateSpanSet, DateSpan
+
+# Create a DateSpan object
+jan = DateSpan(start='2024-01-01', end='2024-01-31')
+feb = DateSpan("February 2024")
+
+jan_feb = DateSpanSet([jan, feb]) # Create a DateSpanSet object
+assert(len(jan_feb) == 1)  # returns 1, as the consecutive or overlapping DateSpan objects get merged.
+
+assert (jan_feb == parse("January, February 2024")) # Compare DateSpan objects
+
+# Set operations
+jan_feb_mar = jan_feb + "1 month"
+assert(jan_feb_mar == parse("first 3 month of 2024"))
+jan_mar = jan_feb_mar - "February 2024"
+assert(len(jan_mar) == 2)  # returns 2, as the one DateSpan gets split into two DateSpans.
+assert(jan_mar.contains("2024-01-15"))  
+
+# Use DateSpanSet to filter Pandas DataFrame
+import pandas as pd
+df = pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")})
+result = df[df["date"].apply(jan_mar.contains)]  # don't use this, slow
+result = jan_mar.filter(df, "date")  # fast vectorized operation
+
+# Use DateSpanSet to filter Spark DataFrame
+from pyspark.sql import SparkSession
+spark = SparkSession.builder.getOrCreate()
+df = spark.createDataFrame(pd.DataFrame({"date": pd.date_range("2024-01-01", "2024-12-31")}))
+result = jan_mar.filter(df, "date")  # fast vectorized/distributed operation
+
+# Use DateSpanSet to filter Numpy array
+import numpy as np
+arr = np.arange(np.datetime64("2024-01-01"), np.datetime64("2024-12-31"))
+result = jan_mar.filter(arr)  # fast vectorized operation
+
+# Use DateSpanSet to create an SQL WHERE statement
+sql = f"SELECT * FROM table WHERE {jan_mar.to_sql('date')}"
+```
+
+
+
+
+
+
+
+
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,48 @@
+# Build the package with setuptools (version 61.0 or newer) as the build backend.
+[build-system]
+requires = ["setuptools >= 61.0"]
+build-backend = "setuptools.build_meta"
+
+# Core package metadata. The version is declared as dynamic and therefore
+# not listed here.
+[project]
+name = "datespanlib"
+description = "A library for handling date spans."
+keywords = ['python', 'datetime', 'timespan', 'pandas', 'numpy', 'spark', 'data analysis', 'sql', 'dataframe', ]
+classifiers = [
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Development Status :: 2 - Pre-Alpha",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development",
+    "Topic :: Scientific/Engineering",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX",
+    "Operating System :: Unix",
+    "Operating System :: MacOS",
+]
+readme = "README.md"
+dynamic = ["version"]
+license = {file = "LICENSE"}
+requires-python = ">= 3.10"
+# Name and email belong to the same person, so they share one table;
+# separate tables would declare two distinct authors.
+authors = [
+    {name = "Thomas Zeutschler", email = "cubedpandas@gmail.com"},
+]
+maintainers = [
+    {name = "Thomas Zeutschler", email="cubedpandas@gmail.com"},
+]
+# The only runtime dependency of the library.
+dependencies = [
+    "python-dateutil"
+]
+
+# Links published alongside the package metadata.
+[project.urls]
+Homepage = "https://github.com/Zeutschler/DateSpanLib"
+Documentation = "https://github.com/Zeutschler/DateSpanLib"
+Repository = "https://github.com/Zeutschler/DateSpanLib.git"
+Issues = "https://github.com/Zeutschler/DateSpanLib/issues"
+# Files in a GitHub repository are served below /blob/<branch>/.
+Changelog = "https://github.com/Zeutschler/DateSpanLib/blob/main/CHANGELOG.md"
+pypi = "https://pypi.org/project/datespanlib/"
+
+# The package version is taken from the `__version__` attribute of `datespanlib`.
+[tool.setuptools.dynamic]
+version = {attr = "datespanlib.__version__"}
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+python-dateutil
diff --git a/samples/using_with_pandas.py b/samples/using_with_pandas.py
@@ -0,0 +1,24 @@
+from datetime import datetime
+import pandas as pd
+from datespanlib import DateSpanSet, DateSpan, parse
+
+df = pd.DataFrame.from_dict({
+    "product": ["A", "B", "C", "A", "B", "C"],
+    "date": [datetime(2024, 6, 1), datetime(2024, 6, 2),
+             datetime(2024, 7, 1), datetime(2024, 7, 2),
+             datetime(2024, 12, 1), datetime(2023, 12, 2)],
+    "sales": [100, 150, 300, 200, 250, 350]
+})
+
+# create a DateSpanSet
+spans = DateSpanSet("June")
+print(spans)
+
+# filer the DataFrame using the DateSpanSet
+filtered_df = spans.filter(df["date"], return_mask=False)
+print(filtered_df)
+
+
+
+
+