diff --git a/README.md b/README.md index ff748cb0..992bb792 100644 --- a/README.md +++ b/README.md @@ -4,40 +4,85 @@ Document datasets in Statistics Norway -## Usage example +## Usage -1. Run `from datadoc import main; main("./path/to/your/dataset")` to run Datadoc on a dataset of your choosing. -1. Complete metadata as you desire -1. Click `Lagre` to save a metadata document together with your dataset - -### If the datadoc package is not installed +![DataDoc in use](./doc/change-language-example.gif) -1. Clone this repo to your Jupyter instance (or local machine) -1. Open the `DataDoc.ipynb` Notebook and run the cell to see the example dataset +### From Jupyter -![DataDoc in use](./doc/change-language-example.gif) +1. Open a Jupyter Lab environment +1. Run `pip install ssb-datadoc` in the terminal +1. Upload a dataset to your Jupyter server (e.g. `person_data_v1.parquet`) +1. Run `from datadoc import main; main("./person_data_v1.parquet")` in a notebook +1. Datadoc will open in a new tab ## Contributing +### Prerequisites + +- Python >3.8 (3.10 is preferred) +- Poetry, install via `curl -sSL https://install.python-poetry.org | python3 -` + ### Dependency Management -Poetry is used for dependency management. +Poetry is used for dependency management. [Poe the Poet](https://github.com/nat-n/poethepoet) is used for running poe tasks within poetry's virtualenv. After cloning this project, first install the necessary dependencies, then run the tests to verify everything is working. + +#### Install all dependencies + +```shell +poetry install +``` + +### Add dependencies + +#### Main + +```shell +poetry add <package-name> +``` + +#### Dev + +```shell +poetry add --group dev <package-name> +``` + +### Run tests + +```shell +poetry run poe test +``` + +### Run project locally -To install all required dependencies in a virtual environment run `poetry install`. To add a new dependency to the project run `poetry add `. 
+To run the project locally: + +```shell +poetry run poe datadoc "gs://ssb-staging-dapla-felles-data-delt/datadoc/klargjorte_data/person_data_v1.parquet" +``` ### Run project locally in Jupyter To run the project locally in Jupyter run: -```bash -poetry install -poetry run jupyter lab +```shell +poetry run poe jupyter ``` -A Jupyter instance should open in your browser. Once there, open the `*.ipynb` file. Before running it, select the correct interpreter via `Kernel > Change Kernel > datadoc`. +A Jupyter instance should open in your browser. Open and run the cells in the `.ipynb` file to demo datadoc. -### Run tests +### Bump version + +```shell +poetry run poe bump-patch-version +``` + +> :warning: Run this on the default branch + +This command will: + +1. Increment version strings in files +1. Commit the changes +1. Tag the commit with the new version -1. Install dev dependencies (see [Dependency Management](#dependency-management)) -1. Run `poetry shell` to open a shell in the Virtual Environment for the project -1. Run `pytest` in the root of the project +Then just run `git push origin --tags` to push the changes and trigger the release process. 
diff --git a/datadoc/app.py b/datadoc/app.py index ba6ccbc4..c03d0ef2 100644 --- a/datadoc/app.py +++ b/datadoc/app.py @@ -72,20 +72,20 @@ def main(dataset_path: str = None): if dataset_path is None: # Get the supplied command line argument parser = argparse.ArgumentParser() - parser.add_argument("--dataset-path", help="Specify the path to a dataset") - args = parser.parse_args() - # Use example dataset if nothing specified - dataset = ( - args.dataset_path - or f"{os.path.dirname(__file__)}/../klargjorte_data/person_data_v1.parquet" + parser.add_argument( + "dataset_path", + help="Specify the path to a dataset", + nargs="?", + default=f"{os.path.dirname(__file__)}/../klargjorte_data/person_data_v1.parquet", ) + dataset = parser.parse_args().dataset_path else: dataset = dataset_path state.metadata = DataDocMetadata(dataset) if running_in_notebook(): - jupyterlab = "^3.4.5" ipykernel = "^6.13.0" mypy = "^0.950" @@ -53,11 +60,18 @@ pre-commit = "^2.20.0" autoflake = "^1.4" pytest-mock = "^3.8.2" ipython = "^8.4.0" +poethepoet = "^0.16.1" [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" +[tool.poe.tasks] +test = "pytest -vvvv --cache-clear --cov-report=term-missing --cov=." +bump-patch-version = "bump2version patch" +jupyter = "jupyter lab" +datadoc = "python datadoc/app.py" + [tool.isort] profile = "black" src_paths = ["datadoc"]