Skip to content

Commit

Permalink
build: Use uv over Poetry
Browse files Browse the repository at this point in the history
  • Loading branch information
jesse-c committed Oct 18, 2024
1 parent 19b05ca commit 00e84fc
Show file tree
Hide file tree
Showing 7 changed files with 4,488 additions and 142 deletions.
66 changes: 44 additions & 22 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,57 +11,79 @@ permissions:
contents: write

jobs:
test:
lint:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Install poetry
shell: bash
run: pipx install poetry==1.8.2
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install python or load from cache with dependencies
- name: Install Python or load from cache with dependencies
uses: actions/setup-python@v5
with:
python-version-file: .python-version
cache: poetry
python-version-file: "pyproject.toml"

- name: Install dependencies
run: poetry install --with dev
- name: Install the project
run: uv sync --all-extras --dev

- name: Lint
run: |
poetry run pre-commit install
poetry run pre-commit run --all-files --show-diff-on-failure
uv run pre-commit install
uv run pre-commit run --all-files --show-diff-on-failure
test:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install python or load from cache with dependencies
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

- name: Install the project
run: uv sync --all-extras --dev

- name: Test
run: poetry run pytest -v
run: uv run pytest -v

deploy:
if: github.ref == 'refs/heads/main'
needs: [test]
needs: [lint, test]
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Install poetry
shell: bash
run: pipx install poetry==1.8.2
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install python or load from cache with dependencies
- name: Install Python or load from cache with dependencies
uses: actions/setup-python@v5
with:
python-version-file: .python-version
cache: poetry
python-version-file: "pyproject.toml"

- name: Install dependencies
run: poetry install
- name: Install the project
run: uv sync --all-extras --dev

- name: Deploy docs
working-directory: ./docs
run: poetry run mkdocs gh-deploy --force
run: uv run mkdocs gh-deploy --force

- name: Deploy more
run: echo 'this is a placeholder'
5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,3 @@ repos:
files: ^src/
types: [python]
additional_dependencies: ['pyright@1.1.294']
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check

1 change: 0 additions & 1 deletion .python-version

This file was deleted.

32 changes: 16 additions & 16 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,57 @@ set dotenv-load

# install dependencies and set up the project
install:
poetry install --with dev
poetry run pre-commit install
poetry run ipython kernel install --user
uv sync --extra dev
uv run pre-commit install
uv run ipython kernel install --user

# test the project
test:
poetry run pytest
uv run pytest

# update the snapshots for the tests
test-snapshot-update:
poetry run pytest --snapshot-update
uv run pytest --snapshot-update

# run linters and code formatters
lint:
poetry run pre-commit run --all-files --show-diff-on-failure
uv run pre-commit run --all-files --show-diff-on-failure

# build a dataset of passages
build-dataset:
poetry run python scripts/build_dataset.py
uv run python scripts/build_dataset.py

# fetch metadata and labelled passages for a specific wikibase ID
get-concept id:
poetry run python scripts/get_concept.py --wikibase-id {{id}}
uv run python scripts/get_concept.py --wikibase-id {{id}}

# train a model for a specific wikibase ID
train id +OPTS="":
poetry run train --wikibase-id {{id}} {{OPTS}}
uv run scripts/train.py --wikibase-id {{id}} {{OPTS}}

# evaluate a model for a specific wikibase ID
evaluate id:
poetry run python scripts/evaluate.py --wikibase-id {{id}}
uv run python scripts/evaluate.py --wikibase-id {{id}}

# promote a model for a specific wikibase ID
promote id +OPTS="":
poetry run promote --wikibase-id {{id}} {{OPTS}}
uv run scripts/promote.py --wikibase-id {{id}} {{OPTS}}

# run a model for a specific wikibase ID on a supplied string
label id string:
poetry run python scripts/label.py --wikibase-id {{id}} --input-string {{string}}
uv run python scripts/label.py --wikibase-id {{id}} --input-string {{string}}

# find instances of the concept in a set of passages for a specific wikibase ID
predict id:
poetry run python scripts/predict.py --wikibase-id {{id}}
uv run python scripts/predict.py --wikibase-id {{id}}

# sample a set of passages from the dataset for a specific wikibase ID
sample id:
poetry run python scripts/sample.py --wikibase-id {{id}}
uv run python scripts/sample.py --wikibase-id {{id}}

# push a sampled set of passages to argilla for a specific wikibase ID
push-to-argilla id usernames workspace:
poetry run python scripts/push_to_argilla.py --wikibase-id {{id}} --usernames {{usernames}} --workspace {{workspace}}
uv run python scripts/push_to_argilla.py --wikibase-id {{id}} --usernames {{usernames}} --workspace {{workspace}}

# run the full pipeline for a specific wikibase ID
create-labelling-task id usernames workspace:
Expand All @@ -64,6 +64,6 @@ create-labelling-task id usernames workspace:

# visualise IAA, model vs gold-standard agreement, and positive predictions on the full dataset
visualise-labels id:
poetry run python scripts/visualise_labels.py --wikibase-id {{id}}
uv run python scripts/visualise_labels.py --wikibase-id {{id}}

analyse-classifier id: (get-concept id) (train id) (predict id) (evaluate id) (visualise-labels id)
166 changes: 71 additions & 95 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,106 +1,82 @@
[tool.poetry]
[project]
name = "knowledge-graph"
version = "0.1.0"
description = ""
authors = ["CPR Data Science <dsci@climatepolicyradar.org>"]
license = "Apache 2.0"
description = "Add your description here"
readme = "README.md"
packages = [
{ include = "src", from = "." },
{ include = "tests", from = "." },
{ include = "scripts", from = "." },
requires-python = ">=3.11"
dependencies = [
"argilla==1.29.1",
"azure-pdf-parser",
"boto3==1.35.31",
"cpr-sdk==1.9.1",
"griffe==0.48.0",
"httpx==0.26.0",
"ipykernel==6.29.3",
"more-itertools==10.3.0",
"neomodel==5.3.3",
"prefect==2.16.8",
"python-dotenv==1.0.1",
"rapidfuzz==3.10.0",
"rich==13.7.0",
"sentence-transformers==3.1.1",
"tqdm>=4.66.5",
"typer==0.9.4",
"wandb==0.18.3",
# The following works around the fact that PyTorch stopped supporting Intel Macs after 2.2.2
# See: https://github.com/pytorch/pytorch/issues/114602
"torch==2.2.2; sys_platform == 'darwin' and platform_machine == 'x86_64'",
"torch==2.4.1; sys_platform != 'darwin' or platform_machine != 'x86_64'",
]

[tool.poetry.dependencies]
python = ">=3.10,<3.11"
httpx = "^0.26.0"
rich = "^13.7.0"
tqdm = "^4.66.2"
ipykernel = "^6.29.3"
azure_pdf_parser = { git = "https://github.com/climatepolicyradar/azure-pdf-parser.git", tag = "v0.4.3" }
# navigator_document_parser = { git = "https://github.com/climatepolicyradar/navigator-document-parser.git" }
neomodel = "^5.3.3"
typer = "0.9.4"
rapidfuzz = "^3.10.0"
python-dotenv = "^1.0.1"
hypothesis = "^6.112.2"
more-itertools = "^10.3.0"
argilla = "1.29.1"
prefect = "2.16.8"
griffe = "0.48.0"
cpr-sdk = "^1.7.1"
sentence-transformers = "^3.1.1"
wandb = "^0.18.3"
boto3 = "^1.35.31"
moto = {extras = ["s3"], version = "^5.0.16"}
# The following works around the fact that PyTorch stopped supporting Intel Macs after 2.2.2
# See: https://github.com/pytorch/pytorch/issues/114602
torch = [
{version = "2.2.2", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
{version = "2.4.1", markers = "sys_platform != 'darwin' and platform_machine != 'x86_64'"},
]


[[tool.poetry.source]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"


[tool.poetry.scripts]
train = "scripts.train:app"
promote = "scripts.promote:app"

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
mkdocs-material = "^9.5.39"
pre-commit = "^3.8.0"
boto3 = "^1.35.32"
syrupy = "^4.7.1"
moto = {extras = ["s3"], version = "^5.0.16"}
[project.optional-dependencies]
dev = [
"hypothesis==6.112.2",
"mkdocs-material==9.5.39",
"moto[s3]==5.0.16",
"pre-commit>=4.0.1",
"pytest>=8.3.3",
"ruff>=0.7.0",
"syrupy>=4.7.2",
]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.uv.sources]
azure-pdf-parser = { git = "https://github.com/climatepolicyradar/azure-pdf-parser.git", tag = "v0.4.3" }

[tool.ruff]
lint.select = ["E", "F", "D", "I"]
line-length = 88
# Docstring Ignores:
# D100 - Missing docstring in public module
# D103 - Missing docstring in public function
# D104 - Missing docstring in public package
# D107 - Missing docstring in __init__
# D202 - No blank lines allowed after function docstring
# D203 - 1 blank line required before class docstring
# D212 - Multi-line docstring summary should start at the first line
# D400 - First line should end with a period
# D401 - First line should be in imperative mood
# D406 - Section name should end with a newline
# D407 - Missing dashed underline after section
# D413 - Missing blank line after last section
# D415 - First line should end with a period, question mark, or exclamation point
lint.ignore = [
"D100",
"D103",
"D104",
"D107",
"D202",
"D203",
"D212",
"D400",
"D401",
"D406",
"D407",
"D413",
"D415",
"E501",
lint.select = ["E", "F", "D", "I"]
line-length = 88
# Docstring Ignores:
# D100 - Missing docstring in public module
# D103 - Missing docstring in public function
# D104 - Missing docstring in public package
# D107 - Missing docstring in __init__
# D202 - No blank lines allowed after function docstring
# D203 - 1 blank line required before class docstring
# D212 - Multi-line docstring summary should start at the first line
# D400 - First line should end with a period
# D401 - First line should be in imperative mood
# D406 - Section name should end with a newline
# D407 - Missing dashed underline after section
# D413 - Missing blank line after last section
# D415 - First line should end with a period, question mark, or exclamation point
lint.ignore = [
"D100",
"D103",
"D104",
"D107",
"D202",
"D203",
"D212",
"D400",
"D401",
"D406",
"D407",
"D413",
"D415",
"E501",
]

# Ignore `F401` (unused imports) in all `__init__.py` files, and `E501` (line too long) in all files in the `tests` directory
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
"tests/*" = ["E501"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
"tests/*" = ["E501"]
7 changes: 4 additions & 3 deletions tests/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,10 @@ def test_copy_across_aws_envs(
mock_from_s3.head_object.return_value = {"ContentLength": len(content)}
mock_to_s3.head_object.return_value = {"ContentLength": len(content)}

with patch("scripts.promote.download") as mock_download, patch(
"scripts.promote.upload"
) as mock_upload:
with (
patch("scripts.promote.download") as mock_download,
patch("scripts.promote.upload") as mock_upload,
):
# Call the function
result_bucket, result_key = copy_across_aws_envs(
promotion,
Expand Down
Loading

0 comments on commit 00e84fc

Please sign in to comment.