Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

build: Use uv over Poetry #60

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 44 additions & 22 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,57 +11,79 @@ permissions:
contents: write

jobs:
test:
lint:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Install poetry
shell: bash
run: pipx install poetry==1.8.2
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install python or load from cache with dependencies
- name: Install Python or load from cache with dependencies
uses: actions/setup-python@v5
with:
python-version-file: .python-version
cache: poetry
python-version-file: "pyproject.toml"

- name: Install dependencies
run: poetry install --with dev
- name: Install the project
run: uv sync --all-extras --dev

- name: Lint
run: |
poetry run pre-commit install
poetry run pre-commit run --all-files --show-diff-on-failure
uv run pre-commit install
uv run pre-commit run --all-files --show-diff-on-failure

test:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install Python or load from cache with dependencies
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

- name: Install the project
run: uv sync --all-extras --dev

- name: Test
run: poetry run pytest -v
run: uv run pytest -v

deploy:
if: github.ref == 'refs/heads/main'
needs: [test]
needs: [lint, test]
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v4

- name: Install poetry
shell: bash
run: pipx install poetry==1.8.2
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "uv.lock"

- name: Install python or load from cache with dependencies
- name: Install Python or load from cache with dependencies
uses: actions/setup-python@v5
with:
python-version-file: .python-version
cache: poetry
python-version-file: "pyproject.toml"

- name: Install dependencies
run: poetry install
- name: Install the project
run: uv sync --all-extras --dev

- name: Deploy docs
working-directory: ./docs
run: poetry run mkdocs gh-deploy --force
run: uv run mkdocs gh-deploy --force

- name: Deploy more
run: echo 'this is a placeholder'
5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,3 @@ repos:
files: ^src/
types: [python]
additional_dependencies: ['pyright@1.1.294']
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check

1 change: 0 additions & 1 deletion .python-version
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

explanation: One less place to maintain the version, as setup-python supports pyproject.toml.

This file was deleted.

5 changes: 3 additions & 2 deletions flows/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
from typing import Optional

import boto3
from cpr_sdk.parser_models import BaseParserOutput
Expand Down Expand Up @@ -158,8 +159,8 @@ def determine_classifier_ids(

@flow(log_prints=True)
def classifier_inference(
document_ids: list[str] = None,
classifier_spec: list[tuple[WikibaseID, str]] = None,
document_ids: Optional[list[str]] = None,
classifier_spec: Optional[list[tuple[WikibaseID, str]]] = None,
):
"""
Flow to run inference on documents within a bucket prefix
Expand Down
32 changes: 16 additions & 16 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,57 @@ set dotenv-load

# install dependencies and set up the project
install:
poetry install --with dev
poetry run pre-commit install
poetry run ipython kernel install --user
uv sync --extra dev
uv run pre-commit install
uv run ipython kernel install --user

# test the project
test:
poetry run pytest
uv run pytest

# update the snapshots for the tests
test-snapshot-update:
poetry run pytest --snapshot-update
uv run pytest --snapshot-update

# run linters and code formatters
lint:
poetry run pre-commit run --all-files --show-diff-on-failure
uv run pre-commit run --all-files --show-diff-on-failure

# build a dataset of passages
build-dataset:
poetry run python scripts/build_dataset.py
uv run python scripts/build_dataset.py

# fetch metadata and labelled passages for a specific wikibase ID
get-concept id:
poetry run python scripts/get_concept.py --wikibase-id {{id}}
uv run python scripts/get_concept.py --wikibase-id {{id}}

# train a model for a specific wikibase ID
train id +OPTS="":
poetry run train --wikibase-id {{id}} {{OPTS}}
uv run scripts/train.py --wikibase-id {{id}} {{OPTS}}

# evaluate a model for a specific wikibase ID
evaluate id:
poetry run python scripts/evaluate.py --wikibase-id {{id}}
uv run python scripts/evaluate.py --wikibase-id {{id}}

# promote a model for a specific wikibase ID
promote id +OPTS="":
poetry run promote --wikibase-id {{id}} {{OPTS}}
uv run scripts/promote.py --wikibase-id {{id}} {{OPTS}}

# run a model for a specific wikibase ID on a supplied string
label id string:
poetry run python scripts/label.py --wikibase-id {{id}} --input-string {{string}}
uv run python scripts/label.py --wikibase-id {{id}} --input-string {{string}}

# find instances of the concept in a set of passages for a specific wikibase ID
predict id:
poetry run python scripts/predict.py --wikibase-id {{id}}
uv run python scripts/predict.py --wikibase-id {{id}}

# sample a set of passages from the dataset for a specific wikibase ID
sample id:
poetry run python scripts/sample.py --wikibase-id {{id}}
uv run python scripts/sample.py --wikibase-id {{id}}

# push a sampled set of passages to argilla for a specific wikibase ID
push-to-argilla id usernames workspace:
poetry run python scripts/push_to_argilla.py --wikibase-id {{id}} --usernames {{usernames}} --workspace {{workspace}}
uv run python scripts/push_to_argilla.py --wikibase-id {{id}} --usernames {{usernames}} --workspace {{workspace}}

# run the full pipeline for a specific wikibase ID
create-labelling-task id usernames workspace:
Expand All @@ -64,6 +64,6 @@ create-labelling-task id usernames workspace:

# visualise IAA, model vs gold-standard agreement, and positive predictions on the full dataset
visualise-labels id:
poetry run python scripts/visualise_labels.py --wikibase-id {{id}}
uv run python scripts/visualise_labels.py --wikibase-id {{id}}

analyse-classifier id: (get-concept id) (train id) (predict id) (evaluate id) (visualise-labels id)
98 changes: 37 additions & 61 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,70 +1,46 @@
[tool.poetry]
[project]
name = "knowledge-graph"
version = "0.1.0"
description = ""
authors = ["CPR Data Science <dsci@climatepolicyradar.org>"]
license = "Apache 2.0"
description = "Add your description here"
readme = "README.md"
packages = [
{ include = "src", from = "." },
{ include = "tests", from = "." },
{ include = "scripts", from = "." },
requires-python = ">=3.11"
dependencies = [
"argilla==1.29.1",
"azure-pdf-parser",
"boto3==1.35.31",
"cpr-sdk==1.9.1",
"griffe==0.48.0",
"httpx==0.26.0",
"ipykernel==6.29.3",
"more-itertools==10.3.0",
"neomodel==5.3.3",
"prefect==2.16.8",
"python-dotenv==1.0.1",
"rapidfuzz==3.10.0",
"rich==13.7.0",
"sentence-transformers==3.1.1",
"tqdm>=4.66.5",
"typer==0.9.4",
"wandb==0.18.3",
# The following is to work around the fact that pytorch stopped supporting intel-macs after 2.2.2
# See: https://github.com/pytorch/pytorch/issues/114602
"torch==2.2.2; sys_platform == 'darwin' and platform_machine == 'x86_64'",
"torch==2.4.1; sys_platform != 'darwin' or platform_machine != 'x86_64'",
]

[tool.poetry.dependencies]
python = ">=3.10,<3.11"
httpx = "^0.26.0"
rich = "^13.7.0"
tqdm = "^4.66.2"
ipykernel = "^6.29.3"
azure_pdf_parser = { git = "https://github.com/climatepolicyradar/azure-pdf-parser.git", tag = "v0.4.3" }
# navigator_document_parser = { git = "https://github.com/climatepolicyradar/navigator-document-parser.git" }
neomodel = "^5.3.3"
typer = "0.9.4"
rapidfuzz = "^3.10.0"
python-dotenv = "^1.0.1"
hypothesis = "^6.112.2"
more-itertools = "^10.3.0"
argilla = "1.29.1"
prefect = "2.16.8"
griffe = "0.48.0"
cpr-sdk = "^1.7.1"
sentence-transformers = "^3.1.1"
wandb = "^0.18.3"
boto3 = "^1.35.31"
moto = {extras = ["s3"], version = "^5.0.16"}
# The following is to work around the fact that pytorch stopped supporting intel-macs after 2.2.2
# See: https://github.com/pytorch/pytorch/issues/114602
torch = [
{version = "2.2.2", markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"},
{version = "2.4.1", markers = "sys_platform != 'darwin' and platform_machine != 'x86_64'"},
]


[[tool.poetry.source]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"


[tool.poetry.scripts]
train = "scripts.train:app"
promote = "scripts.promote:app"

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
mkdocs-material = "^9.5.39"
pre-commit = "^3.8.0"
boto3 = "^1.35.32"
syrupy = "^4.7.1"
moto = {extras = ["s3"], version = "^5.0.16"}
[project.optional-dependencies]
dev = [
"hypothesis==6.112.2",
"mkdocs-material==9.5.39",
"moto[s3]==5.0.16",
"pre-commit>=4.0.1",
"pytest>=8.3.3",
"ruff>=0.7.0",
"syrupy>=4.7.2",
]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.uv.sources]
azure-pdf-parser = { git = "https://github.com/climatepolicyradar/azure-pdf-parser.git", tag = "v0.4.3" }

[tool.ruff]
lint.select = ["E", "F", "D", "I"]
Expand Down
7 changes: 4 additions & 3 deletions tests/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,10 @@ def test_copy_across_aws_envs(
mock_from_s3.head_object.return_value = {"ContentLength": len(content)}
mock_to_s3.head_object.return_value = {"ContentLength": len(content)}

with patch("scripts.promote.download") as mock_download, patch(
"scripts.promote.upload"
) as mock_upload:
with (
patch("scripts.promote.download") as mock_download,
patch("scripts.promote.upload") as mock_upload,
):
# Call the function
result_bucket, result_key = copy_across_aws_envs(
promotion,
Expand Down
Loading