Skip to content

Commit e64ce4c

Browse files
authored
Models API (#190)
First draft of #134. The API is dynamically generated from a pooch registry. Alongside the registry is a TOML file mapping model names to their sets of input files. Both can be built and distributed with the package at release time, and model input files can then be downloaded at runtime. This approach gives us the flexibility to support input files stored anywhere on GitHub, e.g. in the test models repos. It does not provide access to models defined with flopy, since this package does not depend on flopy.
1 parent 835de1c commit e64ce4c

File tree

5 files changed

+185
-3
lines changed

5 files changed

+185
-3
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,6 @@ data_backup
148148
autotest/temp/
149149

150150
# uv lockfile
151-
uv.lock
151+
uv.lock
152+
153+
modflow_devtools/data

docs/md/models.md

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Models API
2+
3+
The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via [Pooch](https://www.fatiando.org/pooch/latest/index.html).
4+
5+
When the module is imported, it checks for the existence of the registry and models files. If they are found, it loads the registry and dynamically generates a function for each model, attaching it to the module namespace.
6+
7+
Each function returns a list of files. Example usage:
8+
9+
```python
10+
import modflow_devtools.models as models
11+
12+
files = models.some_model()
13+
```
14+
15+
## Developers
16+
17+
The `make_registry.py` script is responsible for generating a registry text file and a mapping between files and models. This script should be run in the CI pipeline at release time before the package is built. The generated registry file and model mapping are used to create a pooch instance for fetching model files, and should be distributed with the package.
18+
19+
The script can be executed with `python -m modflow_devtools.make_registry` and accepts the following options:
20+
21+
- `--path` or `-p`: Specifies the directory containing model directories. If not provided, the default path is used.
22+
- `--append` or `-a`: If specified, the script will append to the existing registry file instead of overwriting it.
23+
- `--base-url` or `-b`: Specifies the base URL for the registry file. If not provided, the default base URL is used.
24+
25+
For example, to create a registry of models in the MF6 test models repositories, each of which is checked out in the current working directory:
26+
27+
```shell
28+
python -m modflow_devtools.make_registry -p modflow6-testmodels -b https://github.com/MODFLOW-ORG/modflow6-testmodels/raw/master
29+
python -m modflow_devtools.make_registry -p modflow6-largetestmodels -a -b https://github.com/MODFLOW-ORG/modflow6-largetestmodels/raw/master
30+
```

modflow_devtools/make_registry.py

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import argparse
2+
import hashlib
3+
from pathlib import Path
4+
5+
import tomli_w as tomli
6+
7+
from modflow_devtools.misc import get_model_paths
8+
9+
# GitHub coordinates of this project; used to build the default base URL.
REPO_OWNER = "MODFLOW-ORG"
REPO_NAME = "modflow-devtools"
REPO_REF = "develop"

# Directory one level above the directory containing this file.
PROJ_ROOT = Path(__file__).parents[1]

# Location of the generated registry file and model mapping.
# NOTE(review): REPO_NAME is hyphenated, so DATA_PATH resolves to
# <PROJ_ROOT>/modflow-devtools/data rather than the modflow_devtools/data
# path listed in .gitignore -- confirm which location is intended.
# models.py builds its path with the same expression, so the two files
# must be changed together.
DATA_RELPATH = "data"
DATA_PATH = PROJ_ROOT.joinpath(REPO_NAME, DATA_RELPATH)
REGISTRY_PATH = DATA_PATH.joinpath("registry.txt")
MODELS_PATH = DATA_PATH.joinpath("models.toml")

# Default base URL for registry entries; note that it ends with a slash.
BASE_URL = "/".join(
    (
        "https://github.com",
        REPO_OWNER,
        REPO_NAME,
        "raw",
        REPO_REF,
        DATA_RELPATH,
        "",
    )
)
18+
19+
20+
def _sha256(path: Path) -> str:
    """
    Return the hexadecimal SHA256 digest of the file at the given path.

    The file is opened unbuffered and read in 128 KiB chunks into a single
    reusable buffer, so the whole file is never held in memory at once.
    Reference: https://stackoverflow.com/a/44873382/6514033
    """
    digest = hashlib.sha256()
    buffer = memoryview(bytearray(128 * 1024))
    with path.open("rb", buffering=0) as stream:
        while True:
            nread = stream.readinto(buffer)
            # A read of zero bytes signals end of file.
            if nread == 0:
                break
            digest.update(buffer[:nread])
    return digest.hexdigest()
32+
33+
34+
def write_registry(
    path: Path, registry_path: Path, base_url: str, append: bool = False
):
    """
    Write a pooch registry file and a model-to-files mapping.

    Every file found under each model directory returned by
    ``get_model_paths(path)`` gets one registry line of the form
    ``<name> <sha256> <url>``. A ``models.toml`` file is written next to
    the registry, mapping each model key to the registry names of its files.

    Parameters
    ----------
    path : Path
        Directory containing model directories.
    registry_path : Path
        Registry file to write. Its parent directory is created if needed.
    base_url : str
        URL prefix joined to each file's path relative to ``path``.
        A trailing slash is ignored.
    append : bool
        If True, append to the existing registry and ``models.toml``
        instead of overwriting them.

    Raises
    ------
    NotADirectoryError
        If ``path`` is not a directory. Raised before any output file is
        opened, so an existing registry is left untouched.
    """
    # Validate first: opening the registry in "w" mode truncates it, so a
    # bad path must not get as far as opening the file.
    if not path.is_dir():
        raise NotADirectoryError(f"Path {path} is not a directory.")

    registry_path.parent.mkdir(parents=True, exist_ok=True)

    # The default base URL ends with "/"; strip it to avoid "//" in URLs.
    base_url = base_url.rstrip("/")

    models = {}
    exclude = [".DS_Store"]
    # pooch reads registry files as UTF-8, so write them the same way.
    with registry_path.open("a" if append else "w", encoding="utf-8") as f:
        for mp in get_model_paths(path):
            for p in mp.rglob("*"):
                relpath = p.relative_to(path)
                # POSIX separators keep names and URLs identical on every
                # platform (str(relpath) uses backslashes on Windows).
                posix = relpath.as_posix()
                # Only test the part below `path`, so a parent directory
                # that happens to contain "compare" excludes nothing.
                if "compare" in posix:
                    continue
                if not p.is_file() or any(e in p.name for e in exclude):
                    continue
                name = posix.replace("/", "_").replace("-", "_")
                digest = _sha256(p)
                f.write(f"{name} {digest} {base_url}/{posix}\n")
                key = (
                    relpath.parent.as_posix()
                    .replace("/", "_")
                    .replace("-", "_")
                )
                models.setdefault(key, []).append(name)

    # NOTE: in append mode the new keys are dumped after the existing
    # content; a key that is already present in models.toml would be
    # defined twice, which TOML does not allow.
    models_path = registry_path.parent / "models.toml"
    with models_path.open("ab" if append else "wb") as mf:
        tomli.dump(models, mf)
64+
65+
66+
if __name__ == "__main__":
    # Command-line entry point: build (or extend) the registry file and
    # the model mapping for the models found under --path.
    parser = argparse.ArgumentParser(
        description=(
            "Create a registry of model input files and a mapping "
            "of models to their files."
        )
    )
    parser.add_argument(
        "--path",
        "-p",
        type=str,
        help="Directory containing model directories.",
    )
    parser.add_argument(
        "--append",
        "-a",
        action="store_true",
        help="Append to the registry file instead of overwriting.",
    )
    parser.add_argument(
        "--base-url",
        "-b",
        type=str,
        help="Base URL for the registry file.",
    )
    args = parser.parse_args()
    # Fall back to the module-level defaults when an option is omitted.
    path = Path(args.path) if args.path else DATA_PATH
    base_url = args.base_url if args.base_url else BASE_URL

    write_registry(path, REGISTRY_PATH, base_url, args.append)

modflow_devtools/models.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from pathlib import Path
2+
3+
import pooch
4+
import tomli
5+
6+
import modflow_devtools
7+
8+
# GitHub coordinates of this project; used to build the default base URL.
REPO_OWNER = "MODFLOW-ORG"
REPO_NAME = "modflow-devtools"
REPO_REF = "develop"

# Directory one level above the directory containing this file.
PROJ_ROOT = Path(__file__).parents[1]

# Location of the registry file and model mapping read at import time.
# NOTE(review): REPO_NAME is hyphenated, so DATA_PATH resolves to
# <PROJ_ROOT>/modflow-devtools/data rather than the modflow_devtools/data
# path listed in .gitignore -- confirm which location is intended.
# make_registry.py builds its path with the same expression, so the two
# files must be changed together.
DATA_RELPATH = "data"
DATA_PATH = PROJ_ROOT / REPO_NAME / DATA_RELPATH
REGISTRY_PATH = DATA_PATH / "registry.txt"
MODELS_PATH = DATA_PATH / "models.toml"
BASE_URL = f"https://github.com/{REPO_OWNER}/{REPO_NAME}/raw/{REPO_REF}/{DATA_RELPATH}/"

# Drop any ".dev..." suffix from the package version. partition() is used
# rather than rpartition() because rpartition() puts the whole string in
# the last element when the separator is absent, which would make the
# version an empty string for releases without a ".dev" suffix.
VERSION = modflow_devtools.__version__.partition(".dev")[0]

# Fetcher that downloads files into the OS cache directory for this
# project. The registry is loaded from file below.
FETCHER = pooch.create(
    path=pooch.os_cache(REPO_NAME),
    base_url=BASE_URL,
    version=VERSION,
    registry=None,
)

# Both generated files are required; importing this module fails without
# them.
if not REGISTRY_PATH.exists():
    raise FileNotFoundError(f"Registry file {REGISTRY_PATH} not found.")

if not MODELS_PATH.exists():
    raise FileNotFoundError(f"Models file {MODELS_PATH} not found.")

FETCHER.load_registry(REGISTRY_PATH)
32+
33+
34+
def _generate_function(model_name: str, files: list) -> callable:
    """
    Build a zero-argument function that fetches a model's files.

    The returned function calls ``FETCHER.fetch`` once per entry in
    ``files``, in order, and returns the results as a list. Its
    ``__name__`` is set to ``model_name``.
    """

    def model_function() -> list:
        fetched = []
        for registry_name in files:
            fetched.append(FETCHER.fetch(registry_name))
        return fetched

    model_function.__name__ = model_name
    return model_function
40+
41+
42+
def _make_functions(models_path: Path, registry_path: Path):
    """
    Attach one fetch function per model to this module's namespace.

    Reads the TOML mapping at ``models_path`` and, for each top-level key,
    stores the function built by ``_generate_function`` in ``globals()``
    under that key. ``registry_path`` is accepted but not used here.
    """
    with models_path.open("rb") as stream:
        mapping = tomli.load(stream)
    namespace = globals()
    for model_name in mapping:
        namespace[model_name] = _generate_function(
            model_name, mapping[model_name]
        )


# Generate the model functions as soon as the module is imported.
_make_functions(MODELS_PATH, REGISTRY_PATH)

pyproject.toml

+13-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ dfn = [
7474
"tomli",
7575
"tomli-w"
7676
]
77-
dev = ["modflow-devtools[lint,test,docs,dfn]"]
77+
models = [
78+
"pooch",
79+
"tomli",
80+
"tomli-w"
81+
]
82+
dev = ["modflow-devtools[lint,test,docs,dfn,models]"]
7883

7984
[dependency-groups]
8085
build = [
@@ -111,12 +116,18 @@ dfn = [
111116
"tomli",
112117
"tomli-w"
113118
]
119+
models = [
120+
"pooch",
121+
"tomli",
122+
"tomli-w"
123+
]
114124
dev = [
115125
{include-group = "build"},
116126
{include-group = "lint"},
117127
{include-group = "test"},
118128
{include-group = "docs"},
119-
{include-group = "dfn"}
129+
{include-group = "dfn"},
130+
{include-group = "models"},
120131
]
121132

122133
[project.urls]

0 commit comments

Comments
 (0)