From 1c008ae109946f031db29c1b846910f44da21312 Mon Sep 17 00:00:00 2001 From: nicoo Date: Sat, 10 Aug 2024 15:05:03 +0000 Subject: [PATCH 1/4] utils: add `scoped_cache` helper --- bork/utils.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 bork/utils.py diff --git a/bork/utils.py b/bork/utils.py new file mode 100644 index 0000000..8373446 --- /dev/null +++ b/bork/utils.py @@ -0,0 +1,128 @@ +from inspect import get_annotations, getmembers, signature, isclass, isfunction +from functools import wraps +from typing import no_type_check +import logging + +@no_type_check +def scoped_cache(cls): + assert isclass(cls), f"scoped_cache is applied to classes, got a '{type(cls)}'" + + annotations = get_annotations(cls) + sentinel = object() # A unique value used as a sentinel for the cache + dlog = logging.getLogger() # The decorator's logger + dlog.info(f"Making '{cls.__qualname__}' memoized") + + # object's methods are shown as functions on the class object + zeroaries, dicts = set(), set() + + def zeroary_wrapper(f): + dlog.debug(f"Memoizing zero-ary '{name}'") + flog = logging.getLogger(f.__qualname__) # the wrapper's logger + + attr_name = f"_{f.__name__}" + assert attr_name not in zeroaries, "Somehow got multiple methods with the same name?" + zeroaries.add(attr_name) + + @wraps(f) + def g(self): + res = object.__getattr__(self, attr_name) + if res is not sentinel: + return res + + flog.debug("Caching the first call") + res = f(self) + object.__setattr__(self, attr_name, res) # so this works on frozen dataclasses etc. + return res + + return g + + def unary_wrapper(f): + # TODO: handle the case of a kw-only unary + dlog.debug(f"Memoizing unary '{name}'") + flog = logging.getLogger(f.__qualname__) # the wrapper's logger + + attr_name = f"_{f.__name__}" + assert attr_name not in dicts, "Somehow got multiple methods with the same name?" 
+ dicts.add(attr_name) + + @wraps(f) + def g(self, x): + cache = object.__getattr__(self, attr_name) + if x in cache: + return cache[x] + + flog.debug(f"Cache miss for '{x}'") + res = f(self, x) + cache[x] = res + return res + + return g + + def nary_wrapper(f): + dlog.debug(f"Memoizing n-ary '{name}'") + flog = logging.getLogger(f.__qualname__) # the wrapper's logger + + attr_name = f"_{f.__name__}" + assert attr_name not in dicts, "Somehow got multiple methods with the same name?" + dicts.add(attr_name) + + @wraps(f) + def g(self, *args, **kwargs): + cache = object.__getattr__(self, attr_name) + key = (args, kwargs) + if key in cache: + return cache[key] + + flog.debug(f"Cache miss for '{key}'") + res = f(self, *args, **kwargs) + cache[key] = res + return res + + return g + + + for (name, f) in getmembers(cls, isfunction): + if name.startswith("_") or getattr(f, '_do_not_cache', False): + dlog.debug(f"Skipping '{name}'") + continue + + assert name == f.__name__ + if f"_{name}" in annotations: + raise ValueError(f"Memoization would use attribute '_{f.__name__}' which is already defined on the class") + + sig = signature(f) + assert "self" in sig.parameters, f"Method '{f.__qualname__}' did not take 'self'" + match len(sig.parameters): + case 0: + assert False + case 1: + setattr(cls, name, zeroary_wrapper(f)) + case 2: + setattr(cls, name, unary_wrapper(f)) + case n if n > 2: + setattr(cls, name, nary_wrapper(f)) + + + zeroaries, dicts = frozenset(zeroaries), frozenset(dicts) + assert zeroaries.isdisjoint(dicts), "Somehow got multiple methods with the same name?" 
+ + orig_init = cls.__init__ + @wraps(orig_init) + def init(self, *args, **kwargs): + for k in zeroaries: + object.__setattr__(self, k, sentinel) + for k in dicts: + object.__setattr__(self, k, {}) + + orig_init(self, *args, **kwargs) + + # TODO(nicoo) support __slots__ + cls.__init__ = init + return cls + + +def _not_cached(f): + f._do_not_cache = True + return f + +scoped_cache.skip = _not_cached From c13517c150bbe221bc8059d3c543fe9849c64db1 Mon Sep 17 00:00:00 2001 From: nicoo Date: Sat, 10 Aug 2024 15:07:35 +0000 Subject: [PATCH 2/4] builder.prepare: memoize results as long as the `Builder` lives Dropped the `settings` parameter, as it couldn't be cached (dict isn't hashable) and wasn't used anyway. If this become an issue, I could special-case the hashing of `dict`s in `scoped_cache` This avoids rebuilds with successive method calls. The results are not meaningful anymore outside a given build's context, and `scoped_cache` ties the cached outputs to the `Builder` object so we cannot leak memory. --- bork/builder.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bork/builder.py b/bork/builder.py index 401ea92..d5d02aa 100644 --- a/bork/builder.py +++ b/bork/builder.py @@ -36,6 +36,7 @@ from .filesystem import load_pyproject from .log import logger +from .utils import scoped_cache import build @@ -93,6 +94,7 @@ def prepare(src: Path, dst: Path) -> Iterator[Builder]: It will be created if it does not yet exist. 
# Methods of the `Bob` concrete Builder defined inside `prepare()` in
# bork/builder.py (reconstructed final state after patches 2 and 3).

def build(self, dist):
    """Build the given distribution (e.g. "sdist" or "wheel") into `self.dst`.

    Installs the backend's build requirements into the isolated environment
    first.  Memoized per-`dist` by `scoped_cache`, so repeated calls do not
    rebuild while this Builder lives.

    :returns: the path of the built artifact.
    """
    logger().info(f"Building {dist}")
    self.env.install(
        self.bld.get_requires_for_build(dist)
    )
    # Reuse previously-prepared metadata, but only for wheel builds: the
    # PEP 517 `build_sdist` hook takes no `metadata_directory` argument,
    # so passing it unconditionally would break sdist builds — TODO confirm
    # against `build.ProjectBuilder.build`.
    metadata_dir = (
        self._metadata_path
        if dist == "wheel" and isinstance(self._metadata_path, Path)
        else None
    )
    return Path(self.bld.build(dist, self.dst, metadata_directory = metadata_dir))

@scoped_cache.skip  # This is just a wrapper for metadata_path
def metadata(self) -> importlib.metadata.PackageMetadata:
    """Return the package metadata, building it first if necessary."""
    return importlib.metadata.PathDistribution(
        self.metadata_path()
    ).metadata
reuse the built wheel --- bork/builder.py | 60 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/bork/builder.py b/bork/builder.py index 1bfbe49..909f0cc 100644 --- a/bork/builder.py +++ b/bork/builder.py @@ -47,8 +47,9 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Literal, Mapping +from zipfile import ZipFile import importlib, importlib.metadata -import subprocess, sys, zipapp +import re, subprocess, sys, zipapp # The "proper" way to handle the default would be to check python_requires @@ -85,6 +86,13 @@ def zipapp(self, main: str | None) -> Path: """ +_WHEEL_FILENAME_REGEX = re.compile( + r"(?P.+)-(?P.+)" + r"(-(?P.+))?-(?P.+)" + r"-(?P.+)-(?P.+)\.whl" +) + + @contextmanager def prepare(src: Path, dst: Path) -> Iterator[Builder]: """Context manager for performing builds in an isolated environments. @@ -102,19 +110,6 @@ class Bob(Builder): env: build.env.IsolatedEnv bld: build.ProjectBuilder - def metadata_path(self) -> Path: - logger().info("Building wheel metadata") - - out_dir = Path(env.path) / 'metadata' - out_dir.mkdir(exist_ok = True) - return Path(self.bld.metadata_path(out_dir)) - - @scoped_cache.skip # This is just a wrapper for metadata_path - def metadata(self) -> importlib.metadata.PackageMetadata: - return importlib.metadata.PathDistribution( - self.metadata_path() - ).metadata - def build(self, dist): logger().info(f"Building {dist}") self.env.install( @@ -125,6 +120,43 @@ def build(self, dist): metadata_directory = self._metadata_path if isinstance(self._metadata_path, Path) else None )) + @scoped_cache.skip # This is just a wrapper for metadata_path + def metadata(self) -> importlib.metadata.PackageMetadata: + return importlib.metadata.PathDistribution( + self.metadata_path() + ).metadata + + def metadata_path(self) -> Path: + log = logger() + out_dir = Path(self.env.path) / 'metadata' + + def from_wheel() -> Path: + whl_path = 
self.build("wheel") + whl_parse = _WHEEL_FILENAME_REGEX.fullmatch(whl_path.name) + assert whl_parse, f"Invalid wheel filename '{whl_path.name}'" + + log.info("Extracting metadata from wheel") + distinfo = f"{whl_parse['distribution']}-{whl_parse['version']}.dist-info/" + with ZipFile(whl_path) as whl: + whl.extractall( + out_dir, + members = (fn for fn in whl.namelist() if fn.startswith(distinfo)), + ) + + return out_dir / distinfo + + + if "wheel" in self._build: + # A wheel was already built, let's extract its metadata + return from_wheel() + + metadata = self.bld.prepare("wheel", out_dir) + if metadata is not None: + return Path(metadata) + + log.debug("Package metadata cannot be built alone, building wheel") + return from_wheel() + def zipapp(self, main): log = logger() log.info(f"Building zipapp with entrypoint '{main}'")