Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

builder: entirely prevent work duplication #378

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 52 additions & 15 deletions bork/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from .filesystem import load_pyproject
from .log import logger
from .utils import scoped_cache

import build

Expand All @@ -46,8 +47,9 @@
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Literal, Mapping
from zipfile import ZipFile
import importlib, importlib.metadata
import subprocess, sys, zipapp
import re, subprocess, sys, zipapp


# The "proper" way to handle the default would be to check python_requires
Expand Down Expand Up @@ -84,6 +86,13 @@ def zipapp(self, main: str | None) -> Path:
"""


_WHEEL_FILENAME_REGEX = re.compile(
r"(?P<distribution>.+)-(?P<version>.+)"
r"(-(?P<build_tag>.+))?-(?P<python_tag>.+)"
r"-(?P<abi_tag>.+)-(?P<platform_tag>.+)\.whl"
)


@contextmanager
def prepare(src: Path, dst: Path) -> Iterator[Builder]:
"""Context manager for performing builds in an isolated environment.
Expand All @@ -93,36 +102,64 @@ def prepare(src: Path, dst: Path) -> Iterator[Builder]:
It will be created if it does not yet exist.
:returns: A concrete :py:class:`Builder`
"""
@scoped_cache
@dataclass(frozen = True)
class Bob(Builder):
src: Path
dst: Path
env: build.env.IsolatedEnv
bld: build.ProjectBuilder

def metadata_path(self) -> Path:
logger().info("Building wheel metadata")

out_dir = Path(env.path) / 'metadata'
out_dir.mkdir(exist_ok = True)
return Path(self.bld.metadata_path(out_dir))
def build(self, dist):
logger().info(f"Building {dist}")
self.env.install(
self.bld.get_requires_for_build(dist)
)
return Path(self.bld.build(
dist, self.dst,
metadata_directory = self._metadata_path if isinstance(self._metadata_path, Path) else None
))

@scoped_cache.skip # This is just a wrapper for metadata_path
def metadata(self) -> importlib.metadata.PackageMetadata:
return importlib.metadata.PathDistribution(
self.metadata_path()
).metadata

def build(self, dist, *, settings = {}):
logger().info(f"Building {dist}")
self.env.install(
self.bld.get_requires_for_build(dist, settings)
)
# TODO: reuse metadata_path if it was already built
return Path( self.bld.build(dist, self.dst, settings) )
def metadata_path(self) -> Path:
log = logger()
out_dir = Path(self.env.path) / 'metadata'

def from_wheel() -> Path:
whl_path = self.build("wheel")
whl_parse = _WHEEL_FILENAME_REGEX.fullmatch(whl_path.name)
assert whl_parse, f"Invalid wheel filename '{whl_path.name}'"

log.info("Extracting metadata from wheel")
distinfo = f"{whl_parse['distribution']}-{whl_parse['version']}.dist-info/"
with ZipFile(whl_path) as whl:
whl.extractall(
out_dir,
members = (fn for fn in whl.namelist() if fn.startswith(distinfo)),
)

return out_dir / distinfo


if "wheel" in self._build:
# A wheel was already built, let's extract its metadata
return from_wheel()

metadata = self.bld.prepare("wheel", out_dir)
if metadata is not None:
return Path(metadata)

log.debug("Package metadata cannot be built alone, building wheel")
return from_wheel()

def zipapp(self, main):
log = logger()
log.info("Building zipapp")
log.info(f"Building zipapp with entrypoint '{main}'")

log.debug("Loading configuration")
config = load_pyproject().get("tool", {}).get("bork", {})
Expand Down
128 changes: 128 additions & 0 deletions bork/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from inspect import get_annotations, getmembers, signature, isclass, isfunction
from functools import wraps
from typing import no_type_check
import logging

@no_type_check
def scoped_cache(cls):
assert isclass(cls), f"scoped_cache is applied to classes, got a '{type(cls)}'"

annotations = get_annotations(cls)
sentinel = object() # A unique value used as a sentinel for the cache
dlog = logging.getLogger() # The decorator's logger
dlog.info(f"Making '{cls.__qualname__}' memoized")

# object's methods are shown as functions on the class object
zeroaries, dicts = set(), set()

def zeroary_wrapper(f):
dlog.debug(f"Memoizing zero-ary '{name}'")
flog = logging.getLogger(f.__qualname__) # the wrapper's logger

attr_name = f"_{f.__name__}"
assert attr_name not in zeroaries, "Somehow got multiple methods with the same name?"
zeroaries.add(attr_name)

@wraps(f)
def g(self):
res = object.__getattr__(self, attr_name)
if res is not sentinel:
return res

flog.debug("Caching the first call")
res = f(self)
object.__setattr__(self, attr_name, res) # so this works on frozen dataclasses etc.
return res

return g

def unary_wrapper(f):
# TODO: handle the case of a kw-only unary
dlog.debug(f"Memoizing unary '{name}'")
flog = logging.getLogger(f.__qualname__) # the wrapper's logger

attr_name = f"_{f.__name__}"
assert attr_name not in dicts, "Somehow got multiple methods with the same name?"
dicts.add(attr_name)

@wraps(f)
def g(self, x):
cache = object.__getattr__(self, attr_name)
if x in cache:
return cache[x]

flog.debug(f"Cache miss for '{x}'")
res = f(self, x)
cache[x] = res
return res

return g

def nary_wrapper(f):
dlog.debug(f"Memoizing n-ary '{name}'")
flog = logging.getLogger(f.__qualname__) # the wrapper's logger

attr_name = f"_{f.__name__}"
assert attr_name not in dicts, "Somehow got multiple methods with the same name?"
dicts.add(attr_name)

@wraps(f)
def g(self, *args, **kwargs):
cache = object.__getattr__(self, attr_name)
key = (args, kwargs)
if key in cache:
return cache[key]

flog.debug(f"Cache miss for '{key}'")
res = f(self, *args, **kwargs)
cache[key] = res
return res

return g


for (name, f) in getmembers(cls, isfunction):
if name.startswith("_") or getattr(f, '_do_not_cache', False):
dlog.debug(f"Skipping '{name}'")
continue

assert name == f.__name__
if f"_{name}" in annotations:
raise ValueError(f"Memoization would use attribute '_{f.__name__}' which is already defined on the class")

sig = signature(f)
assert "self" in sig.parameters, f"Method '{f.__qualname__}' did not take 'self'"
match len(sig.parameters):
case 0:
assert False
case 1:
setattr(cls, name, zeroary_wrapper(f))
case 2:
setattr(cls, name, unary_wrapper(f))
case n if n > 2:
setattr(cls, name, nary_wrapper(f))


zeroaries, dicts = frozenset(zeroaries), frozenset(dicts)
assert zeroaries.isdisjoint(dicts), "Somehow got multiple methods with the same name?"

orig_init = cls.__init__
@wraps(orig_init)
def init(self, *args, **kwargs):
for k in zeroaries:
object.__setattr__(self, k, sentinel)
for k in dicts:
object.__setattr__(self, k, {})

orig_init(self, *args, **kwargs)

# TODO(nicoo) support __slots__
cls.__init__ = init
return cls


def _not_cached(f):
f._do_not_cache = True
return f

# Expose the opt-out marker as `@scoped_cache.skip`, for methods that should not be memoized
scoped_cache.skip = _not_cached