Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update clone.py #329

Merged
merged 8 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ repos:
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.11
language_version: python3.9
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Hidden option `--location` for mepo that returns the path to the mepo directory
- Added ability to print number of stashes in `mepo status`
- Added new tests for `mepo clone`

### Changed

- Removed legacy `bin` directory
- Checked out branches are no longer in 'detached HEAD' state

## [2.1.0] - 2024-10-02

Expand Down
22 changes: 18 additions & 4 deletions src/mepo/cmdline/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import warnings

from .branch_parser import MepoBranchArgParser
from .stash_parser import MepoStashArgParser
Expand Down Expand Up @@ -74,6 +75,9 @@ def parse(self):
return self.parser.parse_args()

def __init(self):
warnings.warn(
"init will be removed in version 3, use clone instead", DeprecationWarning
)
init = self.subparsers.add_parser(
"init",
description="Initialize mepo based on `config-file`",
Expand Down Expand Up @@ -101,7 +105,7 @@ def __clone(self):
aliases=mepoconfig.get_command_alias("clone"),
)
clone.add_argument(
"repo_url", metavar="URL", nargs="?", default=None, help="URL to clone"
"url", metavar="URL", nargs="?", default=None, help="URL to clone"
)
clone.add_argument(
"directory",
Expand All @@ -122,7 +126,7 @@ def __clone(self):
metavar="registry",
nargs="?",
default=None,
help="Registry (default: components.yaml)",
help="Registry (default: %(default)s)",
)
clone.add_argument(
"--style",
Expand All @@ -142,8 +146,18 @@ def __clone(self):
metavar="partial-type",
nargs="?",
default=None,
choices=["off", "blobless", "treeless"],
help='Style of partial clone, default: None, allowed options: %(choices)s. Off means a "normal" full git clone, blobless means cloning with "--filter=blob:none" and treeless means cloning with "--filter=tree:0". NOTE: We do *not* recommend using "treeless" as it is very aggressive and will cause problems with many git commands.',
choices=[None, "blobless", "treeless"],
help=(
"""
Style of partial clone, default: %(default)s.
Allowed options: %(choices)s.
None: normal full git clone,
blobless: cloning with "--filter=blob:none",
treeless: cloning with "--filter=tree:0".
NOTE: We do *not* recommend using "treeless" as it is very
aggressive and will cause problems with many git commands.
"""
),
)

def __list(self):
Expand Down
220 changes: 100 additions & 120 deletions src/mepo/command/clone.py
Original file line number Diff line number Diff line change
@@ -1,145 +1,125 @@
import os
import pathlib
import shutil
import shlex
from urllib.parse import urlparse
from types import SimpleNamespace

from .init import run as mepo_init_run
from .init import run as mepo_init
from ..state import MepoState
from ..state import StateDoesNotExistError
from ..git import GitRepository
from ..utilities import shellcmd
from ..utilities import colors
from ..utilities import mepoconfig


def run(args):

# This protects against someone using branch without a URL
if args.branch and not args.repo_url:
raise RuntimeError("The branch argument can only be used with a URL")

if args.allrepos and not args.branch:
raise RuntimeError("The allrepos option must be used with a branch/tag.")

# We can get the blobless and treeless options from the config or the args
if args.partial:
# We need to set partial to None if it's off, otherwise we use the
# string. This is safe because argparse only allows for 'off',
# 'blobless', or 'treeless'
partial = None if args.partial == "off" else args.partial
elif mepoconfig.has_option("clone", "partial"):
allowed = ["blobless", "treeless"]
partial = mepoconfig.get("clone", "partial")
if partial not in allowed:
raise Exception(
f"Detected partial clone type [{partial}] from .mepoconfig is not an allowed partial clone type: {allowed}"
)
else:
"""
Entry point of clone.

Multiple ways to run clone
1. After fixture has been cloned (via git clone)
a. mepo init
mepo clone
b. mepo clone (initializes mepo)
2. Clone fixture as well
a. mepo clone <url> [<directory>]
b. mepo clone -b <branch> <url> [<directory>]

Steps -
1. Clone fixture - if url is provided
2. Read state - initialize mepo (write state) first, if needed
3. Clone components
4. Checkout all repos to the specified branch
"""
CWD = os.getcwd()

arg_partial = handle_partial(args.partial)

if args.url is not None:
fixture_dir = clone_fixture(args.url, args.branch, args.directory, arg_partial)
os.chdir(fixture_dir)
allcomps = read_state(args.style, args.registry)
clone_components(allcomps, arg_partial)
if args.allrepos:
checkout_all_repos(allcomps, args.branch)

os.chdir(CWD)


def handle_partial(partial):
"""
The `partial` argument to clone can be set either via command line or
via .mepoconfig. Non-default value set via command line takes precedence.
The default value of `partial` is None, and possible choices are None/blobless/treeless
"""
ALLOWED_NON_DEFAULT = ["blobless", "treeless"]
if partial is None: # default value from command line
if mepoconfig.has_option("clone", "partial"):
partial = mepoconfig.get("clone", "partial")
if partial not in ALLOWED_NON_DEFAULT:
raise ValueError(f"Invalid partial type [{partial}] in .mepoconfig")
print(f"Found partial clone type [{partial}] in .mepoconfig")
else:
partial = None

# If you pass in a registry, with clone, it could be outside the repo.
# So use the full path
passed_in_registry = False
if args.registry:
passed_in_registry = True
args.registry = os.path.abspath(args.registry)
else:
# If we don't pass in a registry, we need to "reset" the arg to the
# default name because we pass args to mepo_init
args.registry = "components.yaml"

if args.repo_url:
p = urlparse(args.repo_url)
return partial


def clone_fixture(url, branch=None, directory=None, partial=None):
if directory is None:
p = urlparse(url)
last_url_node = p.path.rsplit("/")[-1]
url_suffix = pathlib.Path(last_url_node).suffix
if args.directory:
local_clone(args.repo_url, args.branch, args.directory, partial)
os.chdir(args.directory)
else:
if url_suffix == ".git":
git_url_directory = pathlib.Path(last_url_node).stem
else:
git_url_directory = last_url_node

local_clone(args.repo_url, args.branch, git_url_directory, partial)
os.chdir(git_url_directory)

# Copy the new file into the repo only if we pass it in
if passed_in_registry:
try:
shutil.copy(args.registry, os.getcwd())
except shutil.SameFileError:
pass
directory = pathlib.Path(last_url_node).stem
git = GitRepository(url, directory)
git.clone(branch, partial)
return directory


# This tries to read the state and if not, calls init,
# loops back, and reads the state
def read_state(arg_style, arg_registry):
while True:
try:
allcomps = MepoState.read_state()
except StateDoesNotExistError:
mepo_init_run(args)
registry = get_registry(arg_registry)
mepo_init(SimpleNamespace(style=arg_style, registry=registry))
continue
break
return allcomps

max_namelen = len(max([comp.name for comp in allcomps], key=len))
for comp in allcomps:
if not comp.fixture:
git = GitRepository(comp.remote, comp.local)
version = comp.version.name
version = version.replace("origin/", "")
recurse = comp.recurse_submodules

# According to Git, treeless clones do not interact well with
# submodules. So we need to see if any comp has the recurse
# option set to True. If so, we need to clone that comp "normally"

_partial = None if partial == "treeless" and recurse else partial
def get_registry(arg_registry):
    """
    Return the registry file name to use when initializing mepo.

    If no registry is given, the default name `components.yaml` is returned.
    Otherwise the given file is copied into the current working directory
    and its base name is returned.
    """
    if arg_registry is None:
        return "components.yaml"
    try:
        shutil.copy(arg_registry, os.getcwd())
    except shutil.SameFileError:
        # The registry already lives in the current directory, so there is
        # nothing to copy.
        pass
    return os.path.basename(arg_registry)

# We need the type to handle hashes in components.yaml
_type = comp.version.type
git.clone(version, recurse, _type, comp.name, _partial)
if comp.sparse:
git.sparsify(comp.sparse)
print_clone_info(comp, max_namelen)

if args.allrepos:
for comp in allcomps:
if not comp.fixture:
git = GitRepository(comp.remote, comp.local)
print(
"Checking out %s in %s"
% (
colors.YELLOW + args.branch + colors.RESET,
colors.RESET + comp.name + colors.RESET,
)
)
git.checkout(args.branch, detach=True)


def print_clone_info(comp, name_width):
ver_name_type = "({}) {}".format(comp.version.type, comp.version.name)
print("{:<{width}} | {:<s}".format(comp.name, ver_name_type, width=name_width))


def local_clone(url, branch=None, directory=None, partial=None):
cmd1 = "git clone "

if partial == "blobless":
cmd1 += "--filter=blob:none "
elif partial == "treeless":
cmd1 += "--filter=tree:0 "
else:
partial = None

if branch:
cmd1 += "--branch {} ".format(branch)
cmd1 += "--quiet {}".format(url)
if directory:
cmd1 += ' "{}"'.format(directory)
shellcmd.run(shlex.split(cmd1))
if branch:
cmd2 = f"git -C {directory} checkout --detach {branch}"
shellcmd.run(shlex.split(cmd2))
def clone_components(allcomps, partial):
    """
    Clone every non-fixture component and print a summary line for each.

    allcomps: iterable of components (the fixture entry is skipped)
    partial: requested partial clone type (None, "blobless" or "treeless")
    """
    # Width of the name column in the printed summary; 0 if there is
    # nothing to print.
    max_namelen = max((len(comp.name) for comp in allcomps), default=0)
    for comp in allcomps:
        if comp.fixture:
            continue  # not cloning fixture
        recurse_submodules = comp.recurse_submodules
        # According to Git, treeless clones do not interact well with
        # submodules. So a comp with the recurse option set to True gets a
        # non-partial clone. The decision is kept in a per-component
        # variable so that it does not switch off partial cloning for the
        # components that follow.
        comp_partial = None if partial == "treeless" and recurse_submodules else partial
        version = comp.version.name.replace("origin/", "")
        git = GitRepository(comp.remote, comp.local)
        git.clone(version, recurse_submodules, comp_partial)
        if comp.sparse:
            git.sparsify(comp.sparse)
        print_clone_info(comp.name, comp.version, max_namelen)


def print_clone_info(comp_name, comp_version, name_width):
    """
    Print one summary row for a cloned component: the name left-justified
    to `name_width`, then the version as "(type) name".
    """
    padded_name = format(comp_name, f"<{name_width}")
    version_label = "({}) {}".format(comp_version.type, comp_version.name)
    print(padded_name + " | " + version_label)


def checkout_all_repos(allcomps, branch):
    """
    Check out `branch` in every component of `allcomps`.

    Raises RuntimeError if no branch/tag is given.
    """
    if branch is None:
        raise RuntimeError("`allrepos` option must be used with a branch/tag.")
    for comp in allcomps:
        # Use the `branch` parameter; there is no `args` object in this scope.
        branch_y = colors.YELLOW + branch + colors.RESET
        print(f"Checking out {branch_y} in {comp.name}")
        git = GitRepository(comp.remote, comp.local)
        git.checkout(branch)
2 changes: 1 addition & 1 deletion src/mepo/command/reset.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def run(args):
"Namespace",
(object,),
{
"repo_url": None,
"url": None,
"directory": None,
"branch": None,
"registry": None,
Expand Down
41 changes: 16 additions & 25 deletions src/mepo/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,32 +44,23 @@ def get_local_path(self):
def get_remote_url(self):
return self.__remote

def clone(self, version, recurse, type, comp_name, partial=None):
cmd1 = "git clone "

if partial == "blobless":
cmd1 += "--filter=blob:none "
elif partial == "treeless":
cmd1 += "--filter=tree:0 "

if recurse:
cmd1 += "--recurse-submodules "

cmd1 += "--quiet {} {}".format(self.__remote, self.__local_path_abs)
try:
shellcmd.run(shlex.split(cmd1))
except sp.CalledProcessError:
raise RepoAlreadyClonedError(f"Error! Repo [{comp_name}] already cloned")

cmd2 = "git -C {} checkout {}".format(self.__local_path_abs, version)
shellcmd.run(shlex.split(cmd2))
cmd3 = "git -C {} checkout --detach".format(self.__local_path_abs)
shellcmd.run(shlex.split(cmd3))
def clone(self, version=None, recurse=None, partial=None):
    """
    Execute `git clone` command, then check out `version` if one is given

    version: tag or branch to check out after cloning (None: skip checkout)
    recurse: when true, clone with --recurse-submodules
    partial: None for a full clone, or "blobless" / "treeless"
             (any other value raises KeyError)
    """
    PARTIAL = {"blobless": "--filter=blob:none", "treeless": "--filter=tree:0"}

    # Build the command as a list so every option is its own argument,
    # whichever combination of partial/recurse is requested.
    cmd = ["git", "clone"]
    if partial is not None:
        cmd.append(PARTIAL[partial])
    # Truthiness test: recurse=False must not enable submodule recursion.
    if recurse:
        cmd.append("--recurse-submodules")
    cmd += ["--quiet", str(self.__remote), str(self.__local_path_abs)]
    shellcmd.run(cmd)

    if version is not None:
        self.checkout(version)

def checkout(self, version, detach=False):
cmd = self.__git + " checkout "
Expand Down
7 changes: 6 additions & 1 deletion tests/test_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,9 @@ def test_MepoComponent():
if name == "fvdycore":
fvdycore = MepoComponent().registry_to_component(name, comp, None)
assert fvdycore == get_fvdycore_component()
assert fvdycore.serialize() == get_fvdycore_serialized()
fvdycore_serialized = fvdycore.serialize()
remote = fvdycore_serialized["remote"]
fvdycore_serialized["remote"] = remote.replace(
"git@github.com:", "https://github.com/"
)
assert fvdycore_serialized == get_fvdycore_serialized()
Loading
Loading