Skip to content

Commit

Permalink
Merge pull request #149 from ecmwf-projects/COPDS-2349-repo-config
Browse files Browse the repository at this point in the history
catalogue manager clones input repos
  • Loading branch information
alex75 authored Feb 21, 2025
2 parents d010008 + aa628fd commit eb198f1
Show file tree
Hide file tree
Showing 12 changed files with 530 additions and 93 deletions.
107 changes: 96 additions & 11 deletions cads_catalogue/entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
maintenance,
manager,
messages,
repos,
skipping_utils,
validations,
)
Expand Down Expand Up @@ -168,16 +169,100 @@ def init_db(connection_string: Optional[str] = None, force: bool = False) -> Non
logger.info("successfully created/updated the catalogue db structure")


@app.command()
def clone_update_catalogue(
    repo_config_path: str,
    overrides_path: Optional[str] = None,
    contents_config_path: Optional[str] = None,
    connection_string: Optional[str] = None,
    force: bool = False,
    delete_orphans: bool = True,
    include: List[str] = [],
    exclude: List[str] = [],
    exclude_resources: bool = False,
    exclude_licences: bool = False,
    exclude_messages: bool = False,
    exclude_contents: bool = False,
) -> None:
    """Clone source repositories and update the database with the catalogue data.

    The repositories listed in the configuration file are cloned under the
    package directory, then `update_catalogue` is run on the cloned folders.
    Folders of excluded categories are passed on as None.

    Parameters
    ----------
    :param repo_config_path: path of the file yaml containing source repositories to clone
    :param overrides_path: path of the file yaml containing overriding metadata
    :param contents_config_path: path of the file yaml containing template variables for contents
    :param connection_string: something like 'postgresql://user:password@netloc:port/dbname'
    :param force: if True, run update regardless input folders has no changes from last update (default False)
    :param delete_orphans: if True, delete resources/licences not involved. False if using include/exclude
    :param include: if specified, pattern for resource uids to include in the update
    :param exclude: if specified, pattern for resource uids to exclude from the update
    :param exclude_resources: if True, do not consider input resources (default False)
    :param exclude_licences: if True, do not consider input licences (default False)
    :param exclude_messages: if True, do not consider input messages (default False)
    :param exclude_contents: if True, do not consider input contents (default False)
    """
    cads_common.logging.structlog_configure()
    cads_common.logging.logging_configure()

    filtering_kwargs: Dict[str, Any] = dict(
        include=include,
        exclude=exclude,
        exclude_resources=exclude_resources,
        exclude_messages=exclude_messages,
        exclude_contents=exclude_contents,
        exclude_licences=exclude_licences,
    )
    config_paths = dict(
        contents_config_path=contents_config_path,
        overrides_path=overrides_path,
    )

    parsed_repos = repos.parse_repos_config(repo_config_path, filtering_kwargs)
    cloned = repos.clone_repositories(parsed_repos, root_path=PACKAGE_DIR)

    def first_clone_path(repo_name: str) -> Any:
        # these repositories contribute a single folder: the first clone listed
        return cloned[repo_name][0]["clone_path"]

    # every input folder starts as None and is filled in only if not excluded
    input_paths: dict[str, Any] = dict.fromkeys(
        (
            "resources_folder_path",
            "cim_folder_path",
            "messages_folder_path",
            "licences_folder_path",
            "contents_folder_path",
        )
    )
    if not exclude_resources:
        # resources may come from several clones, so this one is a list of paths
        input_paths["resources_folder_path"] = [
            entry["clone_path"] for entry in cloned["cads-forms-json"]
        ]
        input_paths["cim_folder_path"] = first_clone_path("cads-forms-cim-json")

    single_folder_inputs = (
        (exclude_messages, "messages_folder_path", "cads-messages"),
        (exclude_licences, "licences_folder_path", "cads-licences"),
        (exclude_contents, "contents_folder_path", "cads-contents-json"),
    )
    for is_excluded, path_key, repo_name in single_folder_inputs:
        if not is_excluded:
            input_paths[path_key] = first_clone_path(repo_name)

    update_catalogue(
        connection_string=connection_string,
        force=force,
        delete_orphans=delete_orphans,
        **input_paths,  # type: ignore
        **config_paths,  # type: ignore
        **filtering_kwargs,  # type: ignore
    )


@app.command()
def update_catalogue(
overrides_path: Optional[str] = None,
resources_folder_path: Annotated[List[str], typer.Option()] = [
resources_folder_path: Annotated[List[str] | None, typer.Option()] = [
os.path.join(PACKAGE_DIR, "cads-forms-json")
],
messages_folder_path: str = os.path.join(PACKAGE_DIR, "cads-messages"),
licences_folder_path: str = os.path.join(PACKAGE_DIR, "cads-licences"),
cim_folder_path: str = os.path.join(PACKAGE_DIR, "cads-forms-cim-json"),
contents_folder_path: str = os.path.join(PACKAGE_DIR, "cads-contents-json"),
messages_folder_path: str | None = os.path.join(PACKAGE_DIR, "cads-messages"),
licences_folder_path: str | None = os.path.join(PACKAGE_DIR, "cads-licences"),
cim_folder_path: str | None = os.path.join(PACKAGE_DIR, "cads-forms-cim-json"),
contents_folder_path: str | None = os.path.join(PACKAGE_DIR, "cads-contents-json"),
contents_config_path: Optional[str] = None,
connection_string: Optional[str] = None,
force: bool = False,
Expand Down Expand Up @@ -217,7 +302,7 @@ def update_catalogue(
dbsettings = config.ensure_settings(config.dbsettings)
connection_string = dbsettings.connection_string
repo_paths = {
"metadata_repo": resources_folder_path,
"metadata_repo": resources_folder_path, # it's a list
"cim_repo": cim_folder_path,
"message_repo": messages_folder_path,
"licence_repo": licences_folder_path,
Expand Down Expand Up @@ -254,16 +339,16 @@ def update_catalogue(
logger.info("db updating of licences")
involved_licences = licence_manager.update_catalogue_licences(
session,
licences_folder_path,
licences_folder_path, # type: ignore
storage_settings,
)
if "datasets" in to_process:
logger.info("db updating of datasets")
force_datasets = force or "licences" in to_process
involved_resource_uids = manager.update_catalogue_resources(
session,
resources_folder_path,
cim_folder_path,
resources_folder_path, # type: ignore
cim_folder_path, # type: ignore
storage_settings,
force=force_datasets,
include=include,
Expand All @@ -272,12 +357,12 @@ def update_catalogue(
)
if "messages" in to_process:
logger.info("db updating of messages")
messages.update_catalogue_messages(session, messages_folder_path)
messages.update_catalogue_messages(session, messages_folder_path) # type: ignore
if "contents" in to_process:
logger.info("db updating of contents")
contents.update_catalogue_contents(
session,
contents_folder_path,
contents_folder_path, # type: ignore
storage_settings,
yaml_path=contents_config_path,
)
Expand Down
40 changes: 11 additions & 29 deletions cads_catalogue/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,30 +72,6 @@ def compute_config_hash(resource: dict[str, Any]) -> str:
return ret_value.hexdigest() # type: ignore


def get_git_hashes(folder_map: dict[str, str]) -> Dict[str, str]:
"""
Return last commit hashes of labelled folders.
Parameters
----------
folder_map: {'folder_label': 'folder_path'}
Returns
-------
{'folder_label': 'git_hash'}
"""
current_hashes = dict()
for folder_label, folder_path in folder_map.items():
try:
current_hashes[folder_label] = utils.get_last_commit_hash(folder_path)
except Exception: # noqa
logger.exception(
f"no check on commit hash for folder '{folder_path}, error follows"
)
current_hashes[folder_label] = None
return current_hashes


def get_status_of_last_update(session: sa.orm.session.Session) -> Dict[str, Any] | None:
"""
Return last stored git hashes and other information from table catalogue_updates.
Expand Down Expand Up @@ -694,22 +670,28 @@ def update_related_resources(session: sa.orm.session.Session):
def prerun_processing(repo_paths, connection_string, filtering_kwargs) -> None:
"""Preliminary processing for the catalogue manager."""
logger.info("additional input checks")
for repo_key, filter_key in [
("cim_repo", "exclude_resources"),
("licence_repo", "exclude_licences"),
("message_repo", "exclude_messages"),
("content_repo", "exclude_contents"),
for repo_key, filter_key, category in [
("cim_repo", "exclude_resources", "resources"),
("licence_repo", "exclude_licences", "licences"),
("message_repo", "exclude_messages", "messages"),
("content_repo", "exclude_contents", "contents"),
]:
repo_path = repo_paths[repo_key]
exclude = filtering_kwargs[filter_key]
if not repo_path and not exclude:
raise ValueError(f"missing required input folder for {category}")
if not os.path.isdir(repo_path) and not exclude:
raise ValueError(f"'{repo_path}' is not a folder")
for repo_key, filter_key in [
("metadata_repo", "exclude_resources"),
]:
repo_paths_list = repo_paths[repo_key]
exclude = filtering_kwargs[filter_key]
if not repo_paths_list and not exclude:
raise ValueError("missing required input folder for resources")
for repo_path in repo_paths_list:
if not repo_path and not exclude:
raise ValueError("missing required input folder for resources")
if not os.path.isdir(repo_path) and not exclude:
raise ValueError(f"'{repo_path}' is not a folder")

Expand Down
1 change: 0 additions & 1 deletion cads_catalogue/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import cads_catalogue

THIS_PATH = os.path.abspath(os.path.dirname(__file__))
logger = structlog.get_logger(__name__)


Expand Down
Loading

0 comments on commit eb198f1

Please sign in to comment.