Skip to content

Commit

Permalink
Merge pull request #135 from ecmwf-projects/copds-1635-html-blocks
Browse files Browse the repository at this point in the history
management of html blocks in layout.json
  • Loading branch information
alex75 authored Oct 23, 2024
2 parents 2e108c1 + 238aace commit bec775c
Show file tree
Hide file tree
Showing 7 changed files with 307 additions and 14 deletions.
50 changes: 45 additions & 5 deletions cads_catalogue/contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import sqlalchemy as sa
import structlog

from cads_catalogue import config, database, object_storage
from cads_catalogue import config, database, layout_manager, object_storage

THIS_PATH = os.path.abspath(os.path.dirname(__file__))
logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -52,6 +52,9 @@ def content_sync(
site, ctype, slug = content["site"], content["type"], content["slug"]
subpath = os.path.join("contents", site, ctype, slug)
for field in OBJECT_STORAGE_UPLOAD_FIELDS:
if field == "layout":
# already done by layout manager
continue
file_path = content.get(field)
if not file_path:
continue
Expand Down Expand Up @@ -143,9 +146,7 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An
os.path.join(content_folder, rel_path)
)
if os.path.isfile(ancillar_file_path):
metadata[ancillar_file_field] = os.path.abspath(
os.path.join(content_folder, rel_path)
)
metadata[ancillar_file_field] = ancillar_file_path
else:
raise ValueError(
f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!"
Expand All @@ -154,6 +155,44 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An
return ret_value


def transform_layout(
    content: dict[str, Any],
    storage_settings: config.ObjectStorageSettings,
):
    """
    Inline the html blocks of a content's layout.json and upload the result.

    If the content metadata references a local layout file, the file is loaded,
    its html blocks are transformed by `layout_manager.transform_html_blocks`
    and the resulting layout is stored in the object storage. The "layout"
    entry of `content` is then replaced (in place) by the value returned by
    `layout_manager.store_layout_by_data`.

    Parameters
    ----------
    content: metadata of a loaded content from files
    storage_settings: object with settings to access the object storage

    Returns
    -------
    the input content metadata; it is returned untouched when it has no
    "layout" or when "layout" is not the path of an existing file
    """
    layout_file_path = content.get("layout")
    # nothing to do when no layout is referenced or when it is not a local file
    if not layout_file_path or not os.path.isfile(layout_file_path):
        return content

    # read the JSON as UTF-8 explicitly instead of relying on the locale default
    with open(layout_file_path, encoding="utf-8") as fp:
        layout_data = json.load(fp)
    logger.debug(f"input layout_data: {layout_data}")

    # paths referenced by html blocks are resolved against the layout's folder
    layout_folder_path = os.path.dirname(layout_file_path)
    layout_data = layout_manager.transform_html_blocks(layout_data, layout_folder_path)
    logger.debug(f"output layout_data: {layout_data}")

    subpath = os.path.join(
        "contents", content["site"], content["type"], content["slug"]
    )
    content["layout"] = layout_manager.store_layout_by_data(
        layout_data, content, storage_settings, subpath=subpath
    )
    logger.debug(f"layout url: {content['layout']}")
    return content


def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, Any]]:
"""
Load all contents from a folder and return a dictionary of metadata extracted.
Expand Down Expand Up @@ -214,9 +253,10 @@ def update_catalogue_contents(
"loaded %s contents from folder %s" % (len(contents), contents_package_path)
)
involved_content_props = []
for content in contents:
for content in contents[:]:
site, ctype, slug = content["site"], content["type"], content["slug"]
involved_content_props.append((site, ctype, slug))
content = transform_layout(content, storage_settings)
try:
with session.begin_nested():
content_sync(session, content, storage_settings)
Expand Down
86 changes: 85 additions & 1 deletion cads_catalogue/layout_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,86 @@ def transform_cim_blocks(
return new_data


def manage_html_block_in_section(section, layout_folder_path):
    """
    Return a copy of `section` where html blocks referring to files are inlined.

    For each block of type "html" having a "content_source" key:

    * if the referred file exists, its text becomes the block "content"
      (overwriting an existing "content", with a warning);
    * if the file does not exist but the block already has a "content", that
      content is kept as default (with a warning);
    * otherwise a ValueError is raised.

    In every successful case the "content_source" key is removed. Blocks of
    type "section" or "accordion" are processed recursively. The input
    `section` is never modified.

    Parameters
    ----------
    section: section of layout.json data
    layout_folder_path: path to the folder containing layout file

    Returns
    -------
    dict: transformed deep copy of the input section

    Raises
    ------
    ValueError: an html block refers to a missing file and has no "content"
    """
    new_section = copy.deepcopy(section)
    blocks = new_section.get("blocks", [])
    # `blocks` already belongs to the private copy, so it can be edited while
    # iterating: items are only replaced or updated, never added or removed
    for i, block in enumerate(blocks):
        block_type = block.get("type")
        if block_type in ("section", "accordion"):
            blocks[i] = manage_html_block_in_section(block, layout_folder_path)
            continue
        if block_type != "html" or "content_source" not in block:
            continue

        # the id is only used in messages: do not fail on blocks without it
        block_id = block.get("id")
        content_source = block.pop("content_source")
        source_path = os.path.abspath(
            os.path.join(layout_folder_path, content_source)
        )
        has_inline_content = "content" in block
        if os.path.isfile(source_path):
            if has_inline_content:
                msg = (
                    f"found html block {block_id} with both 'content' and 'content_source': "
                    f"applying overwrite"
                )
                logger.warning(msg)
            # explicit encoding: do not depend on the locale of the host
            with open(source_path, encoding="utf-8") as fp:
                block["content"] = fp.read()
        elif has_inline_content:
            msg = (
                f"found html block {block_id} with both 'content' and 'content_source': "
                f"applying default (not found source {content_source})"
            )
            logger.warning(msg)
        else:
            raise ValueError(
                f"not found referred {content_source} in html block {block_id}"
            )
    return new_section


def transform_html_blocks(
layout_data: dict[str, Any], layout_folder_path: str | pathlib.Path
):
"""Transform layout.json data replacing html blocks with referred external files.
Parameters
----------
layout_data: data of the layout.json to transform
layout_folder_path: path to the folder containing layout file
Returns
-------
dict: dictionary of layout_data modified
"""
new_data = copy.deepcopy(layout_data)
# search all html blocks inside body/main/sections:
body = new_data.get("body", {})
body_main = body.get("main", {})
sections = body_main.get("sections", [])
for i, section in enumerate(copy.deepcopy(sections)):
sections[i] = manage_html_block_in_section(section, layout_folder_path)
# search all html blocks inside body/aside:
aside_section = body.get("aside", {})
if aside_section:
new_data["body"]["aside"] = manage_html_block_in_section(
aside_section, layout_folder_path
)
return new_data


def has_section_id(layout_data: dict[str, Any], section_id: str):
"""
Return True if layout has section id `section_id`.
Expand All @@ -479,6 +559,7 @@ def store_layout_by_data(
layout_data: dict[str, Any],
resource: dict[str, Any],
storage_settings: config.ObjectStorageSettings,
subpath: str | None = None,
) -> str:
"""
Store a layout.json in the object storage providing its json data.
Expand All @@ -488,14 +569,16 @@ def store_layout_by_data(
layout_data: data of the layout.json to store
resource: resource dictionary (as returned by `load_resource_from_folder`)
storage_settings: object with settings to access the object storage
subpath: bucket subpath, otherwise resources/<resource_uid> is assumed
Returns
-------
str: URL of the layout.json uploaded to the object storage
"""
# upload of modified layout.json
tempdir_path = tempfile.mkdtemp()
subpath = os.path.join("resources", resource["resource_uid"])
if not subpath:
subpath = os.path.join("resources", resource["resource_uid"])
layout_temp_path = os.path.join(tempdir_path, "layout.json")
with open(layout_temp_path, "w") as fp:
json.dump(layout_data, fp, indent=2)
Expand Down Expand Up @@ -544,6 +627,7 @@ def transform_layout(
cim_layout_path = os.path.join(
cim_folder_path, resource["resource_uid"], "quality_assurance.layout.json"
)
layout_data = transform_html_blocks(layout_data, resource_folder_path)
layout_data = transform_cim_blocks(
layout_data, cim_layout_path, resource["qa_flag"]
)
Expand Down
3 changes: 2 additions & 1 deletion tests/data/cads-contents-json/how-to-api/layout.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
{
"id": "page-content",
"type": "html",
"content": "<div>TODO</div>"
"content": "<div>TODO</div>",
"content_source": "../html_block.html"
}
]
}
Expand Down
1 change: 1 addition & 0 deletions tests/data/cads-contents-json/html_block.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>this is a content of a html block</p>
71 changes: 67 additions & 4 deletions tests/test_15_contents.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import datetime
import os.path
import unittest.mock
from operator import itemgetter
from typing import Any

import pytest_mock
import sqlalchemy as sa

from cads_catalogue import config, contents, object_storage
from cads_catalogue import config, contents, layout_manager, object_storage

THIS_PATH = os.path.abspath(os.path.dirname(__file__))
TESTDATA_PATH = os.path.join(THIS_PATH, "data")
Expand Down Expand Up @@ -216,7 +218,6 @@ def test_content_sync(
]
content1["publication_date"] = "2021-03-18T11:02:31Z"
content1["title"] = "new title"
content1["layout"] = os.path.join(content_folder, "cica-overview.png")
with session_obj() as session:
# db is not empty: update a content
db_content2 = contents.content_sync(session, content1, storage_settings)
Expand Down Expand Up @@ -253,8 +254,70 @@ def test_content_sync(
for key, value in content1.items():
if key in ("publication_date", "content_update"):
value = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ") # type: ignore
elif key in ("layout",):
value = "an url"
elif key == "keywords":
continue
assert getattr(db_content2, key) == value


def test_transform_layout(mocker: pytest_mock.MockerFixture):
    """Check that contents.transform_layout inlines html blocks and stores the layout.

    The object storage upload is patched to return a fixed url, while
    `layout_manager.store_layout_by_data` is spied to inspect the layout data
    actually sent to the storage.
    """
    mocker.patch.object(object_storage, "store_file", return_value="an url")
    _store_layout_by_data = mocker.spy(layout_manager, "store_layout_by_data")
    my_settings_dict = {
        "object_storage_url": "https://object/storage/url/",
        "storage_admin": "admin1",
        "storage_password": "secret1",
        "catalogue_bucket": "mycatalogue_bucket",
        "document_storage_url": "https://document/storage/url/",
    }
    storage_settings = config.ObjectStorageSettings(**my_settings_dict)
    content_folder = os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api")
    initial_md_content: dict[str, Any] = {
        "site": "cds",
        "type": "page",
        "slug": "how-to-api",
        "title": "CDSAPI setup",
        "description": "Access the full data store catalogue, with search and availability features",
        "publication_date": "2024-09-13T10:01:50Z",
        "content_update": "2024-09-16T02:10:22Z",
        "link": None,
        "keywords": [],
        "data": None,
        "layout": os.path.join(content_folder, "layout.json"),
        "image": None,
    }
    # snapshot taken BEFORE the call: transform_layout updates its input in
    # place, so a copy made afterwards could not reveal unexpected changes
    expected_md_content = initial_md_content.copy()
    expected_md_content["layout"] = "an url"
    expected_layout_data = {
        "title": "CDSAPI setup",
        "description": "Access the full data store catalogue, with search and availability features",
        "body": {
            "main": {
                "sections": [
                    {
                        "id": "main",
                        "blocks": [
                            {
                                "id": "page-content",
                                "type": "html",
                                "content": "<p>this is a content of a html block</p>",
                            }
                        ],
                    }
                ]
            }
        },
    }

    effective_md_content = contents.transform_layout(
        initial_md_content, storage_settings
    )

    assert effective_md_content == expected_md_content
    # the spy keeps a reference to the content dict, which by now carries the url
    assert _store_layout_by_data.mock_calls == [
        unittest.mock.call(
            expected_layout_data,
            expected_md_content,
            storage_settings,
            subpath="contents/cds/page/how-to-api",
        )
    ]
Loading

0 comments on commit bec775c

Please sign in to comment.