From a4821e12c9e135984b156be7b4120d075a14d265 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 15 Jan 2025 11:27:50 +0200 Subject: [PATCH 1/7] chore: [FC-0063] `attrs` dependency is added --- requirements/base.in | 1 + requirements/base.txt | 2 ++ requirements/ci.txt | 4 ++++ requirements/constraints.txt | 2 ++ requirements/dev.txt | 5 +++++ requirements/quality.txt | 4 ++++ requirements/test.txt | 4 ++++ 7 files changed, 22 insertions(+) diff --git a/requirements/base.in b/requirements/base.in index 775e0bfe..bb8cb621 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,6 +1,7 @@ # Core requirements for this package Django +attrs lxml requests youtube-dl diff --git a/requirements/base.txt b/requirements/base.txt index b8ebc4a2..adf2a821 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,6 +6,8 @@ # asgiref==3.8.1 # via django +attrs==24.3.0 + # via -r requirements/base.in backports-zoneinfo==0.2.1 # via django certifi==2024.12.14 diff --git a/requirements/ci.txt b/requirements/ci.txt index 01968445..4ffb6cb7 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -8,6 +8,10 @@ asgiref==3.8.1 # via # -r /home/misha/work/cc2olx/requirements/quality.txt # django +attrs==24.3.0 + # via + # -c /home/misha/work/cc2olx/requirements/constraints.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt backports-zoneinfo==0.2.1 # via # -r /home/misha/work/cc2olx/requirements/quality.txt diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 94595ab1..f6629968 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -7,3 +7,5 @@ # link to other information that will help people in the future to remove the # pin when possible. Writing an issue against the offending project and # linking to it here is good. 
+ +attrs==24.3.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index e212fcf2..192b7890 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -9,6 +9,11 @@ asgiref==3.8.1 # -r /home/misha/work/cc2olx/requirements/ci.txt # -r /home/misha/work/cc2olx/requirements/quality.txt # django +attrs==24.3.0 + # via + # -c /home/misha/work/cc2olx/requirements/constraints.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt backports-tarfile==1.2.0 # via jaraco-context backports-zoneinfo==0.2.1 diff --git a/requirements/quality.txt b/requirements/quality.txt index d493f5a5..9ee3dc3d 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -8,6 +8,10 @@ asgiref==3.8.1 # via # -r /home/misha/work/cc2olx/requirements/test.txt # django +attrs==24.3.0 + # via + # -c /home/misha/work/cc2olx/requirements/constraints.txt + # -r /home/misha/work/cc2olx/requirements/test.txt backports-zoneinfo==0.2.1 # via # -r /home/misha/work/cc2olx/requirements/test.txt diff --git a/requirements/test.txt b/requirements/test.txt index 1c6cc62f..22b89548 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -8,6 +8,10 @@ asgiref==3.8.1 # via # -r /home/misha/work/cc2olx/requirements/base.txt # django +attrs==24.3.0 + # via + # -c /home/misha/work/cc2olx/requirements/constraints.txt + # -r /home/misha/work/cc2olx/requirements/base.txt backports-zoneinfo==0.2.1 # via # -r /home/misha/work/cc2olx/requirements/base.txt From 8b81e3d33737af04a2069c3000bdc799b6547ddd Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 15 Jan 2025 11:47:27 +0200 Subject: [PATCH 2/7] feat: [FC-0063] Block type processors are implemented --- MANIFEST.in | 1 + src/cc2olx/constants.py | 8 +- src/cc2olx/content_parsers/__init__.py | 15 + src/cc2olx/content_parsers/abc.py | 31 ++ src/cc2olx/content_parsers/discussion.py | 44 ++ src/cc2olx/content_parsers/html.py | 140 +++++ src/cc2olx/content_parsers/lti.py | 85 +++ src/cc2olx/content_parsers/mixins.py | 30 ++ src/cc2olx/content_parsers/qti.py | 351 ++++++++++++ src/cc2olx/content_parsers/utils.py | 127 +++++ src/cc2olx/content_parsers/video.py | 20 + src/cc2olx/content_processors.py | 89 ++++ src/cc2olx/dataclasses.py | 107 +++- src/cc2olx/enums.py | 28 + src/cc2olx/exceptions.py | 4 + src/cc2olx/models.py | 26 +- src/cc2olx/olx_generators/__init__.py | 15 + src/cc2olx/olx_generators/abc.py | 21 + src/cc2olx/olx_generators/discussion.py | 32 ++ src/cc2olx/olx_generators/html.py | 60 +++ src/cc2olx/olx_generators/lti.py | 43 ++ src/cc2olx/olx_generators/qti.py | 305 +++++++++++ src/cc2olx/olx_generators/video.py | 18 + src/cc2olx/templates/external_webcontent.html | 10 + src/cc2olx/templates/image_webcontent.html | 10 + src/cc2olx/xml/__init__.py | 0 src/cc2olx/xml/cc_xml.py | 501 ++++++++++++++++++ 27 files changed, 2114 insertions(+), 7 deletions(-) create mode 100644 src/cc2olx/content_parsers/__init__.py create mode 100644 src/cc2olx/content_parsers/abc.py create mode 100644 src/cc2olx/content_parsers/discussion.py create mode 100644 src/cc2olx/content_parsers/html.py create mode 100644 src/cc2olx/content_parsers/lti.py create mode 100644 src/cc2olx/content_parsers/mixins.py create mode 100644 src/cc2olx/content_parsers/qti.py create mode 100644 src/cc2olx/content_parsers/utils.py create mode 100644 src/cc2olx/content_parsers/video.py create mode 100644 src/cc2olx/content_processors.py create mode 100644 src/cc2olx/enums.py create mode 100644 src/cc2olx/exceptions.py create mode 100644 
src/cc2olx/olx_generators/__init__.py create mode 100644 src/cc2olx/olx_generators/abc.py create mode 100644 src/cc2olx/olx_generators/discussion.py create mode 100644 src/cc2olx/olx_generators/html.py create mode 100644 src/cc2olx/olx_generators/lti.py create mode 100644 src/cc2olx/olx_generators/qti.py create mode 100644 src/cc2olx/olx_generators/video.py create mode 100644 src/cc2olx/templates/external_webcontent.html create mode 100644 src/cc2olx/templates/image_webcontent.html create mode 100644 src/cc2olx/xml/__init__.py create mode 100644 src/cc2olx/xml/cc_xml.py diff --git a/MANIFEST.in b/MANIFEST.in index e8e2cb12..78416589 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include LICENSE include README.rst +recursive-include src/cc2olx/templates * recursive-include requirements * recursive-include tests * recursive-exclude * __pycache__ diff --git a/src/cc2olx/constants.py b/src/cc2olx/constants.py index c96e8937..1a3b14ff 100644 --- a/src/cc2olx/constants.py +++ b/src/cc2olx/constants.py @@ -1,3 +1,9 @@ -CDATA_PATTERN = r".*?)\]\]>" OLX_STATIC_DIR = "static" OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}" +WEB_RESOURCES_DIR_NAME = "web_resources" + +LINK_HTML = "{text}" +YOUTUBE_LINK_PATTERN = r"youtube.com/watch\?v=(?P[-\w]+)" +CDATA_PATTERN = r".*?)\]\]>" + +QTI_RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"] diff --git a/src/cc2olx/content_parsers/__init__.py b/src/cc2olx/content_parsers/__init__.py new file mode 100644 index 00000000..269855b2 --- /dev/null +++ b/src/cc2olx/content_parsers/__init__.py @@ -0,0 +1,15 @@ +from cc2olx.content_parsers.abc import AbstractContentParser +from cc2olx.content_parsers.discussion import DiscussionContentParser +from cc2olx.content_parsers.html import HtmlContentParser +from cc2olx.content_parsers.lti import LtiContentParser +from cc2olx.content_parsers.qti import QtiContentParser +from cc2olx.content_parsers.video import VideoContentParser + +__all__ = [ + "AbstractContentParser", + "DiscussionContentParser", + "HtmlContentParser", + "LtiContentParser", + "QtiContentParser", + "VideoContentParser", +] diff --git a/src/cc2olx/content_parsers/abc.py b/src/cc2olx/content_parsers/abc.py new file mode 100644 index 00000000..6a9c04d5 --- /dev/null +++ b/src/cc2olx/content_parsers/abc.py @@ -0,0 +1,31 @@ +from abc import ABC, abstractmethod +from typing import Optional, Union + +from cc2olx.content_parsers.utils import StaticLinkProcessor +from cc2olx.dataclasses import ContentParserContext +from cc2olx.models import Cartridge + + +class AbstractContentParser(ABC): + """ + Abstract base class for parsing Common Cartridge content. + """ + + def __init__(self, cartridge: Cartridge, context: ContentParserContext) -> None: + self._cartridge = cartridge + self._context = context + + def parse(self, idref: Optional[str]) -> Optional[Union[list, dict]]: + """ + Parse the resource with the specified identifier. + """ + if content := self._parse_content(idref): + link_processor = StaticLinkProcessor(self._cartridge, self._context.relative_links_source) + content = link_processor.process_content_static_links(content) + return content + + @abstractmethod + def _parse_content(self, idref: Optional[str]) -> Optional[Union[list, dict]]: + """ + Parse content of the resource with the specified identifier. 
+ """ diff --git a/src/cc2olx/content_parsers/discussion.py b/src/cc2olx/content_parsers/discussion.py new file mode 100644 index 00000000..0977ce19 --- /dev/null +++ b/src/cc2olx/content_parsers/discussion.py @@ -0,0 +1,44 @@ +import re +from typing import Dict, Optional + +from cc2olx import filesystem +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.models import ResourceFile + + +class DiscussionContentParser(AbstractContentParser): + """ + Discussion resource content parser. + """ + + def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]: + if idref: + if resource := self._cartridge.define_resource(idref): + if re.match(CommonCartridgeResourceType.DISCUSSION_TOPIC, resource["type"]): + return self._parse_discussion(resource) + return None + + def _parse_discussion(self, resource: dict) -> Dict[str, str]: + """ + Parse the discussion content. + """ + data = {} + + for child in resource["children"]: + if isinstance(child, ResourceFile): + data.update(self._parse_resource_file_data(child, resource["type"])) + + return data + + def _parse_resource_file_data(self, resource_file: ResourceFile, resource_type: str) -> Dict[str, str]: + """ + Parse the discussion resource file. + """ + tree = filesystem.get_xml_tree(self._cartridge.build_resource_file_path(resource_file.href)) + root = tree.getroot() + + return { + "title": root.get_title(resource_type).text, + "text": root.get_text(resource_type).text, + } diff --git a/src/cc2olx/content_parsers/html.py b/src/cc2olx/content_parsers/html.py new file mode 100644 index 00000000..187fe44b --- /dev/null +++ b/src/cc2olx/content_parsers/html.py @@ -0,0 +1,140 @@ +import imghdr +import logging +import re +from pathlib import Path +from typing import Dict, Optional + +from django.conf import settings + +from cc2olx.constants import LINK_HTML, OLX_STATIC_PATH_TEMPLATE, WEB_RESOURCES_DIR_NAME +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.content_parsers.mixins import WebLinkParserMixin +from cc2olx.enums import CommonCartridgeResourceType + +logger = logging.getLogger() + +HTML_FILENAME_SUFFIX = ".html" + + +class HtmlContentParser(WebLinkParserMixin, AbstractContentParser): + """ + HTML resource content parser. + """ + + DEFAULT_CONTENT = {"html": "

<p>MISSING CONTENT</p>
"} + + def _parse_content(self, idref: Optional[str]) -> Dict[str, str]: + if idref: + resource = self._cartridge.define_resource(idref) + if resource is None: + logger.info("Missing resource: %s", idref) + content = self.DEFAULT_CONTENT + elif resource["type"] == CommonCartridgeResourceType.WEB_CONTENT: + content = self._parse_webcontent(idref, resource) + elif web_link_content := self._parse_web_link_content(resource): + content = self._transform_web_link_content_to_html(web_link_content) + elif self.is_known_unprocessed_resource_type(resource["type"]): + content = self.DEFAULT_CONTENT + else: + content = self._parse_not_imported_content(resource) + return content + return self.DEFAULT_CONTENT + + def _parse_webcontent(self, idref: str, resource: dict) -> Dict[str, str]: + """ + Parse the resource with "webcontent" type. + """ + resource_file = resource["children"][0] + resource_relative_link = resource_file.href + resource_file_path = self._cartridge.build_resource_file_path(resource_relative_link) + + if resource_file_path.suffix == HTML_FILENAME_SUFFIX: + content = self._parse_webcontent_html_file(idref, resource_file_path) + elif WEB_RESOURCES_DIR_NAME in str(resource_file_path) and imghdr.what(str(resource_file_path)): + content = self._parse_image_webcontent_from_web_resources_dir(resource_file_path) + elif WEB_RESOURCES_DIR_NAME not in str(resource_file_path): + content = self._parse_webcontent_outside_web_resources_dir(resource_relative_link) + else: + logger.info("Skipping webcontent: %s", resource_file_path) + content = self.DEFAULT_CONTENT + + return content + + @staticmethod + def _parse_webcontent_html_file(idref: str, resource_file_path: Path) -> Dict[str, str]: + """ + Parse webcontent HTML file. + """ + try: + with open(resource_file_path, encoding="utf-8") as resource_file: + html = resource_file.read() + except: # noqa: E722 + logger.error("Failure reading %s from id %s", resource_file_path, idref) # noqa: E722 + raise + return {"html": html} + + def _parse_image_webcontent_from_web_resources_dir(self, resource_file_path: Path) -> Dict[str, str]: + """ + Parse webcontent image from "web_resources" directory. + """ + static_filename = str(resource_file_path).split(f"{WEB_RESOURCES_DIR_NAME}/")[1] + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename) + self._cartridge.olx_to_original_static_file_paths.add_web_resource_path(olx_static_path, resource_file_path) + image_webcontent_tpl_path = settings.TEMPLATES_DIR / "image_webcontent.html" + + with open(image_webcontent_tpl_path, encoding="utf-8") as image_webcontent_tpl: + tpl_content = image_webcontent_tpl.read() + html = tpl_content.format(olx_static_path=olx_static_path, static_filename=static_filename) + + return {"html": html} + + def _parse_webcontent_outside_web_resources_dir(self, resource_relative_path: str) -> Dict[str, str]: + """ + Parse webcontent located outside "web_resources" directory. 
+ """ + # This webcontent is outside ``web_resources`` directory + # So we need to manually copy it to OLX_STATIC_DIR + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=resource_relative_path) + self._cartridge.olx_to_original_static_file_paths.add_extra_path(olx_static_path, resource_relative_path) + external_webcontent_tpl_path = settings.TEMPLATES_DIR / "external_webcontent.html" + + with open(external_webcontent_tpl_path, encoding="utf-8") as external_webcontent_tpl: + tpl_content = external_webcontent_tpl.read() + html = tpl_content.format(olx_static_path=olx_static_path, resource_relative_path=resource_relative_path) + + return {"html": html} + + @staticmethod + def _transform_web_link_content_to_html(web_link_content: Dict[str, str]) -> Dict[str, str]: + """ + Generate HTML for weblink. + """ + video_link_html = LINK_HTML.format(url=web_link_content["href"], text=web_link_content.get("text", "")) + return {"html": video_link_html} + + @staticmethod + def is_known_unprocessed_resource_type(resource_type: str) -> bool: + """ + Decides whether the resource type is a known CC type to be unprocessed. + """ + return any( + re.match(type_pattern, resource_type) + for type_pattern in ( + CommonCartridgeResourceType.LTI_LINK, + CommonCartridgeResourceType.QTI_ASSESSMENT, + CommonCartridgeResourceType.DISCUSSION_TOPIC, + ) + ) + + @staticmethod + def _parse_not_imported_content(resource: dict) -> Dict[str, str]: + """ + Parse the resource which content type cannot be processed. + """ + resource_type = resource["type"] + text = f"Not imported content: type = {resource_type!r}" + if "href" in resource: + text += ", href = {!r}".format(resource["href"]) + + logger.info("%s", text) + return {"html": text} diff --git a/src/cc2olx/content_parsers/lti.py b/src/cc2olx/content_parsers/lti.py new file mode 100644 index 00000000..521f3c40 --- /dev/null +++ b/src/cc2olx/content_parsers/lti.py @@ -0,0 +1,85 @@ +import re +from typing import Dict, Optional + +from cc2olx import filesystem +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.utils import simple_slug +from cc2olx.xml import cc_xml + + +class LtiContentParser(AbstractContentParser): + """ + LTI resource content parser. + """ + + DEFAULT_WIDTH = "500" + DEFAULT_HEIGHT = "500" + + def _parse_content(self, idref: Optional[str]) -> Optional[dict]: + if idref: + if resource := self._cartridge.define_resource(idref): + if re.match(CommonCartridgeResourceType.LTI_LINK, resource["type"]): + data = self._parse_lti(resource) + # Canvas flavored courses have correct url in module meta for lti links + if self._cartridge.is_canvas_flavor: + if item_data := self._cartridge.module_meta.get_external_tool_item_data(idref): + data["launch_url"] = item_data.get("url", data["launch_url"]) + return data + return None + + def _parse_lti(self, resource: dict) -> dict: + """ + Parse LTI resource. 
+ """ + resource_file = resource["children"][0] + resource_file_path = self._cartridge.build_resource_file_path(resource_file.href) + tree = filesystem.get_xml_tree(resource_file_path) + root = tree.getroot() + title = root.title.text + + return { + "title": title, + "description": root.description.text, + "launch_url": self._parse_launch_url(root), + "height": self._parse_height(root), + "width": self._parse_width(root), + "custom_parameters": self._parse_custom_parameters(root), + "lti_id": self._parse_lti_id(root, title), + } + + def _parse_launch_url(self, resource_root: cc_xml.BasicLtiLink) -> str: + """ + Parse URL to launch LTI. + """ + if (launch_url := resource_root.secure_launch_url) is None: + launch_url = resource_root.launch_url + return getattr(launch_url, "text", "") + + def _parse_width(self, resource_root: cc_xml.BasicLtiLink) -> str: + """ + Parse width. + """ + return getattr(resource_root.width, "text", self.DEFAULT_WIDTH) + + def _parse_height(self, resource_root: cc_xml.BasicLtiLink) -> str: + """ + Parse height. + """ + return getattr(resource_root.height, "text", self.DEFAULT_HEIGHT) + + def _parse_custom_parameters(self, resource_root: cc_xml.BasicLtiLink) -> Dict[str, str]: + """ + Parse custom parameters. + """ + custom = resource_root.custom + return {} if custom is None else {option.get("name"): option.text for option in custom} + + def _parse_lti_id(self, resource_root: cc_xml.BasicLtiLink, title: str) -> str: + """ + Parse LTI identifier. + + For Canvas flavored CC, tool_id is used as lti_id if present. + """ + tool_id = resource_root.canvas_tool_id + return simple_slug(title) if tool_id is None else tool_id.text diff --git a/src/cc2olx/content_parsers/mixins.py b/src/cc2olx/content_parsers/mixins.py new file mode 100644 index 00000000..58c7d0f9 --- /dev/null +++ b/src/cc2olx/content_parsers/mixins.py @@ -0,0 +1,30 @@ +import re +from typing import Dict, Optional + +from cc2olx import filesystem +from cc2olx.enums import CommonCartridgeResourceType +from cc2olx.models import Cartridge + + +class WebLinkParserMixin: + """ + Provide Common Cartridge Web Link resource parsing functionality. + """ + + _cartridge: Cartridge + + def _parse_web_link_content(self, resource: dict) -> Optional[Dict[str, str]]: + """ + Provide Web Link resource data. 
+ """ + resource_type = resource["type"] + if re.match(CommonCartridgeResourceType.WEB_LINK, resource_type): + resource_file = resource["children"][0] + resource_file_path = self._cartridge.build_resource_file_path(resource_file.href) + tree = filesystem.get_xml_tree(resource_file_path) + root = tree.getroot() + return { + "href": root.get_url(resource_type).get("href"), + "text": root.get_title(resource_type).text, + } + return None diff --git a/src/cc2olx/content_parsers/qti.py b/src/cc2olx/content_parsers/qti.py new file mode 100644 index 00000000..c588e5c1 --- /dev/null +++ b/src/cc2olx/content_parsers/qti.py @@ -0,0 +1,351 @@ +import functools +import logging +import re +from collections import OrderedDict +from pathlib import Path +from typing import Callable, Dict, List, Optional, OrderedDict as OrderedDictType, Union + +from cc2olx import filesystem +from cc2olx.constants import QTI_RESPROCESSING_TYPES +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.dataclasses import FibProblemRawAnswers +from cc2olx.enums import CommonCartridgeResourceType, QtiQuestionType +from cc2olx.exceptions import QtiError +from cc2olx.xml import cc_xml + +logger = logging.getLogger() + + +class QtiContentParser(AbstractContentParser): + """ + QTI resource content parser. + """ + + def _parse_content(self, idref: Optional[str]) -> Optional[List[dict]]: + if idref: + if resource := self._cartridge.define_resource(idref): + if re.match(CommonCartridgeResourceType.QTI_ASSESSMENT, resource["type"]): + resource_file = resource["children"][0] + resource_file_path = self._cartridge.build_resource_file_path(resource_file.href) + return self._parse_qti(resource_file_path) + return None + + def _parse_qti(self, resource_file_path: Path) -> List[dict]: + """ + Parse resource of ``imsqti_xmlv1p2/imscc_xmlv1p1/assessment`` type. + """ + tree = filesystem.get_xml_tree(resource_file_path) + root = tree.getroot() + + parsed_problems = [] + + for index, problem in enumerate(root.items): + parsed_problems.append(self._parse_problem(problem, index, resource_file_path)) + + return parsed_problems + + def _parse_problem(self, problem: cc_xml.QtiItem, problem_index: int, resource_file_path: Path) -> dict: + """ + Parse a QTI item. + + When the malformed course (due to a weird Canvas behaviour) with equal + identifiers is gotten, a unique string is added to the raw identifier. + LMS doesn't support blocks with the same identifiers. + """ + data = {} + + attributes = problem.attrib + + data["ident"] = attributes["ident"] + str(problem_index) + if title := attributes.get("title"): + data["title"] = title + + cc_profile = problem.profile + data["cc_profile"] = cc_profile + + parse_problem = self._problem_parsers_map.get(cc_profile) + + if parse_problem is None: + raise QtiError(f'Unknown cc_profile: "{cc_profile}"') + + try: + data.update(parse_problem(problem)) + except NotImplementedError: + logger.info("Problem with ID %s can't be converted.", problem.attrib.get("ident")) + logger.info(" Profile %s is not supported.", cc_profile) + logger.info(" At file %s.", resource_file_path) + + return data + + @functools.cached_property + def _problem_parsers_map(self) -> Dict[QtiQuestionType, Callable[[cc_xml.QtiItem], dict]]: + """ + Provide mapping between CC profile value and problem node type parser. + + Note: Since True/False problems in QTI are constructed identically to + QTI Multiple Choice problems, we reuse `_parse_multiple_choice_problem` + for BOOLEAN type problems. 
+ """ + return { + QtiQuestionType.MULTIPLE_CHOICE: self._parse_multiple_choice_problem, + QtiQuestionType.MULTIPLE_RESPONSE: self._parse_multiple_response_problem, + QtiQuestionType.FILL_IN_THE_BLANK: self._parse_fib_problem, + QtiQuestionType.ESSAY: self._parse_essay_problem, + QtiQuestionType.BOOLEAN: self._parse_multiple_choice_problem, + QtiQuestionType.PATTERN_MATCH: self._parse_pattern_match_problem, + } + + def _parse_fixed_answer_question_responses( + self, + presentation: cc_xml.QtiPresentation, + ) -> OrderedDictType[str, Dict[str, Union[bool, str]]]: + """ + Provide mapping with response IDs as keys and response data as values. + + Example of ```` structure for the following profiles: + - ``cc.multiple_choice.v0p1`` + - ``cc.multiple_response.v0p1`` + - ``cc.true_false.v0p1`` + ``` + + + + + Response 1 + + + + + Response 2 + + + + + ``` + """ + responses = OrderedDict() + + for response in presentation.response_labels: + response_id = response.attrib["ident"] + responses[response_id] = {"text": response.mattext.text or "", "correct": False} + + return responses + + def _mark_correct_responses(self, resprocessing: cc_xml.QtiResprocessing, responses: OrderedDict) -> None: + """ + Add the information about correctness to responses data. + + Example of ```` structure for the following profiles: + - ``cc.multiple_choice.v0p1`` + - ``cc.true_false.v0p1`` + ``` + + + + + + + 8157 + + + + + + 5534 + + + + + + 4226 + + 100 + + + + ``` + + This XML is a sort of instruction about how responses should be evaluated. In this + particular example we have three correct answers with ids: 8157, 5534, 4226. + + Example of ```` structure for ``cc.multiple_response.v0p1``: + ``` + + + + + + + + 1759 + + 5954 + + 8170 + 9303 + + 15 + + + + + + ``` + Above example is for a multiple response type problem. In this example 1759, 8170 and + 9303 are correct answers while 15 and 5954 are not. Note that this code also support + ``or`` opearator too. + + For now, we just consider these responses correct in OLX, but according specification, + conditions can be arbitrarily nested, and score can be computed by some formula, so to + implement 100% conversion we need to write new XBlock. + """ + for respcondition in resprocessing.respconditions: + correct_answers = respcondition.varequals + + if len(correct_answers) == 0: + correct_answers = respcondition.and_varequals + correct_answers += respcondition.or_varequals + + for answer in correct_answers: + responses[answer.text]["correct"] = True + + if respcondition.attrib.get("continue", "No") == "No": + break + + def _parse_multiple_choice_problem(self, problem: cc_xml.QtiItem) -> dict: + """ + Provide the multiple choice problem data. + """ + choices = self._parse_fixed_answer_question_responses(problem.presentation) + self._mark_correct_responses(problem.resprocessing, choices) + + return { + "problem_description": problem.description, + "choices": choices, + } + + def _parse_multiple_response_problem(self, problem: cc_xml.QtiItem) -> dict: + """ + Provide the multiple response problem data. + """ + return self._parse_multiple_choice_problem(problem) + + def _parse_fib_problem(self, problem: cc_xml.QtiItem) -> dict: + """ + Provide the Fill-In-The-Blank problem data. + """ + return { + "problem_description": problem.description, + **self._parse_fib_problem_answers(problem), + } + + def _parse_fib_problem_answers(self, problem: cc_xml.QtiItem) -> dict: + """ + Parse the Fill-In-The-Blank problem answers data. 
+ """ + raw_answers = self._parse_fib_problem_raw_answers(problem) + + data = {"is_regexp": bool(raw_answers.answer_patterns)} + + if data["is_regexp"]: + data.update(self._build_fib_problem_regexp_answers(raw_answers)) + else: + data.update(self._build_fib_problem_exact_answers(raw_answers)) + return data + + def _parse_fib_problem_raw_answers(self, problem: cc_xml.QtiItem) -> FibProblemRawAnswers: + """ + Parse the Fill-In-The-Blank problem answers without processing. + """ + exact_answers = [] + answer_patterns = [] + + resprocessing = problem.resprocessing + + for respcondition in resprocessing.respconditions: + for varequal in respcondition.varequals: + exact_answers.append(varequal.text) + + for varsubstring in respcondition.varsubstrings: + answer_patterns.append(varsubstring.text) + + if respcondition.attrib.get("continue", "No") == "No": + break + + return FibProblemRawAnswers(exact_answers, answer_patterns) + + @staticmethod + def _build_fib_problem_regexp_answers(raw_answers: FibProblemRawAnswers) -> dict: + """ + Build the Fill-In-The-Blank problem regular expression answers data. + """ + exact_answers = raw_answers.exact_answers.copy() + answer_patterns = raw_answers.answer_patterns.copy() + + data = {"answer": answer_patterns.pop(0)} + exact_answers = [re.escape(answer) for answer in exact_answers] + data["additional_answers"] = [*answer_patterns, *exact_answers] + + return data + + @staticmethod + def _build_fib_problem_exact_answers(raw_answers: FibProblemRawAnswers) -> dict: + """ + Build the Fill-In-The-Blank problem exact answers data. + """ + # Primary answer is the first one, additional answers are what is left + exact_answers = raw_answers.exact_answers.copy() + + return { + "answer": exact_answers.pop(0), + "additional_answers": exact_answers, + } + + def _parse_essay_problem(self, problem: cc_xml.QtiItem) -> dict: + """ + Parse `cc.essay.v0p1` problem type. + + Provide a dictionary with presentation & sample solution if exists. + """ + data = {"problem_description": problem.description, **self._parse_essay_feedback(problem)} + + if sample_solution := self._parse_essay_sample_solution(problem): + data["sample_solution"] = sample_solution + + return data + + def _parse_essay_sample_solution(self, problem: cc_xml.QtiItem) -> Optional[str]: + """ + Parse the essay sample solution. + """ + if (solution := problem.solution) is not None: + return solution.mattext.text + return None + + def _parse_essay_feedback(self, problem: cc_xml.QtiItem) -> dict: + """ + Parse the essay feedback. + """ + data = {} + + if problem.get_itemfeedback() is not None: + for resp_type in QTI_RESPROCESSING_TYPES: + if response_text := self._parse_essay_response_text(problem, resp_type): + data[resp_type] = response_text + + return data + + def _parse_essay_response_text(self, problem: cc_xml.QtiItem, resp_type: str) -> Optional[str]: + """ + Parse the essay response text. + """ + respcondition = problem.resprocessing.respconditions[0] + if respcondition.get_display_feedback(resp_type) is not None: + return problem.get_itemfeedback(resp_type).flow_mat.material.mattext.text + return None + + def _parse_pattern_match_problem(self, problem: cc_xml.QtiItem) -> dict: + """ + Provide the pattern match problem data. 
+ """ + raise NotImplementedError diff --git a/src/cc2olx/content_parsers/utils.py b/src/cc2olx/content_parsers/utils.py new file mode 100644 index 00000000..9ded5986 --- /dev/null +++ b/src/cc2olx/content_parsers/utils.py @@ -0,0 +1,127 @@ +import html as html_parser +import logging +import re +import urllib +from typing import TypeVar, Optional + +from cc2olx.dataclasses import LinkKeywordProcessor +from cc2olx.models import Cartridge + +logger = logging.getLogger() + +Content = TypeVar("Content") + + +class StaticLinkProcessor: + """ + Provide static links processing functionality. + """ + + def __init__(self, cartridge: Cartridge, relative_links_source: Optional[str]) -> None: + self._cartridge = cartridge + self._relative_links_source = relative_links_source + + def process_content_static_links(self, content: Content) -> Content: + """ + Take a node data and recursively find and escape static links. + + Provide detail data with static link escaped to an OLX-friendly format. + """ + + if isinstance(content, str): + return self.process_static_links(content) + + if isinstance(content, list): + for index, value in enumerate(content): + content[index] = self.process_content_static_links(value) + elif isinstance(content, dict): + for key, value in content.items(): + content[key] = self.process_content_static_links(value) + + return content + + def process_static_links(self, html: str) -> str: + """ + Process static links like src and href to have appropriate links. + """ + items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html) + + link_keyword_processors = ( + LinkKeywordProcessor("IMS-CC-FILEBASE", self._process_ims_cc_filebase), + LinkKeywordProcessor("WIKI_REFERENCE", self._process_wiki_reference), + LinkKeywordProcessor("external_tools", self._process_external_tools_link), + LinkKeywordProcessor("CANVAS_OBJECT_REFERENCE", self._process_canvas_reference), + ) + + for _, link in items: + for keyword, processor in link_keyword_processors: + if keyword in link: + html = processor(link, html) + break + else: + html = self._process_relative_external_links(link, html) + + return html + + def _process_wiki_reference(self, link: str, html: str) -> str: + """ + Replace $WIKI_REFERENCE$ with edx /jump_to_id/. + """ + search_key = urllib.parse.unquote(link).replace("$WIKI_REFERENCE$/pages/", "") + + # remove query params and add suffix .html to match with resource_id_by_href + search_key = search_key.split("?")[0] + ".html" + for key in self._cartridge.resource_id_by_href.keys(): + if key.endswith(search_key): + replace_with = "/jump_to_id/{}".format(self._cartridge.resource_id_by_href[key]) + return html.replace(link, replace_with) + + logger.warning("Unable to process Wiki link - %s", link) + return html + + @staticmethod + def _process_canvas_reference(link: str, html: str) -> str: + """ + Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/. + """ + object_id = urllib.parse.unquote(link).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/") + return html.replace(link, object_id) + + @staticmethod + def _process_ims_cc_filebase(link: str, html: str) -> str: + """ + Replace $IMS-CC-FILEBASE$ with /static. + """ + new_link = urllib.parse.unquote(link).replace("$IMS-CC-FILEBASE$", "/static") + # skip query parameters for static files + new_link = new_link.split("?")[0] + # & is not valid in an URL. 
But some file seem to have it when it should be & + new_link = new_link.replace("&", "&") + return html.replace(link, new_link) + + @staticmethod + def _process_external_tools_link(link: str, html: str) -> str: + """ + Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link. + """ + external_tool_query = urllib.parse.urlparse(link).query + # unescape query that has been HTML encoded so it can be parsed correctly + unescaped_external_tool_query = html_parser.unescape(external_tool_query) + external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0] + return html.replace(link, external_tool_url) + + def _process_relative_external_links(self, link: str, html: str) -> str: + """ + Turn static file URLs outside OLX_STATIC_DIR into absolute URLs. + + Allow to avoid a situation when the original course page links have + relative URLs, such URLs weren't included into the exported Common + Cartridge course file that causes broken URLs in the imported OeX + course. The function adds the origin source to URLs to make them + absolute ones. + """ + if self._relative_links_source is None or link in self._cartridge.olx_to_original_static_file_paths.all: + return html + + url = urllib.parse.urljoin(self._relative_links_source, link) + return html.replace(link, url) diff --git a/src/cc2olx/content_parsers/video.py b/src/cc2olx/content_parsers/video.py new file mode 100644 index 00000000..aec3730c --- /dev/null +++ b/src/cc2olx/content_parsers/video.py @@ -0,0 +1,20 @@ +import re +from typing import Dict, Optional + +from cc2olx.constants import YOUTUBE_LINK_PATTERN +from cc2olx.content_parsers import AbstractContentParser +from cc2olx.content_parsers.mixins import WebLinkParserMixin + + +class VideoContentParser(WebLinkParserMixin, AbstractContentParser): + """ + Video resource content parser. + """ + + def _parse_content(self, idref: Optional[str]) -> Optional[Dict[str, str]]: + if idref: + if resource := self._cartridge.define_resource(idref): + if web_link_content := self._parse_web_link_content(resource): + if youtube_match := re.search(YOUTUBE_LINK_PATTERN, web_link_content["href"]): + return {"youtube": youtube_match.group("video_id")} + return None diff --git a/src/cc2olx/content_processors.py b/src/cc2olx/content_processors.py new file mode 100644 index 00000000..26391d32 --- /dev/null +++ b/src/cc2olx/content_processors.py @@ -0,0 +1,89 @@ +import xml.dom.minidom +from typing import List, Optional, Type, Union + +from cc2olx import content_parsers, olx_generators +from cc2olx.dataclasses import ContentParserContext, ContentProcessorContext, OlxGeneratorContext +from cc2olx.models import Cartridge + + +class AbstractContentProcessor: + """ + Abstract base class for Common Cartridge content processing. + """ + + content_parser_class: Type[content_parsers.AbstractContentParser] + olx_generator_class: Type[olx_generators.AbstractOlxGenerator] + + def __init__(self, cartridge: Cartridge, context: ContentProcessorContext) -> None: + self._cartridge = cartridge + self._context = context + + def process(self, idref: Optional[str]) -> Optional[List[xml.dom.minidom.Element]]: + """ + Process a Common Cartridge resource content. 
+ """ + parser = self.content_parser_class( + self._cartridge, + ContentParserContext.from_content_processor_context(self._context), + ) + if content := parser.parse(idref): + self._pre_olx_generation(content) + olx_generator = self.olx_generator_class(OlxGeneratorContext.from_content_processor_context(self._context)) + return olx_generator.create_nodes(content) + return None + + def _pre_olx_generation(self, content: Union[list, dict]) -> None: + """ + The hook for actions performing before OLX generation. + """ + + +class HtmlContentProcessor(AbstractContentProcessor): + """ + HTML content processor. + """ + + content_parser_class = content_parsers.HtmlContentParser + olx_generator_class = olx_generators.HtmlOlxGenerator + + +class VideoContentProcessor(AbstractContentProcessor): + """ + Video content processor. + """ + + content_parser_class = content_parsers.VideoContentParser + olx_generator_class = olx_generators.VideoOlxGenerator + + +class LtiContentProcessor(AbstractContentProcessor): + """ + LTI content processor. + """ + + content_parser_class = content_parsers.LtiContentParser + olx_generator_class = olx_generators.LtiOlxGenerator + + def _pre_olx_generation(self, content: dict) -> None: + """ + Populate LTI consumer IDs with the resource LTI ID. + """ + self._context.add_lti_consumer_id(content["lti_id"]) + + +class QtiContentProcessor(AbstractContentProcessor): + """ + QTI content processor. + """ + + content_parser_class = content_parsers.QtiContentParser + olx_generator_class = olx_generators.QtiOlxGenerator + + +class DiscussionContentProcessor(AbstractContentProcessor): + """ + Discussion content processor. + """ + + content_parser_class = content_parsers.DiscussionContentParser + olx_generator_class = olx_generators.DiscussionOlxGenerator diff --git a/src/cc2olx/dataclasses.py b/src/cc2olx/dataclasses.py index c53870f7..47f22ed5 100644 --- a/src/cc2olx/dataclasses.py +++ b/src/cc2olx/dataclasses.py @@ -1,6 +1,10 @@ from dataclasses import dataclass, field from collections import ChainMap -from typing import Dict +from typing import Callable, Dict, List, NamedTuple, Optional, Set + +import attrs + +from cc2olx.iframe_link_parser import IframeLinkParser @dataclass @@ -14,5 +18,106 @@ class OlxToOriginalStaticFilePaths: # Static files that are outside of `web_resources` directory, but still required extra: Dict[str, str] = field(default_factory=dict) + def add_web_resource_path(self, olx_static_path: str, cc_static_path: str) -> None: + """ + Add web resource static file mapping. + """ + self.web_resources[olx_static_path] = cc_static_path + + def add_extra_path(self, olx_static_path: str, cc_static_path: str) -> None: + """ + Add extra static file mapping. + """ + self.extra[olx_static_path] = cc_static_path + def __post_init__(self) -> None: self.all = ChainMap(self.extra, self.web_resources) + + +class LinkKeywordProcessor(NamedTuple): + """ + Encapsulate a link keyword and it's processor. + """ + + keyword: str + processor: Callable[[str, str], str] + + +class FibProblemRawAnswers(NamedTuple): + """ + Encapsulate answers data for a Fill-In-The-Blank problem. + """ + + exact_answers: List[str] + answer_patterns: List[str] + + +@attrs.define(frozen=True, slots=False) +class OlxGeneratorContextMixin: + """ + Encapsulate an OLX generator context data. + """ + + iframe_link_parser: Optional[IframeLinkParser] + _lti_consumer_ids: Set[str] + + def add_lti_consumer_id(self, lti_consumer_id: str) -> None: + """ + Populate LTI consumer IDs set with a provided value. 
+ """ + self._lti_consumer_ids.add(lti_consumer_id) + + +class OlxGeneratorContext(OlxGeneratorContextMixin): + """ + Encapsulate an OLX generator context. + + Provide additional initialization methods. + """ + + @classmethod + def from_content_processor_context( + cls, + content_processor_context: "ContentProcessorContext", + ) -> "OlxGeneratorContext": + """ + Create the OLX generator context from the content processor context. + """ + return cls( + iframe_link_parser=content_processor_context.iframe_link_parser, + lti_consumer_ids=content_processor_context._lti_consumer_ids, + ) + + +@attrs.define(frozen=True, slots=False) +class ContentParserContextMixin: + """ + Encapsulate a content parser context data. + """ + + relative_links_source: Optional[str] + + +class ContentParserContext(ContentParserContextMixin): + """ + Encapsulate a content parser context. + + Provide additional initialization methods. + """ + + @classmethod + def from_content_processor_context( + cls, + content_processor_context: "ContentProcessorContext", + ) -> "ContentParserContext": + """ + Create the content parser context from the content processor context. + """ + return cls(content_processor_context.relative_links_source) + + +@attrs.define(frozen=True, slots=False) +class ContentProcessorContext(ContentParserContextMixin, OlxGeneratorContextMixin): + """ + Encapsulate a content processor context. + """ diff --git a/src/cc2olx/enums.py b/src/cc2olx/enums.py new file mode 100644 index 00000000..8b213e08 --- /dev/null +++ b/src/cc2olx/enums.py @@ -0,0 +1,28 @@ +from enum import Enum + + +class CommonCartridgeResourceType(str, Enum): + """ + Enumerate Common Cartridge resource types. + + Contain the exact type values and regular expressions to match the type. + """ + + WEB_CONTENT = "webcontent" + WEB_LINK = r"^imswl_xmlv\d+p\d+$" + LTI_LINK = r"^imsbasiclti_xmlv\d+p\d+$" + QTI_ASSESSMENT = r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$" + DISCUSSION_TOPIC = r"^imsdt_xmlv\d+p\d+$" + + +class QtiQuestionType(str, Enum): + """ + Enumerate QTI question types. + """ + + MULTIPLE_CHOICE = "cc.multiple_choice.v0p1" + MULTIPLE_RESPONSE = "cc.multiple_response.v0p1" + FILL_IN_THE_BLANK = "cc.fib.v0p1" + ESSAY = "cc.essay.v0p1" + BOOLEAN = "cc.true_false.v0p1" + PATTERN_MATCH = "cc.pattern_match.v0p1" diff --git a/src/cc2olx/exceptions.py b/src/cc2olx/exceptions.py new file mode 100644 index 00000000..7aae35e6 --- /dev/null +++ b/src/cc2olx/exceptions.py @@ -0,0 +1,4 @@ +class QtiError(Exception): + """ + Exception type for QTI parsing/conversion errors. + """ diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py index 02dbed9b..dc5690ea 100644 --- a/src/cc2olx/models.py +++ b/src/cc2olx/models.py @@ -295,6 +295,16 @@ def flatten(self, container): output.extend(leaves) return output + def define_resource(self, idref): + """ + Define a resource by its identifier. + """ + resource = self.resources_by_id.get(idref) + if resource is None and self.is_canvas_flavor: + module_item_idref = self.module_meta.get_identifierref(idref) + resource = self.resources_by_id.get(module_item_idref) + return resource + def get_resource_content(self, identifier): """ Get the resource named by `identifier`. @@ -466,6 +476,12 @@ def get_course_run(self): # TODO: find a better value for this; lifecycle.contribute_date? return "run" + def build_resource_file_path(self, file_name: str) -> Path: + """ + Build the resource file path. 
+ """ + return self.directory / file_name + def _extract(self): path_extracted = filesystem.unzip_directory(self.file_path, self.workspace) self.directory = path_extracted @@ -497,11 +513,11 @@ def _update_namespaces(self, root): ) def _parse_manifest(self, node): - data = dict() - data["metadata"] = self._parse_metadata(node) - data["organizations"] = self._parse_organizations(node) - data["resources"] = self._parse_resources(node) - return data + return { + "metadata": self._parse_metadata(node), + "organizations": self._parse_organizations(node), + "resources": self._parse_resources(node), + } def _clean_manifest(self, node): """ diff --git a/src/cc2olx/olx_generators/__init__.py b/src/cc2olx/olx_generators/__init__.py new file mode 100644 index 00000000..546237d7 --- /dev/null +++ b/src/cc2olx/olx_generators/__init__.py @@ -0,0 +1,15 @@ +from cc2olx.olx_generators.abc import AbstractOlxGenerator +from cc2olx.olx_generators.discussion import DiscussionOlxGenerator +from cc2olx.olx_generators.html import HtmlOlxGenerator +from cc2olx.olx_generators.lti import LtiOlxGenerator +from cc2olx.olx_generators.qti import QtiOlxGenerator +from cc2olx.olx_generators.video import VideoOlxGenerator + +__all__ = [ + "AbstractOlxGenerator", + "DiscussionOlxGenerator", + "HtmlOlxGenerator", + "LtiOlxGenerator", + "QtiOlxGenerator", + "VideoOlxGenerator", +] diff --git a/src/cc2olx/olx_generators/abc.py b/src/cc2olx/olx_generators/abc.py new file mode 100644 index 00000000..79242d12 --- /dev/null +++ b/src/cc2olx/olx_generators/abc.py @@ -0,0 +1,21 @@ +import xml.dom.minidom +from abc import ABC, abstractmethod +from typing import List, Union + +from cc2olx.dataclasses import OlxGeneratorContext + + +class AbstractOlxGenerator(ABC): + """ + Abstract base class for OLX generation for Common Cartridge content. + """ + + def __init__(self, context: OlxGeneratorContext) -> None: + self._doc = xml.dom.minidom.Document() + self._context = context + + @abstractmethod + def create_nodes(self, content: Union[dict, List[dict]]) -> List[xml.dom.minidom.Element]: + """ + Create OLX nodes. + """ diff --git a/src/cc2olx/olx_generators/discussion.py b/src/cc2olx/olx_generators/discussion.py new file mode 100644 index 00000000..889e12cf --- /dev/null +++ b/src/cc2olx/olx_generators/discussion.py @@ -0,0 +1,32 @@ +import xml.dom.minidom +from typing import List + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import clean_from_cdata, element_builder + + +class DiscussionOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for discussions. 
+ """ + + DEFAULT_TEXT = "MISSING CONTENT" + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + el = element_builder(self._doc) + + txt = self.DEFAULT_TEXT if content["text"] is None else content["text"] + txt = clean_from_cdata(txt) + html_node = el("html", [self._doc.createCDATASection(txt)], {}) + + discussion_node = el( + "discussion", + [], + { + "display_name": "", + "discussion_category": content["title"], + "discussion_target": content["title"], + }, + ) + + return [html_node, discussion_node] diff --git a/src/cc2olx/olx_generators/html.py b/src/cc2olx/olx_generators/html.py new file mode 100644 index 00000000..3cfbf21b --- /dev/null +++ b/src/cc2olx/olx_generators/html.py @@ -0,0 +1,60 @@ +import xml.dom.minidom +from typing import List, Tuple + +import lxml.html + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import clean_from_cdata + + +class HtmlOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for HTML content. + """ + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + """ + Process the HTML and gives out corresponding HTML or Video OLX nodes. + """ + video_olx = [] + nodes = [] + html = content["html"] + if self._context.iframe_link_parser: + html, video_olx = self._process_html_for_iframe(html) + html = clean_from_cdata(html) + txt = self._doc.createCDATASection(html) + + html_node = self._doc.createElement("html") + html_node.appendChild(txt) + nodes.append(html_node) + + nodes.extend(video_olx) + + return nodes + + def _process_html_for_iframe(self, html_str: str) -> Tuple[str, List[xml.dom.minidom.Element]]: + """ + Parse the iframe with embedded video, to be converted into video xblock. + + Provide the html content of the file, if iframe is present and + converted into xblock then iframe is removed from the HTML, as well as + a list of XML children, i.e video xblock. + """ + video_olx = [] + parsed_html = lxml.html.fromstring(html_str) + iframes = parsed_html.xpath("//iframe") + if not iframes: + return html_str, video_olx + + video_olx, converted_iframes = self._context.iframe_link_parser.get_video_olx(self._doc, iframes) + if video_olx: + # If video xblock is present then we modify the HTML to remove the iframe + # hence we need to convert the modified HTML back to string. We also remove + # the parent if there are no other children. + for iframe in converted_iframes: + parent = iframe.getparent() + parent.remove(iframe) + if not parent.getchildren(): + parent.getparent().remove(parent) + return lxml.html.tostring(parsed_html).decode("utf-8"), video_olx + return html_str, video_olx diff --git a/src/cc2olx/olx_generators/lti.py b/src/cc2olx/olx_generators/lti.py new file mode 100644 index 00000000..530cc4ff --- /dev/null +++ b/src/cc2olx/olx_generators/lti.py @@ -0,0 +1,43 @@ +import xml.dom.minidom +from typing import List + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import element_builder + + +class LtiOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for LTIs. 
+ """ + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + el = element_builder(self._doc) + + custom_parameters = "[{params}]".format( + params=", ".join( + [ + '"{key}={value}"'.format( + key=key, + value=value, + ) + for key, value in content["custom_parameters"].items() + ] + ), + ) + lti_consumer_node = el( + "lti_consumer", + [], + { + "custom_parameters": custom_parameters, + "description": content["description"], + "display_name": content["title"], + "inline_height": content["height"], + "inline_width": content["width"], + "launch_url": content["launch_url"], + "modal_height": content["height"], + "modal_width": content["width"], + "xblock-family": "xblock.v1", + "lti_id": content["lti_id"], + }, + ) + return [lti_consumer_node] diff --git a/src/cc2olx/olx_generators/qti.py b/src/cc2olx/olx_generators/qti.py new file mode 100644 index 00000000..2b1fe996 --- /dev/null +++ b/src/cc2olx/olx_generators/qti.py @@ -0,0 +1,305 @@ +import functools +import urllib.parse +import xml.dom.minidom +from html import unescape +from typing import Callable, Collection, Dict, List, Tuple, Union + +from lxml import etree, html + +from cc2olx.constants import QTI_RESPROCESSING_TYPES +from cc2olx.enums import QtiQuestionType +from cc2olx.exceptions import QtiError +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import element_builder + + +class QtiOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for QTIs. + """ + + FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10 + + def create_nodes(self, content: List[dict]) -> List[xml.dom.minidom.Element]: + problems = [] + + for problem_data in content: + cc_profile = problem_data["cc_profile"] + create_problem = self._problem_creators_map.get(cc_profile) + + if create_problem is None: + raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"])) + + problem = create_problem(problem_data) + + # sometimes we might want to have additional items from one CC item + if isinstance(problem, list) or isinstance(problem, tuple): + problems += problem + else: + problems.append(problem) + + return problems + + @functools.cached_property + def _problem_creators_map( + self, + ) -> Dict[ + QtiQuestionType, + Callable[[dict], Union[xml.dom.minidom.Element, Collection[xml.dom.minidom.Element]]], + ]: + """ + Provide CC profile value to actual problem node creators mapping. + + Note: Since True/False problems in OLX are constructed identically to + OLX Multiple Choice problems, we reuse `_create_multiple_choice_problem` + for BOOLEAN type problems + """ + return { + QtiQuestionType.MULTIPLE_CHOICE: self._create_multiple_choice_problem, + QtiQuestionType.MULTIPLE_RESPONSE: self._create_multiple_response_problem, + QtiQuestionType.FILL_IN_THE_BLANK: self._create_fib_problem, + QtiQuestionType.ESSAY: self._create_essay_problem, + QtiQuestionType.BOOLEAN: self._create_multiple_choice_problem, + QtiQuestionType.PATTERN_MATCH: self._create_pattern_match_problem, + } + + @staticmethod + def _create_problem_description(description_html_str: str) -> xml.dom.minidom.Element: + """ + Create a problem description node. + + Material texts can come in form of escaped HTML markup, which + can't be considered as valid XML. ``xml.dom.minidom`` has no + features to convert HTML to XML, so we use lxml parser here. 
+ """ + description_html_str = unescape(description_html_str) + + description_html_str = urllib.parse.unquote(description_html_str) + + element = html.fromstring(description_html_str) + xml_string = etree.tostring(element) + return xml.dom.minidom.parseString(xml_string).firstChild + + def _add_choice(self, parent: xml.dom.minidom.Element, is_correct: bool, text: str) -> None: + """ + Append choices to given ``checkboxgroup`` or ``choicegroup`` parent. + """ + choice = self._doc.createElement("choice") + choice.setAttribute("correct", "true" if is_correct else "false") + self._set_text(choice, text) + parent.appendChild(choice) + + def _set_text(self, node: xml.dom.minidom.Element, new_text: str) -> None: + """ + Set a node text. + """ + text_node = self._doc.createTextNode(new_text) + node.appendChild(text_node) + + def _create_multiple_choice_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create multiple choice problem OLX. + """ + problem = self._doc.createElement("problem") + problem_content = self._doc.createElement("multiplechoiceresponse") + + problem_description = self._create_problem_description(problem_data["problem_description"]) + + choice_group = self._doc.createElement("choicegroup") + choice_group.setAttribute("type", "MultipleChoice") + + for choice_data in problem_data["choices"].values(): + self._add_choice(choice_group, choice_data["correct"], choice_data["text"]) + + problem_content.appendChild(problem_description) + problem_content.appendChild(choice_group) + problem.appendChild(problem_content) + + return problem + + def _create_multiple_response_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create multiple response problem OLX. + + Set partial_credit to EDC by default. + """ + el = element_builder(self._doc) + + problem_description = self._create_problem_description(problem_data["problem_description"]) + + problem = el( + "problem", + [ + el( + "choiceresponse", + [ + problem_description, + el( + "checkboxgroup", + [ + el( + "choice", + choice["text"], + {"correct": "true" if choice["correct"] else "false"}, + ) + for choice in problem_data["choices"].values() + ], + {"type": "MultipleChoice"}, + ), + ], + {"partial_credit": "EDC"}, + ), + ], + ) + return problem + + def _create_fib_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create Fill-In-The-Blank problem OLX. 
+ """ + # Track maximum answer length for textline at the bottom + max_answer_length = 0 + + problem = self._doc.createElement("problem") + + # Set the primary answer on the stringresponse + # and set the type to case insensitive + problem_content = self._doc.createElement("stringresponse") + problem_content.setAttribute("answer", problem_data["answer"]) + problem_content.setAttribute("type", self._build_fib_problem_type(problem_data)) + + if len(problem_data["answer"]) > max_answer_length: + max_answer_length = len(problem_data["answer"]) + + problem_description = self._create_problem_description(problem_data["problem_description"]) + problem_content.appendChild(problem_description) + + # For any (optional) additional accepted answers, add an + # additional_answer element with that answer + for answer in problem_data.get("additional_answers", []): + additional_answer = self._doc.createElement("additional_answer") + additional_answer.setAttribute("answer", answer) + problem_content.appendChild(additional_answer) + + if len(answer) > max_answer_length: + max_answer_length = len(answer) + + # Add a textline element with the max answer length plus a buffer + textline = self._doc.createElement("textline") + textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER)) + problem_content.appendChild(textline) + + problem.appendChild(problem_content) + + return problem + + @staticmethod + def _build_fib_problem_type(problem_data: dict) -> str: + """ + Build `stringresponse` OLX type for a Fill-In-The-Blank problem. + """ + problem_types = ["ci"] + + if problem_data["is_regexp"]: + problem_types.append("regexp") + + return " ".join(problem_types) + + def _create_essay_problem( + self, + problem_data: dict, + ) -> Union[xml.dom.minidom.Element, Tuple[xml.dom.minidom.Element, xml.dom.minidom.Element]]: + """ + Create an essay problem OLX. + + Given parsed essay problem data, returns a openassessment component. If a sample + solution provided, returns that as a HTML block before openassessment. 
+ """ + el = element_builder(self._doc) + + if any(key in QTI_RESPROCESSING_TYPES for key in problem_data.keys()): + resp_samples = [ + el("name", "Feedback"), + el("label", "Feedback"), + el("prompt", "Example Feedback"), + ] + + for desc, key in zip(["General", "Correct", "Incorrect"], QTI_RESPROCESSING_TYPES): + resp_samples.append( + el( + "option", + [el("name", desc), el("label", desc), el("explanation", problem_data.get(key, desc))], + {"points": "0"}, + ) + ) + criterion = el("criterion", resp_samples, {"feedback": "optional"}) + else: + criterion = el( + "criterion", + [ + el("name", "Ideas"), + el("label", "Ideas"), + el("prompt", "Example criterion"), + el( + "option", + [el("name", "Poor"), el("label", "Poor"), el("explanation", "Explanation")], + {"points": "0"}, + ), + el( + "option", + [el("name", "Good"), el("label", "Good"), el("explanation", "Explanation")], + {"points": "1"}, + ), + ], + {"feedback": "optional"}, + ) + + description = problem_data["problem_description"] + ora = el( + "openassessment", + [ + el("title", "Open Response Assessment"), + el( + "assessments", + [ + el("assessment", None, attributes={"name": "staff-assessment", "required": "True"}), + ], + ), + el( + "prompts", + [ + el( + "prompt", + [el("description", description)], + ), + ], + ), + el( + "rubric", + [ + criterion, + el("feedbackprompt", "Feedback prompt text"), + el("feedback_default_text", "Feedback prompt default text"), + ], + ), + ], + { + "url_name": problem_data["ident"], + "text_response": "required", + "prompts_type": "html", + }, + ) + + # if a sample solution exists add on top of ora, because + # olx doesn't have a sample solution equivalent. + if problem_data.get("sample_solution"): + child = el("html", self._doc.createCDATASection(problem_data["sample_solution"])) + return child, ora + + return ora + + def _create_pattern_match_problem(self, problem_data: dict) -> xml.dom.minidom.Element: + """ + Create pattern match problem OLX. + """ + raise NotImplementedError diff --git a/src/cc2olx/olx_generators/video.py b/src/cc2olx/olx_generators/video.py new file mode 100644 index 00000000..0b86fddd --- /dev/null +++ b/src/cc2olx/olx_generators/video.py @@ -0,0 +1,18 @@ +import xml.dom.minidom +from typing import List + +from cc2olx.olx_generators import AbstractOlxGenerator +from cc2olx.utils import element_builder + + +class VideoOlxGenerator(AbstractOlxGenerator): + """ + Generate OLX for video content. + """ + + def create_nodes(self, content: dict) -> List[xml.dom.minidom.Element]: + xml_element = element_builder(self._doc) + youtube_video_id = content["youtube"] + attributes = {"youtube": f"1.00:{youtube_video_id}", "youtube_id_1_0": content["youtube"]} + video_element = xml_element("video", children=None, attributes=attributes) + return [video_element] diff --git a/src/cc2olx/templates/external_webcontent.html b/src/cc2olx/templates/external_webcontent.html new file mode 100644 index 00000000..fc39edfb --- /dev/null +++ b/src/cc2olx/templates/external_webcontent.html @@ -0,0 +1,10 @@ + + + + + +

+ {resource_relative_path} +

+ + diff --git a/src/cc2olx/templates/image_webcontent.html b/src/cc2olx/templates/image_webcontent.html new file mode 100644 index 00000000..c55beeb7 --- /dev/null +++ b/src/cc2olx/templates/image_webcontent.html @@ -0,0 +1,10 @@ + + + + + +

+ {static_filename} +

+ + diff --git a/src/cc2olx/xml/__init__.py b/src/cc2olx/xml/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cc2olx/xml/cc_xml.py b/src/cc2olx/xml/cc_xml.py new file mode 100644 index 00000000..b0871a6f --- /dev/null +++ b/src/cc2olx/xml/cc_xml.py @@ -0,0 +1,501 @@ +from collections import defaultdict +from typing import Dict, List, Optional, Type, TypeVar + +from lxml import etree + + +QTI_NAMESPACE = "http://www.imsglobal.org/xsd/ims_qtiasiv1p2" + + +class CommonCartridgeElementBase(etree.ElementBase): + """ + Base Common Cartridge XML element. + """ + + NODE_NAMESPACES: List[str] + NODE_NAME: str + + +class CommonCartridgeElementClassLookup(etree.CustomElementClassLookup): + """ + The lookup class for Common Cartridge XML elements. + """ + + _element_classes = defaultdict(dict) + + def lookup( + self, + node_type: str, + document: etree._Document, + namespace: Optional[dict], + name: Optional[str], + ) -> Optional[Type[CommonCartridgeElementBase]]: + if node_type == "element": + return self._element_classes.get(namespace, {}).get(name.lower(), CommonCartridgeElementBase) + return None + + +CommonCartridgeElementType = TypeVar("CommonCartridgeElementType", bound=CommonCartridgeElementBase) + + +def common_cartridge_element(cls: CommonCartridgeElementType) -> CommonCartridgeElementType: + """ + Add a type to the Common Cartridge XML lookup. + """ + for namespace in cls.NODE_NAMESPACES: + CommonCartridgeElementClassLookup._element_classes[namespace][cls.NODE_NAME] = cls + + return cls + + +@common_cartridge_element +class WebLink(CommonCartridgeElementBase): + """ + Represent Common Cartridge element. + """ + + SEARCH_NAMESPACE_OPTIONS = { + "imswl_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imswl_v1p1", + "imswl_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imswl_v1p2", + "imswl_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3", + } + NODE_NAMESPACES = list(SEARCH_NAMESPACE_OPTIONS.values()) + NODE_NAME = "weblink" + + def get_title(self, resource_type: str) -> CommonCartridgeElementBase: + """ + Provide child tag. + """ + return self.find("wl:title", self._define_search_namespace(resource_type)) + + def get_url(self, resource_type: str) -> CommonCartridgeElementBase: + """ + Provide <url> child tag. + """ + return self.find("wl:url", self._define_search_namespace(resource_type)) + + def _define_search_namespace(self, resource_type: str) -> Dict[str, str]: + """ + Define a search namespace based on resource type. + """ + return {"wl": self.SEARCH_NAMESPACE_OPTIONS.get(resource_type)} + + +@common_cartridge_element +class BasicLtiLink(CommonCartridgeElementBase): + """ + Represent <cartridge_basiclti_link> Common Cartridge element. + """ + + SEARCH_NAMESPACES = { + "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0", + "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0", + "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0", + } + NODE_NAMESPACES = ["http://www.imsglobal.org/xsd/imslticc_v1p0"] + NODE_NAME = "cartridge_basiclti_link" + + @property + def title(self) -> CommonCartridgeElementBase: + """ + Provide <title> child tag. + """ + return self.find("blti:title", self.SEARCH_NAMESPACES) + + @property + def description(self) -> CommonCartridgeElementBase: + """ + Provide <description> child tag. + """ + return self.find("blti:description", self.SEARCH_NAMESPACES) + + @property + def secure_launch_url(self) -> Optional[CommonCartridgeElementBase]: + """ + Provide <secure_launch_url> child tag. 
+ """ + return self.find("blti:secure_launch_url", self.SEARCH_NAMESPACES) + + @property + def launch_url(self) -> Optional[CommonCartridgeElementBase]: + """ + Provide <launch_url> child tag. + """ + return self.find("blti:launch_url", self.SEARCH_NAMESPACES) + + @property + def width(self) -> Optional[CommonCartridgeElementBase]: + """ + Provide width property descendant tag. + """ + return self.find("blti:extensions/lticm:property[@name='selection_width']", self.SEARCH_NAMESPACES) + + @property + def height(self) -> Optional[CommonCartridgeElementBase]: + """ + Provide height property descendant tag. + """ + return self.find("blti:extensions/lticm:property[@name='selection_height']", self.SEARCH_NAMESPACES) + + @property + def custom(self) -> Optional[CommonCartridgeElementBase]: + """ + Provide <custom> child tag. + """ + return self.find("blti:custom", self.SEARCH_NAMESPACES) + + @property + def canvas_tool_id(self) -> Optional[CommonCartridgeElementBase]: + """ + Provide Canvas tool identifier property descendant tag. + """ + return self.find("blti:extensions/lticm:property[@name='tool_id']", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class DiscussionTopic(CommonCartridgeElementBase): + """ + Represent discussion <topic> Common Cartridge element. + """ + + SEARCH_NAMESPACE_OPTIONS = { + "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1", + "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2", + "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3", + } + NODE_NAMESPACES = list(SEARCH_NAMESPACE_OPTIONS.values()) + NODE_NAME = "topic" + + def get_title(self, resource_type: str) -> CommonCartridgeElementBase: + """ + Provide <title> child tag. + """ + return self.find("dt:title", self._define_search_namespace(resource_type)) + + def get_text(self, resource_type: str) -> CommonCartridgeElementBase: + """ + Provide <text> child tag. + """ + return self.find("dt:text", self._define_search_namespace(resource_type)) + + def _define_search_namespace(self, resource_type: str) -> Dict[str, str]: + """ + Define a search namespace based on resource type. + """ + return {"dt": self.SEARCH_NAMESPACE_OPTIONS.get(resource_type)} + + +@common_cartridge_element +class QtiElement(CommonCartridgeElementBase): + """ + Represent <questestinterop> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "questestinterop" + + @property + def items(self) -> List["QtiItem"]: + """ + Provide <item> child tags. + """ + return self.findall(".//qti:section/qti:item", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiItem(CommonCartridgeElementBase): + """ + Represent QTI <item> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "item" + + @property + def presentation(self) -> "QtiPresentation": + """ + Provide <presentation> child tag. + """ + return self.find("qti:presentation", self.SEARCH_NAMESPACES) + + @property + def description(self) -> str: + """ + Provide <mattext> descendant tag. + """ + return self.presentation.mattext.text + + @property + def resprocessing(self) -> "QtiResprocessing": + """ + Provide <resprocessing> child tag. + """ + return self.find("qti:resprocessing", self.SEARCH_NAMESPACES) + + @property + def qtimetadatafields(self) -> List["QtiMetadataField"]: + """ + Provide <qtimetadatafield> descendant tag. 
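Taken together, the typed wrappers above replace the hand-written XPath lookups that the later commits in this series remove. A short sketch of how an LTI resource might be read through BasicLtiLink; the read_lti_link helper is illustrative only, and the root element is assumed to come from a parse done with the custom parser defined at the end of this file:

    def read_lti_link(root):
        # root is assumed to be a BasicLtiLink parsed from a cartridge_basiclti_link XML file.
        launch = root.secure_launch_url if root.secure_launch_url is not None else root.launch_url
        return {
            "title": root.title.text,
            "launch_url": launch.text if launch is not None else "",
            "width": root.width.text if root.width is not None else "500",
            "height": root.height.text if root.height is not None else "500",
        }

The "500" fallbacks mirror the defaults used by the _parse_lti code removed later in this series.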
+ """ + return self.findall("qti:itemmetadata/qti:qtimetadata/qti:qtimetadatafield", self.SEARCH_NAMESPACES) + + @property + def profile(self) -> str: + """ + Provide ``cc_profile`` value from problem metadata. + + This field is mandatory for problem, so the exception is thrown if + it's not present. + + Example of metadata structure: + ``` + <itemmetadata> + <qtimetadata> + <qtimetadatafield> + <fieldlabel>cc_profile</fieldlabel> + <fieldentry>cc.true_false.v0p1</fieldentry> + </qtimetadatafield> + </qtimetadata> + </itemmetadata> + ``` + """ + for field in self.qtimetadatafields: + label = field.fieldlabel.text + entry = field.fieldentry.text + + if label == "cc_profile": + return entry + + raise ValueError('QTI metadata must contain "cc_profile" field.') + + @property + def solution(self) -> Optional["QtiSolution"]: + """ + Provide <solution> descendant tag. + """ + return self.find("qti:itemfeedback/qti:solution", self.SEARCH_NAMESPACES) + + def get_itemfeedback(self, response_type: Optional[str] = None) -> Optional["QtiItemFeedback"]: + """ + Provide <itemfeedback> child tag. + """ + selector = "qti:itemfeedback" + if response_type: + selector = f"{selector}[@ident='{response_type}']" + return self.find(selector, self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiMetadataField(CommonCartridgeElementBase): + """ + Represent QTI <qtimetadatafield> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "qtimetadatafield" + + @property + def fieldlabel(self) -> CommonCartridgeElementBase: + """ + Provide <fieldlabel> child tag. + """ + return self.find("qti:fieldlabel", self.SEARCH_NAMESPACES) + + @property + def fieldentry(self) -> CommonCartridgeElementBase: + """ + Provide <fieldentry> child tag. + """ + return self.find("qti:fieldentry", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiPresentation(CommonCartridgeElementBase): + """ + Represent QTI <presentation> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "presentation" + + @property + def response_labels(self) -> List["QtiResponseLabel"]: + """ + Provide <response_label> descendant tags. + """ + return self.findall("qti:response_lid/qti:render_choice/qti:response_label", self.SEARCH_NAMESPACES) + + @property + def mattext(self) -> CommonCartridgeElementBase: + """ + Provide <mattext> descendant tag. + """ + return self.find("qti:material/qti:mattext", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiResponseLabel(CommonCartridgeElementBase): + """ + Represent QTI <response_label> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "response_label" + + @property + def mattext(self) -> CommonCartridgeElementBase: + """ + Provide <mattext> descendant tag. + """ + return self.find("qti:material/qti:mattext", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiResprocessing(CommonCartridgeElementBase): + """ + Represent QTI <resprocessing> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "resprocessing" + + @property + def respconditions(self) -> List["QtiRespcondition"]: + """ + Provide <respcondition> descendant tags. 
+ """ + return self.findall("qti:respcondition", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiRespcondition(CommonCartridgeElementBase): + """ + Represent QTI <respcondition> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "respcondition" + + @property + def varequals(self) -> List[CommonCartridgeElementBase]: + """ + Provide <varequal> descendant tags. + """ + return self.findall("qti:conditionvar/qti:varequal", self.SEARCH_NAMESPACES) + + @property + def and_varequals(self) -> List[CommonCartridgeElementBase]: + """ + Provide <varequal> descendant tags wrapped by <and> tag. + """ + return self.findall("qti:conditionvar/qti:and/qti:varequal", self.SEARCH_NAMESPACES) + + @property + def or_varequals(self) -> List[CommonCartridgeElementBase]: + """ + Provide <varequal> descendant tags wrapped by <or> tag. + """ + return self.findall("qti:conditionvar/qti:or/qti:varequal", self.SEARCH_NAMESPACES) + + @property + def varsubstrings(self) -> List[CommonCartridgeElementBase]: + """ + Provide <varsubstring> descendant tags. + """ + return self.findall("qti:conditionvar/qti:varsubstring", self.SEARCH_NAMESPACES) + + def get_display_feedback(self, response_type: str) -> Optional[CommonCartridgeElementBase]: + """ + Provide <displayfeedback> child tag. + """ + return self.find(f"qti:displayfeedback[@linkrefid='{response_type}']", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiSolution(CommonCartridgeElementBase): + """ + Represent QTI <solution> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "solution" + + @property + def mattext(self) -> CommonCartridgeElementBase: + """ + Provide <mattext> descendant tag. + """ + return self.find("qti:solutionmaterial//qti:material//qti:mattext", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiItemFeedback(CommonCartridgeElementBase): + """ + Represent QTI <itemfeedback> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "itemfeedback" + + @property + def flow_mat(self) -> "QtiFlowMat": + """ + Provide <flow_mat> child tag. + """ + return self.find("qti:flow_mat", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiFlowMat(CommonCartridgeElementBase): + """ + Represent QTI <flow_mat> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "flow_mat" + + @property + def material(self) -> "QtiMaterial": + """ + Provide <material> child tag. + """ + return self.find("qti:material", self.SEARCH_NAMESPACES) + + +@common_cartridge_element +class QtiMaterial(CommonCartridgeElementBase): + """ + Represent QTI <material> Common Cartridge element. + """ + + SEARCH_NAMESPACES = {"qti": QTI_NAMESPACE} + NODE_NAMESPACES = [QTI_NAMESPACE] + NODE_NAME = "material" + + @property + def mattext(self) -> CommonCartridgeElementBase: + """ + Provide <mattext> child tag. + """ + return self.find("qti:mattext", self.SEARCH_NAMESPACES) + + +class CommonCartridgeXmlParser(etree.XMLParser): + """ + An XML parser configured to return Common Cartridge element objects. 
+ """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.set_element_class_lookup(CommonCartridgeElementClassLookup()) From b55f9184cb0440094c9010e0240d64886d875e39 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov <mykhailo.chernyshov@raccoongang.com> Date: Wed, 15 Jan 2025 11:50:09 +0200 Subject: [PATCH 3/7] refactor: [FC-0063] Block type processors are integrated into the script workflow --- setup.py | 2 +- src/cc2olx/constants.py | 2 +- src/cc2olx/django_settings.py | 2 - src/cc2olx/filesystem.py | 4 +- src/cc2olx/main.py | 30 +- src/cc2olx/models.py | 189 +--------- src/cc2olx/olx.py | 337 ++---------------- src/cc2olx/parser.py | 46 +++ src/cc2olx/qti.py | 624 ---------------------------------- src/cc2olx/settings.py | 59 +--- 10 files changed, 111 insertions(+), 1184 deletions(-) delete mode 100644 src/cc2olx/django_settings.py create mode 100644 src/cc2olx/parser.py delete mode 100644 src/cc2olx/qti.py diff --git a/setup.py b/setup.py index 0df0a5f1..e222ad67 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "Programming Language :: Python :: 3.8", "Topic :: Utilities", ], - description=("Command line tool, that converts Common Cartridge " "courses to Open edX Studio imports."), + description="Command line tool, that converts Common Cartridge courses to Open edX Studio imports.", entry_points={"console_scripts": ["cc2olx=cc2olx.main:main"]}, install_requires=load_requirements("requirements/base.in"), license="GNU Affero General Public License", diff --git a/src/cc2olx/constants.py b/src/cc2olx/constants.py index 1a3b14ff..cdc9fe1f 100644 --- a/src/cc2olx/constants.py +++ b/src/cc2olx/constants.py @@ -2,7 +2,7 @@ OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}" WEB_RESOURCES_DIR_NAME = "web_resources" -LINK_HTML = "<a href='{url}'>{text}</a>" +LINK_HTML = '<a href="{url}">{text}</a>' YOUTUBE_LINK_PATTERN = r"youtube.com/watch\?v=(?P<video_id>[-\w]+)" CDATA_PATTERN = r"<!\[CDATA\[(?P<content>.*?)\]\]>" diff --git a/src/cc2olx/django_settings.py b/src/cc2olx/django_settings.py deleted file mode 100644 index c13f3ec2..00000000 --- a/src/cc2olx/django_settings.py +++ /dev/null @@ -1,2 +0,0 @@ -USE_I18N = False -USE_TZ = False diff --git a/src/cc2olx/filesystem.py b/src/cc2olx/filesystem.py index 2ad20e77..9c4eb921 100644 --- a/src/cc2olx/filesystem.py +++ b/src/cc2olx/filesystem.py @@ -3,9 +3,9 @@ import zipfile from xml.etree import ElementTree -from lxml import etree from cc2olx.utils import clean_file_name +from cc2olx.xml.cc_xml import CommonCartridgeXmlParser logger = logging.getLogger() @@ -32,7 +32,7 @@ def get_xml_tree(path_src): # We are using this parser with recover and encoding options so that we are # able to parse malformed xml without much issue. The xml that we are # anticipating can even be having certain non-acceptable characters like  . 
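A minimal sketch of what the parser swap below buys: because CommonCartridgeXmlParser carries the element-class lookup registered in cc_xml.py, nodes come back as the typed wrappers rather than bare lxml elements. The file path here is hypothetical; only the parser construction mirrors the change in this hunk:

    from lxml import etree

    from cc2olx.xml.cc_xml import CommonCartridgeXmlParser, QtiElement

    # Same construction as get_xml_tree() below; the path is only an example.
    parser = CommonCartridgeXmlParser(encoding="utf-8", recover=True, ns_clean=True)
    root = etree.parse("some_course/assessment.xml", parser=parser).getroot()

    # The lookup maps <questestinterop> in the QTI namespace to QtiElement,
    # so the typed helpers defined in cc_xml.py are available directly.
    assert isinstance(root, QtiElement)
    for item in root.items:
        print(item.profile, item.description)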
- parser = etree.XMLParser(encoding="utf-8", recover=True, ns_clean=True) + parser = CommonCartridgeXmlParser(encoding="utf-8", recover=True, ns_clean=True) tree = ElementTree.parse(str(path_src), parser=parser) return tree except ElementTree.ParseError: diff --git a/src/cc2olx/main.py b/src/cc2olx/main.py index 981955d7..8cae4486 100644 --- a/src/cc2olx/main.py +++ b/src/cc2olx/main.py @@ -6,13 +6,13 @@ from pathlib import Path import django +from django.conf import settings -from cc2olx import filesystem -from cc2olx import olx +from cc2olx import filesystem, olx from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP from cc2olx.constants import OLX_STATIC_DIR from cc2olx.models import Cartridge -from cc2olx.settings import collect_settings +from cc2olx.parser import parse_options def convert_one_file( @@ -58,23 +58,22 @@ def convert_one_file( def main(): initialize_django() - parsed_args = parse_args() - settings = collect_settings(parsed_args) + args = parse_args() + options = parse_options(args) - workspace = settings["workspace"] - link_file = settings["link_file"] - passport_file = settings["passport_file"] - relative_links_source = settings["relative_links_source"] + workspace = options["workspace"] + link_file = options["link_file"] + passport_file = options["passport_file"] + relative_links_source = options["relative_links_source"] # setup logger - logging_config = settings["logging_config"] - logging.basicConfig(level=logging_config["level"], format=logging_config["format"]) + logging.basicConfig(level=options["log_level"], format=settings.LOG_FORMAT) logger = logging.getLogger() with tempfile.TemporaryDirectory() as tmpdirname: temp_workspace = Path(tmpdirname) / workspace.stem - for input_file in settings["input_files"]: + for input_file in options["input_files"]: try: convert_one_file( input_file, @@ -83,15 +82,14 @@ def main(): passport_file, relative_links_source, ) - except Exception: logger.exception("Error while converting %s file", input_file) - if settings["output_format"] == RESULT_TYPE_FOLDER: + if options["output_format"] == RESULT_TYPE_FOLDER: shutil.rmtree(str(workspace), ignore_errors=True) shutil.copytree(str(temp_workspace), str(workspace)) - if settings["output_format"] == RESULT_TYPE_ZIP: + if options["output_format"] == RESULT_TYPE_ZIP: shutil.make_archive(str(workspace), "zip", str(temp_workspace)) logger.info("Conversion completed") @@ -103,7 +101,7 @@ def initialize_django(): """ Initialize the Django package. 
""" - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cc2olx.django_settings") + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cc2olx.settings") django.setup() diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py index dc5690ea..e6cefc6f 100644 --- a/src/cc2olx/models.py +++ b/src/cc2olx/models.py @@ -1,19 +1,16 @@ -import imghdr import logging import os.path import re -from textwrap import dedent import zipfile +from pathlib import Path +from textwrap import dedent +from typing import Optional from cc2olx import filesystem -from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE from cc2olx.dataclasses import OlxToOriginalStaticFilePaths from cc2olx.external.canvas import ModuleMeta -from cc2olx.qti import QtiParser from cc2olx.utils import clean_file_name -from .utils import simple_slug - logger = logging.getLogger() MANIFEST = "imsmanifest.xml" @@ -295,7 +292,7 @@ def flatten(self, container): output.extend(leaves) return output - def define_resource(self, idref): + def define_resource(self, idref: Optional[str]) -> dict: """ Define a resource by its identifier. """ @@ -305,104 +302,6 @@ def define_resource(self, idref): resource = self.resources_by_id.get(module_item_idref) return resource - def get_resource_content(self, identifier): - """ - Get the resource named by `identifier`. - - If the resource can be retrieved, returns a tuple: the first element - indicates the type of content, either "html" or "link". The second - element is a dict with details, which vary by the type. - - If the resource can't be retrieved, returns a tuple of None, None. - - """ - res = self.resources_by_id.get(identifier) - if res is None and self.is_canvas_flavor: - res = self.resources_by_id.get(self.module_meta.get_identifierref(identifier)) - if res is None: - logger.info("Missing resource: %s", identifier) - return None, None - - res_type = res["type"] - - if res_type == "webcontent": - res_relative_path = res["children"][0].href - res_filename = self._res_filename(res_relative_path) - if res_filename.suffix == ".html": - try: - with open(str(res_filename), encoding="utf-8") as res_file: - html = res_file.read() - except: # noqa: E722 - logger.error("Failure reading %s from id %s", res_filename, identifier) # noqa: E722 - raise - return "html", {"html": html} - elif "web_resources" in str(res_filename) and imghdr.what(str(res_filename)): - static_filename = str(res_filename).split("web_resources/")[1] - olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename) - self.olx_to_original_static_file_paths.web_resources[olx_static_path] = static_filename - html = ( - '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>' - '</head><body><p><img src="{}" alt="{}"></p></body></html>'.format(olx_static_path, static_filename) - ) - return "html", {"html": html} - elif "web_resources" not in str(res_filename): - olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path) - # This webcontent is outside of ``web_resources`` directory - # So we need to manually copy it to OLX_STATIC_DIR - self.olx_to_original_static_file_paths.extra[olx_static_path] = res_relative_path - html = ( - '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>' - '</head><body><p><a href="{}" alt="{}">{}<a></p></body></html>'.format( - olx_static_path, res_relative_path, res_relative_path - ) - ) - return "html", {"html": html} - else: - logger.info("Skipping webcontent: %s", res_filename) - return None, None - - # Match any of 
imswl_xmlv1p1, imswl_xmlv1p2 etc - elif re.match(r"^imswl_xmlv\d+p\d+$", res_type): - tree = filesystem.get_xml_tree(self._res_filename(res["children"][0].href)) - root = tree.getroot() - namespaces = { - "imswl_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imswl_v1p1", - "imswl_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imswl_v1p2", - "imswl_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3", - } - ns = {"wl": namespaces[res_type]} - title = root.find("wl:title", ns).text - url = root.find("wl:url", ns).get("href") - return "link", {"href": url, "text": title} - - # Match any of imsbasiclti_xmlv1p0, imsbasiclti_xmlv1p3 etc - elif re.match(r"^imsbasiclti_xmlv\d+p\d+$", res_type): - data = self._parse_lti(res) - # Canvas flavored courses have correct url in module meta for lti links - if self.is_canvas_flavor: - item_data = self.module_meta.get_external_tool_item_data(identifier) - if item_data: - data["launch_url"] = item_data.get("url", data["launch_url"]) - return "lti", data - - # Match any of imsqti_xmlv1p2/imscc_xmlv1p1/assessment, imsqti_xmlv1p3/imscc_xmlv1p3/assessment etc - elif re.match(r"^imsqti_xmlv\d+p\d+/imscc_xmlv\d+p\d+/assessment$", res_type): - res_filename = self._res_filename(res["children"][0].href) - qti_parser = QtiParser(res_filename) - return "qti", qti_parser.parse_qti() - - # Match any of imsdt_xmlv1p1, imsdt_xmlv1p2, imsdt_xmlv1p3 etc - elif re.match(r"^imsdt_xmlv\d+p\d+$", res_type): - data = self._parse_discussion(res, res_type) - return "discussion", data - - else: - text = f"Unimported content: type = {res_type!r}" - if "href" in res: - text += ", href = {!r}".format(res["href"]) - logger.info("%s", text) - return "html", {"html": text} - def load_manifest_extracted(self): manifest = self._extract() @@ -718,83 +617,3 @@ def _parse_dependency(self, node): def _parse_resource_metadata(self, node): # TODO: this return None - - def _res_filename(self, file_name): - return self.directory / file_name - - def _parse_lti(self, resource): - """ - Parses LTI resource. 
- """ - - tree = filesystem.get_xml_tree(self._res_filename(resource["children"][0].href)) - root = tree.getroot() - ns = { - "blti": "http://www.imsglobal.org/xsd/imsbasiclti_v1p0", - "lticp": "http://www.imsglobal.org/xsd/imslticp_v1p0", - "lticm": "http://www.imsglobal.org/xsd/imslticm_v1p0", - } - title = root.find("blti:title", ns).text - description = root.find("blti:description", ns).text - launch_url = root.find("blti:secure_launch_url", ns) - if launch_url is None: - launch_url = root.find("blti:launch_url", ns) - if launch_url is not None: - launch_url = launch_url.text - else: - launch_url = "" - width = root.find("blti:extensions/lticm:property[@name='selection_width']", ns) - if width is None: - width = "500" - else: - width = width.text - height = root.find("blti:extensions/lticm:property[@name='selection_height']", ns) - if height is None: - height = "500" - else: - height = height.text - custom = root.find("blti:custom", ns) - if custom is None: - parameters = dict() - else: - parameters = {option.get("name"): option.text for option in custom} - # For Canvas flavored CC, tool_id can be used as lti_id if present - tool_id = root.find("blti:extensions/lticm:property[@name='tool_id']", ns) - if tool_id is None: - # Create a simple slug lti_id from title - lti_id = simple_slug(title) - else: - lti_id = tool_id.text - data = { - "title": title, - "description": description, - "launch_url": launch_url, - "height": height, - "width": width, - "custom_parameters": parameters, - "lti_id": lti_id, - } - return data - - def _parse_discussion(self, res, res_type): - """ - Parses discussion content. - """ - - namespaces = { - "imsdt_xmlv1p1": "http://www.imsglobal.org/xsd/imsccv1p1/imsdt_v1p1", - "imsdt_xmlv1p2": "http://www.imsglobal.org/xsd/imsccv1p2/imsdt_v1p2", - "imsdt_xmlv1p3": "http://www.imsglobal.org/xsd/imsccv1p3/imsdt_v1p3", - } - - data = {"dependencies": []} - for child in res["children"]: - if isinstance(child, ResourceFile): - tree = filesystem.get_xml_tree(self._res_filename(child.href)) - root = tree.getroot() - ns = {"dt": namespaces[res_type]} - data["title"] = root.find("dt:title", ns).text - data["text"] = root.find("dt:text", ns).text - elif isinstance(child, ResourceDependency): - data["dependencies"].append(self.get_resource_content(child.identifierref)) - return data diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py index 8cb285ee..b7b161c5 100644 --- a/src/cc2olx/olx.py +++ b/src/cc2olx/olx.py @@ -1,14 +1,15 @@ -import html as HTMLParser import json import logging -import re -import urllib import xml.dom.minidom -from lxml import html -from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from typing import List, Type + +from django.conf import settings +from django.utils.module_loading import import_string -from cc2olx.qti import QtiExport -from cc2olx.utils import clean_from_cdata, element_builder, passport_file_parser +from cc2olx.content_processors import AbstractContentProcessor +from cc2olx.dataclasses import ContentProcessorContext +from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from cc2olx.utils import passport_file_parser logger = logging.getLogger() @@ -28,25 +29,23 @@ class OlxExport: OLX guide: https://edx.readthedocs.io/projects/edx-open-learning-xml/en/latest/ """ - # content types - HTML = "html" - LINK = "link" - VIDEO = "video" - LTI = "lti" - QTI = "qti" - DISCUSSION = "discussion" - def __init__(self, cartridge, link_file=None, passport_file=None, relative_links_source=None): self.cartridge = cartridge self.doc = 
None self.link_file = link_file self.passport_file = passport_file self.relative_links_source = relative_links_source - self.iframe_link_parser = None - if link_file: - self.iframe_link_parser = KalturaIframeLinkParser(self.link_file) + self.iframe_link_parser = KalturaIframeLinkParser(self.link_file) if link_file else None self.lti_consumer_present = False self.lti_consumer_ids = set() + self._content_processor_types = self._load_content_processor_types() + + @staticmethod + def _load_content_processor_types() -> List[Type[AbstractContentProcessor]]: + """ + Load content processor types. + """ + return [import_string(processor_path) for processor_path in settings.CONTENT_PROCESSORS] def xml(self): self.doc = xml.dom.minidom.Document() @@ -108,7 +107,7 @@ def policy(self): lti_passports = self._get_lti_passport_list() - if self.lti_consumer_present: + if self.lti_consumer_ids: policy["course/course"]["advanced_modules"] = ["lti_consumer"] if len(lti_passports): @@ -157,8 +156,7 @@ def _add_olx_nodes(self, element, course_data, tags): leaf = not tags for element_data in course_data: if leaf: - content_type, details = self._get_content(element_data) - children = self._create_olx_nodes(content_type, details) + children = self._create_olx_nodes(element_data) else: children = [self.doc.createElement(tags[0])] @@ -175,146 +173,13 @@ def _add_olx_nodes(self, element, course_data, tags): if "children" in element_data: self._add_olx_nodes(child, element_data["children"], tags[1:]) - def _get_content(self, element_data): - """ - Gets content type and details from element's data. - """ - - content_type = None - details = None - - if "identifierref" in element_data: - idref = element_data["identifierref"] - content_type, details = self.cartridge.get_resource_content(idref) - - if content_type is None or not details: - content_type = self.HTML - details = { - "html": "<p>MISSING CONTENT</p>", - } - - if content_type == self.LINK: - content_type, details = process_link(details) - - return content_type, details - - def _process_static_links(self, html): - """ - Process static links like src and href to have appropriate links. - """ - items = re.findall(r'(src|href)\s*=\s*"(.+?)"', html) - - def process_wiki_reference(item, html): - """ - Replace $WIKI_REFERENCE$ with edx /jump_to_id/<url_name> - """ - search_key = urllib.parse.unquote(item).replace("$WIKI_REFERENCE$/pages/", "") - - # remove query params and add suffix .html to match with resource_id_by_href - search_key = search_key.split("?")[0] + ".html" - for key in self.cartridge.resource_id_by_href.keys(): - if key.endswith(search_key): - replace_with = "/jump_to_id/{}".format(self.cartridge.resource_id_by_href[key]) - html = html.replace(item, replace_with) - return html - logger.warn("Unable to process Wiki link - %s", item) - return html - - def process_canvas_reference(item, html): - """ - Replace $CANVAS_OBJECT_REFERENCE$ with edx /jump_to_id/<url_name> - """ - object_id = urllib.parse.unquote(item).replace("$CANVAS_OBJECT_REFERENCE$/quizzes/", "/jump_to_id/") - html = html.replace(item, object_id) - return html - - def process_ims_cc_filebase(item, html): - """ - Replace $IMS-CC-FILEBASE$ with /static - """ - new_item = urllib.parse.unquote(item).replace("$IMS-CC-FILEBASE$", "/static") - # skip query parameters for static files - new_item = new_item.split("?")[0] - # & is not valid in an URL. 
But some file seem to have it when it should be & - new_item = new_item.replace("&", "&") - html = html.replace(item, new_item) - return html - - def process_external_tools_link(item, html): - """ - Replace $CANVAS_OBJECT_REFERENCE$/external_tools/retrieve with appropriate external link - """ - external_tool_query = urllib.parse.urlparse(item).query - # unescape query that has been HTML encoded so it can be parsed correctly - unescaped_external_tool_query = HTMLParser.unescape(external_tool_query) - external_tool_url = urllib.parse.parse_qs(unescaped_external_tool_query).get("url", [""])[0] - html = html.replace(item, external_tool_url) - return html - - def process_relative_external_links(item, html): - """ - Turn static file URLs outside OLX_STATIC_DIR into absolute URLs. - - Allow to avoid a situation when the original course page links have - relative URLs, such URLs weren't included into the exported Common - Cartridge course file that causes broken URLs in the imported OeX - course. The function adds the origin source to URLs to make them - absolute ones. - """ - if self.relative_links_source is None or item in self.cartridge.olx_to_original_static_file_paths.all: - return html - - url = urllib.parse.urljoin(self.relative_links_source, item) - html = html.replace(item, url) - return html - - for _, item in items: - if "IMS-CC-FILEBASE" in item: - html = process_ims_cc_filebase(item, html) - elif "WIKI_REFERENCE" in item: - html = process_wiki_reference(item, html) - elif "external_tools" in item: - html = process_external_tools_link(item, html) - elif "CANVAS_OBJECT_REFERENCE" in item: - html = process_canvas_reference(item, html) - else: - html = process_relative_external_links(item, html) - - return html - - def _process_static_links_from_details(self, details): - """ - Take a variable and recursively find & escape all static links within strings - - Args: - self: self - details: A dictionary or list of dictionaries containing node data. - - Returns: - details: Returns detail data with static link - escaped to an OLX-friendly format. - """ - - if isinstance(details, str): - return self._process_static_links(details) - - if isinstance(details, list): - for index, value in enumerate(details): - details[index] = self._process_static_links_from_details(value) - elif isinstance(details, dict): - for key, value in details.items(): - details[key] = self._process_static_links_from_details(value) - - return details - - def _create_olx_nodes(self, content_type, details): + def _create_olx_nodes(self, element_data: dict) -> List["xml.dom.minidom.Element"]: """ This helps to create OLX node of different type. For eg HTML, VIDEO, QTI, LTI, Discussion. Args: - content_type ([str]): The type of node that has to be created. - details (Dict[str, str]): Dictionary of the element and content of the element. + element_data (dict): a normalized CC element data. Raises: OlxExportException: Exception when nodes are not able to be created. @@ -322,157 +187,17 @@ def _create_olx_nodes(self, content_type, details): Returns: [List]: List of OLX nodes that needs to be written. 
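The replacement body at the end of this hunk hands each element over to the content processors listed in settings.CONTENT_PROCESSORS: every processor is constructed with the cartridge and a ContentProcessorContext and asked to process(idref) until one returns OLX nodes. A minimal sketch of a processor that fits that contract; the class is illustrative only, and the base class added in the earlier commit is assumed to leave subclasses with just process() to implement:

    import xml.dom.minidom
    from typing import List, Optional

    from cc2olx.content_processors import AbstractContentProcessor


    class StubContentProcessor(AbstractContentProcessor):
        """Illustrative fallback: wrap any resource into a placeholder <html> node."""

        def process(self, idref: Optional[str]) -> Optional[List[xml.dom.minidom.Element]]:
            doc = xml.dom.minidom.Document()
            node = doc.createElement("html")
            node.appendChild(doc.createCDATASection(f"<p>Unhandled resource: {idref}</p>"))
            return [node]

Registering its dotted path in CONTENT_PROCESSORS (see the settings.py changes later in this patch) places it in the chain; returning None or an empty list passes the resource on to the next processor.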
""" - - nodes = [] - details = self._process_static_links_from_details(details) - - if content_type == self.HTML: - nodes += self._process_html(details) - - elif content_type == self.VIDEO: - nodes += self._create_video_node(details) - - elif content_type == self.LTI: - # There is an LTI resource - # Add lti_consumer in policy with lti_passports - self.lti_consumer_present = True - self.lti_consumer_ids.add(details["lti_id"]) - nodes.append(self._create_lti_node(details)) - - elif content_type == self.QTI: - qti_export = QtiExport(self.doc) - nodes += qti_export.create_qti_node(details) - - elif content_type == self.DISCUSSION: - nodes += self._create_discussion_node(details) - - else: - raise OlxExportException(f'Content type "{content_type}" is not supported.') - - return nodes - - def _create_video_node(self, details): - """ - This function creates Video OLX nodes. - - Args: - details (Dict[str, str]): Dictionary that has Video tag value. - - Returns: - [OLX Element]: Video OLX element. - """ - xml_element = element_builder(self.doc) - attributes = {"youtube": "1.00:" + details["youtube"], "youtube_id_1_0": details["youtube"]} - child = xml_element("video", children=None, attributes=attributes) - return [child] - - def _process_html(self, details): - """ - This function helps to process the html and gives out - corresponding HTML or Video OLX nodes. - - Args: - details (Dict[str, str]): Dictionary that has HTML tag value. - - Returns: - List[OLX Element]: List of html/Video OLX element. - """ - video_olx = [] - nodes = [] - child = self.doc.createElement("html") - html = self._process_static_links(details["html"]) - if self.link_file: - html, video_olx = self._process_html_for_iframe(html) - html = clean_from_cdata(html) - txt = self.doc.createCDATASection(html) - child.appendChild(txt) - nodes.append(child) - for olx in video_olx: - nodes.append(olx) - return nodes - - def _process_html_for_iframe(self, html_str): - """ - This function helps to parse the iframe with - embedded video, to be converted into video xblock. - - Args: - html_str ([str]): Html file content. - - Returns: - html_str [str]: The html content of the file, if iframe is present - and converted into xblock then iframe is removed - from the HTML. - video_olx [List[xml]]: List of xml children, i.e video xblock. - """ - video_olx = [] - parsed_html = html.fromstring(html_str) - iframes = parsed_html.xpath("//iframe") - if not iframes: - return html_str, video_olx - video_olx, converted_iframes = self.iframe_link_parser.get_video_olx(self.doc, iframes) - if video_olx: - # If video xblock is present then we modify the HTML to remove the iframe - # hence we need to convert the modified HTML back to string. We also remove - # the parent if there are no other children. 
- for iframe in converted_iframes: - parent = iframe.getparent() - parent.remove(iframe) - if not parent.getchildren(): - parent.getparent().remove(parent) - return html.tostring(parsed_html).decode("utf-8"), video_olx - return html_str, video_olx - - def _create_lti_node(self, details): - node = self.doc.createElement("lti_consumer") - custom_parameters = "[{params}]".format( - params=", ".join( - [ - '"{key}={value}"'.format( - key=key, - value=value, - ) - for key, value in details["custom_parameters"].items() - ] - ), + idref = element_data.get("identifierref") + context = ContentProcessorContext( + iframe_link_parser=self.iframe_link_parser, + lti_consumer_ids=self.lti_consumer_ids, + relative_links_source=self.relative_links_source, ) - node.setAttribute("custom_parameters", custom_parameters) - node.setAttribute("description", details["description"]) - node.setAttribute("display_name", details["title"]) - node.setAttribute("inline_height", details["height"]) - node.setAttribute("inline_width", details["width"]) - node.setAttribute("launch_url", details["launch_url"]) - node.setAttribute("modal_height", details["height"]) - node.setAttribute("modal_width", details["width"]) - node.setAttribute("xblock-family", "xblock.v1") - node.setAttribute("lti_id", details["lti_id"]) - return node - - def _create_discussion_node(self, details): - node = self.doc.createElement("discussion") - node.setAttribute("display_name", "") - node.setAttribute("discussion_category", details["title"]) - node.setAttribute("discussion_target", details["title"]) - html_node = self.doc.createElement("html") - txt = "MISSING CONTENT" if details["text"] is None else details["text"] - txt = clean_from_cdata(txt) - txt = self.doc.createCDATASection(txt) - html_node.appendChild(txt) - return [html_node, node] - - -def process_link(details): - """ - Possibly convert a link to a video. - """ - # YouTube links can be like this: https://www.youtube.com/watch?v=gQ-cZRmHfs4&amp;list=PL5B350D511278A56B - ytmatch = re.search(r"youtube.com/watch\?v=([-\w]+)", details["href"]) - if ytmatch: - return "video", {"youtube": ytmatch.group(1)} + for processor_type in self._content_processor_types: + processor = processor_type(self.cartridge, context) - details = { - "html": "<a href='{}'>{}</a>".format(details["href"], details.get("text", "")), - } + if olx_nodes := processor.process(idref): + return olx_nodes - return "html", details + raise OlxExportException(f'The resource with "{idref}" identifier value is not supported.') diff --git a/src/cc2olx/parser.py b/src/cc2olx/parser.py new file mode 100644 index 00000000..72834748 --- /dev/null +++ b/src/cc2olx/parser.py @@ -0,0 +1,46 @@ +from pathlib import Path + +COMMON_CARTRIDGE_FILE_EXTENSION = ".imscc" + + +def _is_cartridge_file(path): + return path.is_file() and path.suffix == COMMON_CARTRIDGE_FILE_EXTENSION + + +def _get_files(parsed_args): + """ + Collects all Common Cartridge files from list of files and directories. + """ + + files = set() + + for path in parsed_args.inputs: + if not path.exists(): + raise FileNotFoundError + + if _is_cartridge_file(path): + files.add(path) + + if path.is_dir(): + for input_file in path.iterdir(): + if _is_cartridge_file(input_file): + files.add(input_file) + + return files + + +def parse_options(args): + """ + Parses script options from argparse arguments. 
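For orientation, a condensed sketch of how main() consumes the helper defined in this hunk: parse_args() still reads sys.argv, and parse_options() replaces the old collect_settings() with a plain dict whose logging format now lives in the Django settings module. The loop below is illustrative only:

    from cc2olx.cli import parse_args
    from cc2olx.parser import parse_options

    options = parse_options(parse_args())

    for input_file in options["input_files"]:
        print(input_file, "->", options["workspace"])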
+ """ + input_files = _get_files(args) + + return { + "input_files": input_files, + "output_format": args.result, + "log_level": args.loglevel, + "workspace": Path.cwd() / args.output, + "link_file": args.link_file, + "passport_file": args.passport_file, + "relative_links_source": args.relative_links_source, + } diff --git a/src/cc2olx/qti.py b/src/cc2olx/qti.py deleted file mode 100644 index 444ab7ab..00000000 --- a/src/cc2olx/qti.py +++ /dev/null @@ -1,624 +0,0 @@ -import logging -import re -import urllib.parse -import xml.dom.minidom -from collections import OrderedDict -from html import unescape - -from lxml import etree, html - -from cc2olx import filesystem - -from .utils import element_builder - -logger = logging.getLogger() - -# problem types -MULTIPLE_CHOICE = "cc.multiple_choice.v0p1" -MULTIPLE_RESPONSE = "cc.multiple_response.v0p1" -FILL_IN_THE_BLANK = "cc.fib.v0p1" -ESSAY = "cc.essay.v0p1" -BOOLEAN = "cc.true_false.v0p1" -PATTERN_MATCH = "cc.pattern_match.v0p1" -RESPROCESSING_TYPES = ["general_fb", "correct_fb", "general_incorrect_fb"] - - -class QtiError(Exception): - """ - Exception type for Qti parsing/conversion errors. - """ - - -class QtiExport: - """ - Contains methods for processing and conversion - IMS Question & Test Interoperability (QTI) <= v1.2 into OLX markup - """ - - FIB_PROBLEM_TEXTLINE_SIZE_BUFFER = 10 - - def __init__(self, root_xml_doc): - self.doc = root_xml_doc - - def create_qti_node(self, details): - """ - Creates OLX xml node, that represents content of unit with problems. - - Args: - details: list of dictionaries, where each contains data to - render problem. - """ - - problems = [] - - for problem_data in details: - cc_profile = problem_data["cc_profile"] - create_problem = self._problem_creators_map.get(cc_profile) - - if create_problem is None: - raise QtiError('Unknown cc_profile: "{}"'.format(problem_data["cc_profile"])) - - problem = create_problem(problem_data) - - # sometimes we might want to have additional items from one cc item - if isinstance(problem, list) or isinstance(problem, tuple): - problems += problem - else: - problems.append(problem) - - return problems - - @property - def _problem_creators_map(self): - """ - Returns: mapping between Common Cartridge profile value and function - that creates actual problem node. - - Note: Since True/False problems in OLX are constructed identically to - OLX Multiple Choice problems, we reuse `_create_multiple_choice_problem` - for BOOLEAN type problems - """ - return { - MULTIPLE_CHOICE: self._create_multiple_choice_problem, - MULTIPLE_RESPONSE: self._create_multiple_response_problem, - FILL_IN_THE_BLANK: self._create_fib_problem, - ESSAY: self._create_essay_problem, - BOOLEAN: self._create_multiple_choice_problem, - PATTERN_MATCH: self._create_pattern_match_problem, - } - - def _create_problem_description(self, description_html_str): - """ - Material texts can come in form of escaped HTML markup, which - can't be considered as valid XML. ``xml.dom.minidom`` has no - features to convert HTML to XML, so we use lxml parser here. 
- - Args: - description_html_str: escaped HTML string - - Returns: instance of ``xml.dom.minidom.Node`` - """ - description_html_str = unescape(description_html_str) - - description_html_str = urllib.parse.unquote(description_html_str) - - element = html.fromstring(description_html_str) - xml_string = etree.tostring(element) - description = xml.dom.minidom.parseString(xml_string).firstChild - - return description - - def _add_choice(self, parent, is_correct, text): - """ - Appends choices to given ``checkboxgroup`` or ``choicegroup`` parent. - """ - choice = self.doc.createElement("choice") - choice.setAttribute("correct", "true" if is_correct else "false") - self._set_text(choice, text) - parent.appendChild(choice) - - def _set_text(self, node, new_text): - text_node = self.doc.createTextNode(new_text) - node.appendChild(text_node) - - def _create_multiple_choice_problem(self, problem_data): - """ - Creates XML node of problem. - """ - - problem = self.doc.createElement("problem") - problem_content = self.doc.createElement("multiplechoiceresponse") - - problem_description = self._create_problem_description(problem_data["problem_description"]) - - choice_group = self.doc.createElement("choicegroup") - choice_group.setAttribute("type", "MultipleChoice") - - for choice_data in problem_data["choices"].values(): - self._add_choice(choice_group, choice_data["correct"], choice_data["text"]) - - problem_content.appendChild(problem_description) - problem_content.appendChild(choice_group) - problem.appendChild(problem_content) - - return problem - - def _create_multiple_response_problem(self, problem_data): - """ - Create XML node for multiple response problem. Sets partial_credit to EDC by default. - """ - - el = element_builder(self.doc) - - problem_description = self._create_problem_description(problem_data["problem_description"]) - - # fmt: off - problem = el('problem', [ - el('choiceresponse', [ - - problem_description, - - el('checkboxgroup', [ - el('choice', - choice['text'], - {'correct': 'true' if choice['correct'] else 'false'} - ) - for choice in problem_data['choices'].values() - ], {'type': 'MultipleChoice'}) - - ], {'partial_credit': 'EDC'}) - ]) - # fmt: on - return problem - - def _create_fib_problem(self, problem_data): - """ - Creates XML node of fill in the blank problems - """ - - # Track maximum answer length for textline at the bottom - max_answer_length = 0 - - problem = self.doc.createElement("problem") - - # Set the primary answer on the stringresponse - # and set the type to case insensitive - problem_content = self.doc.createElement("stringresponse") - problem_content.setAttribute("answer", problem_data["answer"]) - problem_content.setAttribute("type", self._build_fib_problem_type(problem_data)) - - if len(problem_data["answer"]) > max_answer_length: - max_answer_length = len(problem_data["answer"]) - - problem_description = self._create_problem_description(problem_data["problem_description"]) - problem_content.appendChild(problem_description) - - # For any (optional) additional accepted answers, add an - # additional_answer element with that answer - for answer in problem_data.get("additional_answers", []): - additional_answer = self.doc.createElement("additional_answer") - additional_answer.setAttribute("answer", answer) - problem_content.appendChild(additional_answer) - - if len(answer) > max_answer_length: - max_answer_length = len(answer) - - # Add a textline element with the max answer length plus a buffer - textline = self.doc.createElement("textline") - 
textline.setAttribute("size", str(max_answer_length + self.FIB_PROBLEM_TEXTLINE_SIZE_BUFFER)) - problem_content.appendChild(textline) - - problem.appendChild(problem_content) - - return problem - - @staticmethod - def _build_fib_problem_type(problem_data): - """ - Build `stringresponse` OLX type for a fill in the blank problem. - """ - problem_types = ["ci"] - - if problem_data["is_regexp"]: - problem_types.append("regexp") - - return " ".join(problem_types) - - def _create_essay_problem(self, problem_data): - """ - Given parsed essay problem data, returns a openassessment component. If a sample - solution provided, returns that as a HTML block before openassessment. - """ - - description = problem_data["problem_description"] - - el = element_builder(self.doc) - - if any(key in RESPROCESSING_TYPES for key in problem_data.keys()): - resp_samples = [ - el("name", "Feedback"), - el("label", "Feedback"), - el("prompt", "Example Feedback"), - ] - - for desc, key in zip(["General", "Correct", "Incorrect"], RESPROCESSING_TYPES): - resp_samples.append( - el( - "option", - [el("name", desc), el("label", desc), el("explanation", problem_data.get(key, desc))], - {"points": "0"}, - ) - ) - criterion = el("criterion", resp_samples, {"feedback": "optional"}) - else: - criterion = el( - "criterion", - [ - el("name", "Ideas"), - el("label", "Ideas"), - el("prompt", "Example criterion"), - el( - "option", - [el("name", "Poor"), el("label", "Poor"), el("explanation", "Explanation")], - {"points": "0"}, - ), - el( - "option", - [el("name", "Good"), el("label", "Good"), el("explanation", "Explanation")], - {"points": "1"}, - ), - ], - {"feedback": "optional"}, - ) - - # fmt: off - ora = el( - 'openassessment', - [ - el('title', 'Open Response Assessment'), - el('assessments', [ - el( - 'assessment', - None, - attributes={'name': 'staff-assessment', 'required': 'True'} - ) - ]), - el('prompts', [ - el('prompt', [ - el('description', description) - ]) - ]), - el('rubric', [ - criterion, - el('feedbackprompt', 'Feedback prompt text'), - el('feedback_default_text', 'Feedback prompt default text'), - ]) - ], - { - 'url_name': problem_data['ident'], - 'text_response': 'required', - 'prompts_type': 'html' - } - ) - # fmt: on - - # if a sample solution exists add on top of ora, because - # olx doesn't have a sample solution equivalent. - if problem_data.get("sample_solution"): - child = el("html", self.doc.createCDATASection(problem_data["sample_solution"])) - return child, ora - - return ora - - def _create_pattern_match_problem(self, problem_data): - raise NotImplementedError - - -class QtiParser: - """ - Used to parse Qti xml resource. - """ - - # Xml namespaces - NS = {"qti": "http://www.imsglobal.org/xsd/ims_qtiasiv1p2"} - - def __init__(self, resource_filename): - self.resource_filename = resource_filename - - def parse_qti(self): - """ - Parses resource of ``imsqti_xmlv1p2/imscc_xmlv1p1/assessment`` type. - """ - - tree = filesystem.get_xml_tree(self.resource_filename) - root = tree.getroot() - - # qti xml can contain multiple problems represented by <item/> elements - problems = root.findall(".//qti:section/qti:item", self.NS) - - parsed_problems = [] - - for i, problem in enumerate(problems): - data = {} - - attributes = problem.attrib - - # We're adding unique string to identifier here to handle cases, - # when we're getting malformed course (due to a weird Canvas behaviour) - # with equal identifiers. LMS doesn't support blocks with the same identifiers. 
- data["ident"] = attributes["ident"] + str(i) - if title := attributes.get("title"): - data["title"] = title - - cc_profile = self._parse_problem_profile(problem) - data["cc_profile"] = cc_profile - - parse_problem = self._problem_parsers_map.get(cc_profile) - - if parse_problem is None: - raise QtiError(f'Unknown cc_profile: "{cc_profile}"') - - try: - data.update(parse_problem(problem)) - parsed_problems.append(data) - except NotImplementedError: - logger.info("Problem with ID %s can't be converted.", problem.attrib.get("ident")) - logger.info(" Profile %s is not supported.", cc_profile) - logger.info(" At file %s.", self.resource_filename) - - return parsed_problems - - def _parse_problem_profile(self, problem): - """ - Returns ``cc_profile`` value from problem metadata. This field is mandatory for problem, - so we throw exception if it's not present. - - Example of metadata structure: - ``` - <itemmetadata> - <qtimetadata> - <qtimetadatafield> - <fieldlabel>cc_profile</fieldlabel> - <fieldentry>cc.true_false.v0p1</fieldentry> - </qtimetadatafield> - </qtimetadata> - </itemmetadata> - ``` - """ - - metadata = problem.findall("qti:itemmetadata/qti:qtimetadata/qti:qtimetadatafield", self.NS) - - for field in metadata: - label = field.find("qti:fieldlabel", self.NS).text - entry = field.find("qti:fieldentry", self.NS).text - - if label == "cc_profile": - return entry - - raise ValueError('Problem metadata must contain "cc_profile" field.') - - @property - def _problem_parsers_map(self): - """ - Returns: mapping between Common Cartridge profile value and function - that parses actual problem node. - - Note: Since True/False problems in QTI are constructed identically to - QTI Multiple Choice problems, we reuse `_parse_multiple_choice_problem` - for BOOLEAN type problems - """ - return { - MULTIPLE_CHOICE: self._parse_multiple_choice_problem, - MULTIPLE_RESPONSE: self._parse_multiple_response_problem, - FILL_IN_THE_BLANK: self._parse_fib_problem, - ESSAY: self._parse_essay_problem, - BOOLEAN: self._parse_multiple_choice_problem, - PATTERN_MATCH: self._parse_pattern_match_problem, - } - - def _parse_fixed_answer_question_responses(self, presentation): - """ - Returns dictionary where keys are response identifiers and values are - response data. 
- - Example of ``<response_lid/>`` structure for the following profiles: - - ``cc.multiple_choice.v0p1`` - - ``cc.multiple_response.v0p1`` - - ``cc.true_false.v0p1`` - ``` - <response_lid ident="response1" rcardinality="Single"> - <render_choice> - <response_label ident="8157"> - <material> - <mattext texttype="text/plain">Response 1</mattext> - </material> - </response_label> - <response_label ident="4226"> - <material> - <mattext texttype="text/plain">Response 2</mattext> - </material> - </response_label> - </render_choice> - </response_lid> - ``` - """ - responses = OrderedDict() - - for response in presentation.findall("qti:response_lid/qti:render_choice/qti:response_label", self.NS): - response_id = response.attrib["ident"] - responses[response_id] = { - "text": response.find("qti:material/qti:mattext", self.NS).text or "", - "correct": False, - } - - return responses - - def _mark_correct_responses(self, resprocessing, responses): - """ - Example of ``<resprocessing/>`` structure for the following profiles: - - ``cc.multiple_choice.v0p1`` - - ``cc.true_false.v0p1`` - ``` - <resprocessing> - <outcomes> - <decvar maxvalue="100" minvalue="0" varname="SCORE" vartype="Decimal"/> - </outcomes> - <respcondition continue="Yes"> - <conditionvar> - <varequal respident="response1">8157</varequal> - </conditionvar> - <displayfeedback feedbacktype="Response" linkrefid="8157_fb"/> - </respcondition> - <respcondition continue="Yes"> - <conditionvar> - <varequal respident="response1">5534</varequal> - </conditionvar> - <displayfeedback feedbacktype="Response" linkrefid="5534_fb"/> - </respcondition> - <respcondition continue="No"> - <conditionvar> - <varequal respident="response1">4226</varequal> - </conditionvar> - <setvar action="Set" varname="SCORE">100</setvar> - <displayfeedback feedbacktype="Response" linkrefid="correct_fb"/> - </respcondition> - </resprocessing> - ``` - - This XML is a sort of instruction about how responses should be evaluated. In this - particular example we have three correct answers with ids: 8157, 5534, 4226. - - Example of ``<resprocessing/>`` structure for ``cc.multiple_response.v0p1``: - ``` - <resprocessing> - <outcomes> - <decvar maxvalue="100" minvalue="0" varname="SCORE" vartype="Decimal"/> - </outcomes> - <respcondition continue="No"> - <conditionvar> - <and> - <varequal respident="response1">1759</varequal> - <not> - <varequal respident="response1">5954</varequal> - </not> - <varequal respident="response1">8170</varequal> - <varequal respident="response1">9303</varequal> - <not> - <varequal respident="response1">15</varequal> - </not> - </and> - </conditionvar> - </respcondition> - </resprocessing> - ``` - Above example is for a multiple response type problem. In this example 1759, 8170 and - 9303 are correct answers while 15 and 5954 are not. Note that this code also support - ``or`` opearator too. - - For now, we just consider these responses correct in OLX, but according specification, - conditions can be arbitrarily nested, and score can be computed by some formula, so to - implement 100% conversion we need to write new XBlock. 
- """ - - for respcondition in resprocessing.findall("qti:respcondition", self.NS): - correct_answers = respcondition.findall("qti:conditionvar/qti:varequal", self.NS) - - if len(correct_answers) == 0: - correct_answers = respcondition.findall("qti:conditionvar/qti:and/qti:varequal", self.NS) - correct_answers += respcondition.findall("qti:conditionvar/qti:or/qti:varequal", self.NS) - - for ans in correct_answers: - responses[ans.text]["correct"] = True - - if respcondition.attrib.get("continue", "No") == "No": - break - - def _parse_multiple_choice_problem(self, problem): - """ - Returns ``problem_description``, ``choices`` and marks the correct answer - """ - data = {} - - presentation = problem.find("qti:presentation", self.NS) - resprocessing = problem.find("qti:resprocessing", self.NS) - - data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text - - data["choices"] = self._parse_fixed_answer_question_responses(presentation) - self._mark_correct_responses(resprocessing, data["choices"]) - - return data - - def _parse_multiple_response_problem(self, problem): - """ - Returns ``problem_description``, ``choices`` and marks all the correct answers. - """ - return self._parse_multiple_choice_problem(problem) - - def _parse_fib_problem(self, problem): - """ - Returns ``problem_description``, ``answer``, and ``additional_answers`` - """ - data = {} - - presentation = problem.find("qti:presentation", self.NS) - resprocessing = problem.find("qti:resprocessing", self.NS) - - data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text - - answers = [] - patterns = [] - for respcondition in resprocessing.findall("qti:respcondition", self.NS): - for varequal in respcondition.findall("qti:conditionvar/qti:varequal", self.NS): - answers.append(varequal.text) - - for varsubstring in respcondition.findall("qti:conditionvar/qti:varsubstring", self.NS): - patterns.append(varsubstring.text) - - if respcondition.attrib.get("continue", "No") == "No": - break - - data["is_regexp"] = bool(patterns) - if data["is_regexp"]: - data["answer"] = patterns.pop(0) - answers = [re.escape(answer) for answer in answers] - data["additional_answers"] = [*patterns, *answers] - else: - # Primary answer is the first one, additional answers are what is left - data["answer"] = answers.pop(0) - data["additional_answers"] = answers - - return data - - def _parse_essay_problem(self, problem): - """ - Parses `cc.essay.v0p1` problem type and returns dictionary with - presentation & sample solution if exists. 
- """ - - data = {} - presentation = problem.find("qti:presentation", self.NS) - itemfeedback = problem.find("qti:itemfeedback", self.NS) - solution = problem.find("qti:itemfeedback/qti:solution", self.NS) - - data["problem_description"] = presentation.find("qti:material/qti:mattext", self.NS).text - - if solution is not None: - sample_solution_selector = "qti:solutionmaterial//qti:material//qti:mattext" - data["sample_solution"] = solution.find(sample_solution_selector, self.NS).text - - if itemfeedback is not None: - for resp_type in RESPROCESSING_TYPES: - response_text = self._essay_response_processing(problem, resp_type) - if response_text: - data[resp_type] = response_text - return data - - def _essay_response_processing(self, problem, resp_type): - respconditions = problem.find("qti:resprocessing/qti:respcondition", self.NS) - if respconditions.find(f"qti:displayfeedback[@linkrefid='{resp_type}']", self.NS) is not None: - text_selector = f"qti:itemfeedback[@ident='{resp_type}']/qti:flow_mat/qti:material/qti:mattext" - return problem.find(text_selector, self.NS).text - - def _parse_pattern_match_problem(self, problem): - raise NotImplementedError diff --git a/src/cc2olx/settings.py b/src/cc2olx/settings.py index 5055a01c..f1225c56 100644 --- a/src/cc2olx/settings.py +++ b/src/cc2olx/settings.py @@ -1,52 +1,17 @@ from pathlib import Path -COMMON_CARTRIDGE_FILE_EXTENSION = ".imscc" +BASE_DIR = Path(__file__).resolve().parent +TEMPLATES_DIR = BASE_DIR / "templates" +LOG_FORMAT = "{%(filename)s:%(lineno)d} - %(message)s" -def _is_cartridge_file(path): - return path.is_file() and path.suffix == COMMON_CARTRIDGE_FILE_EXTENSION +CONTENT_PROCESSORS = [ + "cc2olx.content_processors.VideoContentProcessor", + "cc2olx.content_processors.LtiContentProcessor", + "cc2olx.content_processors.QtiContentProcessor", + "cc2olx.content_processors.DiscussionContentProcessor", + "cc2olx.content_processors.HtmlContentProcessor", +] - -def _get_files(parsed_args): - """ - Collects all Common Cartridge files from list of files and directories. - """ - - files = set() - - for path in parsed_args.inputs: - if not path.exists(): - raise FileNotFoundError - - if _is_cartridge_file(path): - files.add(path) - - if path.is_dir(): - for input_file in path.iterdir(): - if _is_cartridge_file(input_file): - files.add(input_file) - - return files - - -def collect_settings(parsed_args): - """ - Collects settings dictionary from argparse arguments. 
- """ - - input_files = _get_files(parsed_args) - log_level = parsed_args.loglevel - logging_config = { - "level": log_level, - "format": "{%(filename)s:%(lineno)d} - %(message)s", - } - settings = { - "input_files": input_files, - "output_format": parsed_args.result, - "logging_config": logging_config, - "workspace": Path.cwd() / parsed_args.output, - "link_file": parsed_args.link_file, - "passport_file": parsed_args.passport_file, - "relative_links_source": parsed_args.relative_links_source, - } - return settings +USE_I18N = False +USE_TZ = False From 253b223fcbd35a483cef5c5d229ef91d68b39af1 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov <mykhailo.chernyshov@raccoongang.com> Date: Wed, 15 Jan 2025 12:29:04 +0200 Subject: [PATCH 4/7] test: [FC-0063] Content processors are tested --- pytest.ini | 2 +- tests/conftest.py | 46 ++--- .../imscc_file/web_link_content.xml | 2 +- .../studio_course_xml/course.xml | 26 ++- tests/test_content_parsers/__init__.py | 0 tests/test_content_parsers/test_html.py | 191 ++++++++++++++++++ tests/test_content_parsers/test_lti.py | 18 ++ tests/test_content_parsers/test_qti.py | 42 ++++ tests/test_content_parsers/test_video.py | 24 +++ tests/test_main.py | 27 ++- tests/test_models.py | 95 +-------- tests/test_olx.py | 185 +---------------- tests/test_olx_generators/__init__.py | 0 tests/test_olx_generators/test_discussion.py | 44 ++++ tests/test_olx_generators/test_html.py | 65 ++++++ tests/test_olx_generators/test_qti.py | 17 ++ tests/test_olx_generators/test_video.py | 14 ++ tests/{test_settings.py => test_options.py} | 13 +- 18 files changed, 488 insertions(+), 323 deletions(-) create mode 100644 tests/test_content_parsers/__init__.py create mode 100644 tests/test_content_parsers/test_html.py create mode 100644 tests/test_content_parsers/test_lti.py create mode 100644 tests/test_content_parsers/test_qti.py create mode 100644 tests/test_content_parsers/test_video.py create mode 100644 tests/test_olx_generators/__init__.py create mode 100644 tests/test_olx_generators/test_discussion.py create mode 100644 tests/test_olx_generators/test_html.py create mode 100644 tests/test_olx_generators/test_qti.py create mode 100644 tests/test_olx_generators/test_video.py rename tests/{test_settings.py => test_options.py} (53%) diff --git a/pytest.ini b/pytest.ini index 11c3a49d..179a37a5 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] usefixtures = chdir_to_workspace -DJANGO_SETTINGS_MODULE = cc2olx.django_settings +DJANGO_SETTINGS_MODULE = cc2olx.settings diff --git a/tests/conftest.py b/tests/conftest.py index 31b10605..d6a14a77 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,6 @@ import shutil import zipfile -import xml.dom.minidom from pathlib import Path from tempfile import NamedTemporaryFile from xml.dom.minidom import parse @@ -13,8 +12,7 @@ from cc2olx.cli import parse_args from cc2olx.models import Cartridge -from cc2olx.olx import OlxExport -from cc2olx.settings import collect_settings +from cc2olx.parser import parse_options @pytest.fixture(scope="session") @@ -78,30 +76,38 @@ def studio_course_xml(fixtures_data_dir): return parse(course_xml_filename).toprettyxml() +@pytest.fixture(scope="session") +def relative_links_source() -> str: + """ + Provide a relative links source. + """ + return "https://relative.source.domain" + + @pytest.fixture -def settings(imscc_file, link_map_csv): +def options(imscc_file, link_map_csv, relative_links_source): """ - Basic settings fixture. + Basic options fixture. 
""" - parsed_args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv)]) + args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv), "-s", relative_links_source]) - _settings = collect_settings(parsed_args) + options = parse_options(args) - yield _settings + yield options - shutil.rmtree(_settings["workspace"], ignore_errors=True) + shutil.rmtree(options["workspace"], ignore_errors=True) @pytest.fixture -def cartridge(imscc_file, settings): - cartridge = Cartridge(imscc_file, settings["workspace"]) +def cartridge(imscc_file, options): + cartridge = Cartridge(imscc_file, options["workspace"]) cartridge.load_manifest_extracted() cartridge.normalize() yield cartridge - shutil.rmtree(str(settings["workspace"] / imscc_file.stem)) + shutil.rmtree(str(options["workspace"] / imscc_file.stem)) @pytest.fixture(scope="session") @@ -289,19 +295,3 @@ def expected_cleaned_cdata_containing_html(fixtures_data_dir: Path) -> str: """ html_without_cdata_path = fixtures_data_dir / "html_files/cleaned-cdata-containing-html.html" return html_without_cdata_path.read_text() - - -@pytest.fixture -def bare_olx_exporter(cartridge: Cartridge) -> OlxExport: - """ - Provides bare OLX exporter. - - Args: - cartridge (Cartridge): Cartridge class instance. - - Returns: - OlxExport: OlxExport instance. - """ - olx_exporter = OlxExport(cartridge) - olx_exporter.doc = xml.dom.minidom.Document() - return olx_exporter diff --git a/tests/fixtures_data/imscc_file/web_link_content.xml b/tests/fixtures_data/imscc_file/web_link_content.xml index 7d6b1880..d7a1ef83 100644 --- a/tests/fixtures_data/imscc_file/web_link_content.xml +++ b/tests/fixtures_data/imscc_file/web_link_content.xml @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> <webLink xmlns="http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3 http://www.imsglobal.org/profile/cc/ccv1p3/ccv1p3_imswl_v1p3.xsd"> <title>Web Link Content - + diff --git a/tests/fixtures_data/studio_course_xml/course.xml b/tests/fixtures_data/studio_course_xml/course.xml index f494f616..f0162820 100644 --- a/tests/fixtures_data/studio_course_xml/course.xml +++ b/tests/fixtures_data/studio_course_xml/course.xml @@ -152,7 +152,17 @@ -

elearning.png

]]> + + + + + +

+ elearning.png +

+ + +]]>
@@ -227,10 +237,20 @@ -

extra_files/example.pdf

]]> + + + + + +

+ extra_files/example.pdf +

+ + +]]> - Web Link Content]]> + Web Link Content]]> diff --git a/tests/test_content_parsers/__init__.py b/tests/test_content_parsers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_content_parsers/test_html.py b/tests/test_content_parsers/test_html.py new file mode 100644 index 00000000..bfbc51c1 --- /dev/null +++ b/tests/test_content_parsers/test_html.py @@ -0,0 +1,191 @@ +from pathlib import Path +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from cc2olx.content_parsers import HtmlContentParser + + +class TestHtmlContentParser: + def test_parse_content_returns_default_content_if_there_is_no_resource_identifier(self): + parser = HtmlContentParser(Mock(), Mock()) + expected_content = {"html": "

<p>MISSING CONTENT</p>
"} + + actual_content = parser._parse_content(None) + + assert actual_content == expected_content + + def test_parse_content_returns_default_content_if_the_resource_is_missed_in_cartridge(self): + cartridge_mock = Mock(define_resource=Mock(return_value=None)) + parser = HtmlContentParser(cartridge_mock, Mock()) + expected_content = {"html": "

<p>MISSING CONTENT</p>
"} + + actual_content = parser._parse_content(Mock()) + + assert actual_content == expected_content + + @patch("cc2olx.content_parsers.html.logger") + def test_parse_content_logs_missing_resource(self, logger_mock): + cartridge_mock = Mock(define_resource=Mock(return_value=None)) + parser = HtmlContentParser(cartridge_mock, Mock()) + idref_mock = Mock() + + parser._parse_content(idref_mock) + + logger_mock.info.assert_called_once_with("Missing resource: %s", idref_mock) + + @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None)) + @patch("cc2olx.content_parsers.html.HtmlContentParser.is_known_unprocessed_resource_type", Mock(return_value=True)) + def test_parse_content_returns_default_content_for_known_unprocessed_resource_types(self): + parser = HtmlContentParser(MagicMock(), Mock()) + expected_content = {"html": "

<p>MISSING CONTENT</p>
"} + + actual_content = parser._parse_content(Mock()) + + assert actual_content == expected_content + + @pytest.mark.parametrize( + "resource_type", + [ + "imsbasiclti_xmlv1p2", + "imsbasiclti_xmlv1p3", + "imsqti_xmlv1p3/imscc_xmlv1p1/assessment", + "imsqti_xmlv1p3/imscc_xmlv1p3/assessment", + "imsdt_xmlv1p2", + "imsdt_xmlv1p3", + ], + ) + def test_known_unprocessed_resource_types_is_detected(self, resource_type): + parser = HtmlContentParser(Mock(), Mock()) + + assert parser.is_known_unprocessed_resource_type(resource_type) is True + + @pytest.mark.parametrize("resource_type", ["imsbasicabc_xmlv1p2", "imsexample_xmlv1p3", "not_cc_type", "imsscorm"]) + def test_not_known_unprocessed_resource_types_is_detected(self, resource_type): + parser = HtmlContentParser(Mock(), Mock()) + + assert parser.is_known_unprocessed_resource_type(resource_type) is False + + @pytest.mark.parametrize( + "resource_type", + ["unsupported_resource_type", "chess_game_xmlv1p1", "drag_and_drop_xmlv1p1", "imsab_xmlv1p2"], + ) + @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_web_link_content", Mock(return_value=None)) + @patch("cc2olx.content_parsers.html.HtmlContentParser._parse_not_imported_content") + def test_parse_content_parses_not_imported_content(self, parse_not_imported_content_mock, resource_type): + cartridge_mock = Mock(define_resource=Mock(return_value={"type": "imsqti_xmlv1p2"})) + parser = HtmlContentParser(cartridge_mock, Mock()) + + actual_content = parser._parse_content(Mock()) + + assert actual_content == parse_not_imported_content_mock.return_value + + @patch("cc2olx.content_parsers.html.imghdr.what", Mock(return_value=None)) + def test_parse_webcontent_returns_default_content_for_unknown_webcontent_type_from_web_resources_dir(self): + parser = HtmlContentParser( + Mock(build_resource_file_path=Mock(return_value=Path("web_resources/unknown/path/to/file.ext"))), + Mock(), + ) + expected_content = {"html": "

<p>MISSING CONTENT</p>
"} + + actual_content = parser._parse_webcontent(Mock(), MagicMock()) + + assert actual_content == expected_content + + @patch("cc2olx.content_parsers.html.logger") + @patch("cc2olx.content_parsers.html.imghdr.what", Mock(return_value=None)) + def test_parse_webcontent_logs_skipping_webcontent(self, logger_mock): + resource_file_path = Path("web_resources/unknown/path/to/file.ext") + parser = HtmlContentParser(Mock(build_resource_file_path=Mock(return_value=resource_file_path)), Mock()) + + parser._parse_webcontent(Mock(), MagicMock()) + + logger_mock.info.assert_called_once_with("Skipping webcontent: %s", resource_file_path) + + @patch("cc2olx.content_parsers.html.logger") + @patch("cc2olx.content_parsers.html.open", Mock(side_effect=FileNotFoundError)) + def test_webcontent_html_file_reading_failure_is_logged(self, logger_mock): + parser = HtmlContentParser(Mock(), Mock()) + idref_mock = Mock() + resource_file_path_mock = Mock() + + with pytest.raises(FileNotFoundError): + parser._parse_webcontent_html_file(idref_mock, resource_file_path_mock) + + logger_mock.error.assert_called_once_with("Failure reading %s from id %s", resource_file_path_mock, idref_mock) + + @pytest.mark.parametrize( + "resource,message", + [ + ( + {"type": "some_type_mock", "href": "https://example.com/some/type/link/"}, + "Not imported content: type = 'some_type_mock', href = 'https://example.com/some/type/link/'", + ), + ({"type": "some_type_mock"}, "Not imported content: type = 'some_type_mock'"), + ], + ) + @patch("cc2olx.content_parsers.html.logger") + def test_not_imported_content_parsing_with_href_in_resource(self, logger_mock, resource, message): + parser = HtmlContentParser(Mock(), Mock()) + expected_content = {"html": message} + + actual_content = parser._parse_not_imported_content(resource) + + logger_mock.info.assert_called_once_with("%s", message) + assert actual_content == expected_content + + def test_parsing_results(self, cartridge): + parser = HtmlContentParser(cartridge, Mock()) + + assert parser.parse("resource_1_course") == { + "html": "Not imported content: type = 'associatedcontent/imscc_xmlv1p1/learning-application-resource', " + "href = 'course_settings/canvas_export.txt'" + } + + assert parser.parse("resource_3_vertical") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + 'fractal.jpg\n' + "

<p>Fractal Image Fractal Image</p>
\n' + "\n\n" + } + + assert parser.parse("resource_6_wiki_content") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + '

<p>Lorem ipsum...</p>
\nWiki Content' + "\n\n\n" + } + + assert parser.parse("resource_7_canvas_content") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + '

<p>Lorem ipsum...</p>
\nCanvas Content' + "\n\n\n" + } + + assert parser.parse("resource_module-|-introduction") == { + "html": '\n\n\n' + "Vertical\n" + '\n' + '\n' + '\n' + "\n\n" + '

<p>Lorem ipsum...</p>
\nWiki Content' + "\n\n\n" + } diff --git a/tests/test_content_parsers/test_lti.py b/tests/test_content_parsers/test_lti.py new file mode 100644 index 00000000..fc55841e --- /dev/null +++ b/tests/test_content_parsers/test_lti.py @@ -0,0 +1,18 @@ +from unittest.mock import Mock + +from cc2olx.content_parsers import LtiContentParser + + +class TestLtiContentParser: + def test_parsing_results(self, cartridge): + parser = LtiContentParser(cartridge, Mock()) + + assert parser.parse("resource_2_lti") == { + "title": "Learning Tools Interoperability", + "description": "https://www.imsglobal.org/activity/learning-tools-interoperability", + "launch_url": "https://lti.local/launch", + "height": "500", + "width": "500", + "custom_parameters": {}, + "lti_id": "learning_tools_interoperability", + } diff --git a/tests/test_content_parsers/test_qti.py b/tests/test_content_parsers/test_qti.py new file mode 100644 index 00000000..d27e33c7 --- /dev/null +++ b/tests/test_content_parsers/test_qti.py @@ -0,0 +1,42 @@ +from unittest.mock import MagicMock, Mock, PropertyMock, call, patch + +import pytest + +from cc2olx.content_parsers import QtiContentParser +from cc2olx.exceptions import QtiError + + +class TestQtiContentParser: + @pytest.mark.parametrize("cc_profile", ["unknown_profile", "cc.chess.v0p1", "cc.drag_and_drop.v0p1", "123"]) + def test_parse_problem_raises_qti_error_if_cc_profile_is_unknown(self, cc_profile): + parser = QtiContentParser(Mock(), Mock()) + problem_mock = MagicMock(profile=cc_profile) + + with pytest.raises(QtiError) as exc_info: + parser._parse_problem(problem_mock, Mock(), Mock()) + + assert str(exc_info.value) == f'Unknown cc_profile: "{cc_profile}"' + + @patch("cc2olx.content_parsers.qti.logger") + def test_parse_problem_logs_inability_to_process_problem(self, logger_mock): + parser = QtiContentParser(Mock(), Mock()) + ident_mock = MagicMock() + resource_file_path_mock = Mock() + cc_profile_mock = Mock() + problem_mock = Mock(profile=cc_profile_mock, attrib={"ident": ident_mock}) + expected_logger_info_call_args_list = [ + call("Problem with ID %s can't be converted.", ident_mock), + call(" Profile %s is not supported.", cc_profile_mock), + call(" At file %s.", resource_file_path_mock), + ] + + with patch( + "cc2olx.content_parsers.qti.QtiContentParser._problem_parsers_map", + new_callable=PropertyMock, + ) as problem_parsers_map_mock: + problem_parsers_map_mock.return_value = {cc_profile_mock: Mock(side_effect=NotImplementedError)} + + parser._parse_problem(problem_mock, Mock(), resource_file_path_mock) + + assert logger_mock.info.call_count == 3 + assert logger_mock.info.call_args_list == expected_logger_info_call_args_list diff --git a/tests/test_content_parsers/test_video.py b/tests/test_content_parsers/test_video.py new file mode 100644 index 00000000..f77d8b30 --- /dev/null +++ b/tests/test_content_parsers/test_video.py @@ -0,0 +1,24 @@ +from unittest.mock import Mock, patch + +from cc2olx.content_parsers import VideoContentParser + + +class TestVideoContentParser: + def test_parse_content_returns_none_if_there_is_no_resource_identifier(self): + parser = VideoContentParser(Mock(), Mock()) + + actual_content = parser._parse_content(None) + + assert actual_content is None + + @patch( + "cc2olx.content_parsers.video.VideoContentParser._parse_web_link_content", + Mock(return_value={"href": "youtube.com/watch?v=ABCDeF12345"}), + ) + def test_parse_content_parses_youtube_link(self): + parser = VideoContentParser(Mock(), Mock()) + expected_content = {"youtube": "ABCDeF12345"} + + 
actual_content = parser._parse_content(Mock()) + + assert actual_content == expected_content diff --git a/tests/test_main.py b/tests/test_main.py index 69d88842..f1066a23 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -5,14 +5,19 @@ from .utils import format_xml -def test_convert_one_file(settings, imscc_file, studio_course_xml): +def test_convert_one_file(options, imscc_file, studio_course_xml): """ Tests, that ``convert_one_file`` call for ``imscc`` file results in tar.gz archive with olx course. """ expected_tgz_members_num = 7 - convert_one_file(imscc_file, settings["workspace"], settings["link_file"]) + convert_one_file( + imscc_file, + options["workspace"], + options["link_file"], + relative_links_source=options["relative_links_source"], + ) tgz_path = str((imscc_file.parent / "output" / imscc_file.stem).with_suffix(".tar.gz")) @@ -28,36 +33,36 @@ def test_convert_one_file(settings, imscc_file, studio_course_xml): break -def test_main(mocker, imscc_file, settings): +def test_main(mocker, imscc_file, options): """ Tests, that invocation of main function results in converted ``.imscc`` file. """ mocker.patch("cc2olx.main.parse_args") - mocker.patch("cc2olx.main.collect_settings", return_value=settings) + mocker.patch("cc2olx.main.parse_options", return_value=options) main() # workspace has been created - assert settings["workspace"].exists() + assert options["workspace"].exists() # content of imscc has been extracted - assert (settings["workspace"] / imscc_file.stem).exists() + assert (options["workspace"] / imscc_file.stem).exists() # archived olx course has been generated - assert (settings["workspace"] / imscc_file.stem).with_suffix(".tar.gz").exists() + assert (options["workspace"] / imscc_file.stem).with_suffix(".tar.gz").exists() -def test_main_zip_output(mocker, settings): +def test_main_zip_output(mocker, options): """ Tests, that ``--result zip`` cli option works fine. """ - settings["output_format"] = RESULT_TYPE_ZIP + options["output_format"] = RESULT_TYPE_ZIP mocker.patch("cc2olx.main.parse_args") - mocker.patch("cc2olx.main.collect_settings", return_value=settings) + mocker.patch("cc2olx.main.parse_options", return_value=options) main() - assert settings["workspace"].with_suffix(".zip").exists() + assert options["workspace"].with_suffix(".zip").exists() diff --git a/tests/test_models.py b/tests/test_models.py index 0b26b07d..fab6e07d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -5,12 +5,12 @@ from cc2olx.models import Cartridge, ResourceFile -def test_cartridge_initialize(imscc_file, settings): +def test_cartridge_initialize(imscc_file, options): """ Tests, that ``Cartridge`` initializes without errors. """ - cartridge = Cartridge(imscc_file, settings["workspace"]) + cartridge = Cartridge(imscc_file, options["workspace"]) assert cartridge.normalized is None assert cartridge.resources is None @@ -19,12 +19,12 @@ def test_cartridge_initialize(imscc_file, settings): assert cartridge.file_path == imscc_file -def test_load_manifest_extracted(imscc_file, settings, temp_workspace_dir): +def test_load_manifest_extracted(imscc_file, options, temp_workspace_dir): """ Tests, that all resources and metadata are loaded fine. 
""" - cartridge = Cartridge(imscc_file, settings["workspace"]) + cartridge = Cartridge(imscc_file, options["workspace"]) cartridge.load_manifest_extracted() cartridge_version = "1.3.0" @@ -42,8 +42,8 @@ def test_load_manifest_extracted(imscc_file, settings, temp_workspace_dir): assert isinstance(cartridge.resources[0]["children"][0], ResourceFile) -def test_cartridge_normalize(imscc_file, settings): - cartridge = Cartridge(imscc_file, settings["workspace"]) +def test_cartridge_normalize(imscc_file, options): + cartridge = Cartridge(imscc_file, options["workspace"]) cartridge.load_manifest_extracted() cartridge.normalize() @@ -299,86 +299,3 @@ def test_cartridge_normalize(imscc_file, settings): "identifier": "org_1", "structure": "rooted-hierarchy", } - - -def test_cartridge_get_resource_content(cartridge): - assert cartridge.get_resource_content("resource_1_course") == ( - "html", - { - "html": "Unimported content: type = 'associatedcontent/imscc_xmlv1p1/learning-application-resource', " - "href = 'course_settings/canvas_export.txt'" - }, - ) - - assert cartridge.get_resource_content("resource_2_lti") == ( - "lti", - { - "title": "Learning Tools Interoperability", - "description": "https://www.imsglobal.org/activity/learning-tools-interoperability", - "launch_url": "https://lti.local/launch", - "height": "500", - "width": "500", - "custom_parameters": {}, - "lti_id": "learning_tools_interoperability", - }, - ) - - assert cartridge.get_resource_content("resource_3_vertical") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - 'fractal.jpg\n' - "

<p>Fractal Image Fractal Image</p>
\n' - "\n\n" - }, - ) - - assert cartridge.get_resource_content("resource_6_wiki_content") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - '

<p>Lorem ipsum...</p>
\nWiki Content' - "\n\n\n" - }, - ) - - assert cartridge.get_resource_content("resource_7_canvas_content") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - '

<p>Lorem ipsum...</p>
\nCanvas Content' - "\n\n\n" - }, - ) - - assert cartridge.get_resource_content("resource_module-|-introduction") == ( - "html", - { - "html": '\n\n\n' - "Vertical\n" - '\n' - '\n' - '\n' - "\n\n" - '

<p>Lorem ipsum...</p>
\nWiki Content' - "\n\n\n" - }, - ) diff --git a/tests/test_olx.py b/tests/test_olx.py index a35d67c6..792f804f 100644 --- a/tests/test_olx.py +++ b/tests/test_olx.py @@ -1,16 +1,12 @@ import json -from unittest.mock import Mock - -import lxml import xml.dom.minidom from cc2olx import olx - from .utils import format_xml -def test_olx_export_xml(cartridge, link_map_csv, studio_course_xml): - xml = olx.OlxExport(cartridge, link_map_csv).xml() +def test_olx_export_xml(cartridge, link_map_csv, studio_course_xml, relative_links_source): + xml = olx.OlxExport(cartridge, link_map_csv, relative_links_source=relative_links_source).xml() assert format_xml(xml) == format_xml(studio_course_xml) @@ -25,132 +21,6 @@ def test_olx_export_wiki_page_disabled(cartridge, link_map_csv, studio_course_xm assert tab["is_hidden"] -def test_process_link(): - details = {"href": "https://example.com/path"} - details_with_youtube_link = {"href": "https://www.youtube.com/watch?v=gQ-cZRmHfs4&amp;list=PL5B350D511278A56B"} - - assert olx.process_link(details) == ( - "html", - {"html": "".format(details["href"])}, - ) - - assert olx.process_link(details_with_youtube_link) == ( - "video", - {"youtube": "gQ-cZRmHfs4"}, - ) - - -class TestOlXExporeterHTMLProcessing: - """ - Test the OLX exporter for HTML parsing flow. - """ - - def test_html_cleaning_from_cdata( - self, - mocker, - bare_olx_exporter, - cdata_containing_html, - expected_cleaned_cdata_containing_html, - ): - """ - Test that CDATA cleaning function is called during HTML processing. - - Args: - mocker (MockerFixture): MockerFixture instance. - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. - expected_cleaned_cdata_containing_html (str): Expected HTML after - successful cleaning. - """ - details = {"html": cdata_containing_html} - - clean_from_cdata_mock = mocker.patch( - "cc2olx.olx.clean_from_cdata", - return_value=expected_cleaned_cdata_containing_html, - ) - - bare_olx_exporter._process_html(details) - - clean_from_cdata_mock.assert_called_once() - - def test_processed_html_content_is_wrapped_into_cdata(self, bare_olx_exporter, cdata_containing_html): - """ - Test that processed HTML content is wrapped into CDATA section. - - Args: - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. - """ - details = {"html": cdata_containing_html} - - result_html, *__ = bare_olx_exporter._process_html(details) - - assert isinstance(result_html.childNodes[0], xml.dom.minidom.CDATASection) - - -class TestOlXExporeterIframeParser: - """ - Test the olx exporter for iframe link parsing flow - """ - - def _get_oxl_exporter(self, cartridge, link_map_csv): - """ - Helper function to create olx exporter. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - - Returns: - [OlxExport]: OlxExport instance. - """ - olx_exporter = olx.OlxExport(cartridge, link_file=link_map_csv) - olx_exporter.doc = xml.dom.minidom.Document() - return olx_exporter - - def test_process_html_for_iframe_video_blocks(self, cartridge, link_map_csv, iframe_content): - """ - Test if the iframe is getting parsed and video blocks being generated. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - iframe_content ([str]): Html file content. 
- """ - olx_exporter = self._get_oxl_exporter(cartridge, link_map_csv) - _, video_olx = olx_exporter._process_html_for_iframe(iframe_content) - assert len(video_olx) == 1 - - def test_process_html_for_iframe_html_removed(self, cartridge, link_map_csv, iframe_content): - """ - Test if iframe is removed from html. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - iframe_content ([str]): Html file content. - """ - olx_exporter = self._get_oxl_exporter(cartridge, link_map_csv) - html_str, _ = olx_exporter._process_html_for_iframe(iframe_content) - html = lxml.html.fromstring(html_str) - iframe = html.xpath("//iframe") - assert len(iframe) == 0 - - def test_create_olx_nodes(self, cartridge, link_map_csv, iframe_content): - """ - Test create olx nodes with html content. - - Args: - cartridge ([Cartridge]): Cartridge class instance. - link_map_csv ([str]): Csv file path. - iframe_content ([str]): Html file content. - """ - olx_exporter = self._get_oxl_exporter(cartridge, link_map_csv) - nodes = olx_exporter._create_olx_nodes("html", {"html": iframe_content}) - # Html xblock and video xblock - assert len(nodes) == 2 - - class TestOlxExporterLtiPolicy: def _get_oxl_exporter(self, cartridge, passports_csv): """ @@ -167,11 +37,10 @@ def _get_oxl_exporter(self, cartridge, passports_csv): olx_exporter.doc = xml.dom.minidom.Document() return olx_exporter - def test_lti_consumer_present_set_to_true(self, cartridge, passports_csv): + def test_lti_consumer_ids_are_defined(self, cartridge, passports_csv): olx_exporter = self._get_oxl_exporter(cartridge, passports_csv) _ = olx_exporter.xml() - assert olx_exporter.lti_consumer_present is True assert olx_exporter.lti_consumer_ids == {"external_tool_lti", "learning_tools_interoperability"} def test_policy_contains_advanced_module(self, cartridge, passports_csv, caplog): @@ -193,51 +62,3 @@ def test_policy_contains_advanced_module(self, cartridge, passports_csv, caplog) assert ["Missing LTI Passport for learning_tools_interoperability. Using default."] == [ rec.message for rec in caplog.records ] - - -class TestDiscussionParsing: - """ - Test the OLX exporter for discussion parsing flow. - """ - - def test_discussion_content_cleaning_from_cdata( - self, - mocker, - bare_olx_exporter, - cdata_containing_html, - expected_cleaned_cdata_containing_html, - ): - """ - Test that CDATA cleaning function is called during discussion parsing. - - Args: - mocker (MockerFixture): MockerFixture instance. - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. - expected_cleaned_cdata_containing_html (str): Expected HTML after - successful cleaning. - """ - details = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} - - clean_from_cdata_mock = mocker.patch( - "cc2olx.olx.clean_from_cdata", - return_value=expected_cleaned_cdata_containing_html, - ) - - bare_olx_exporter._create_discussion_node(details) - - clean_from_cdata_mock.assert_called_once() - - def test_discussion_decription_is_wrapped_into_cdata(self, bare_olx_exporter, cdata_containing_html): - """ - Test that processed HTML content is wrapped into CDATA section. - - Args: - bare_olx_exporter (OlxExport): bare OLX exporter. - cdata_containing_html (str): HTML that contains CDATA tags. 
- """ - details = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} - - discussion_decription_html, __ = bare_olx_exporter._create_discussion_node(details) - - assert isinstance(discussion_decription_html.childNodes[0], xml.dom.minidom.CDATASection) diff --git a/tests/test_olx_generators/__init__.py b/tests/test_olx_generators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_olx_generators/test_discussion.py b/tests/test_olx_generators/test_discussion.py new file mode 100644 index 00000000..fedc1146 --- /dev/null +++ b/tests/test_olx_generators/test_discussion.py @@ -0,0 +1,44 @@ +import xml.dom.minidom +from unittest.mock import Mock, patch + +from cc2olx.olx_generators import DiscussionOlxGenerator + + +class TestDiscussionOlxGenerator: + def test_discussion_content_cleaning_from_cdata( + self, + cdata_containing_html, + expected_cleaned_cdata_containing_html, + ): + """ + Test that CDATA cleaning function is called during discussion parsing. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + expected_cleaned_cdata_containing_html (str): Expected HTML after + successful cleaning. + """ + generator = DiscussionOlxGenerator(Mock()) + content = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} + + with patch( + "cc2olx.olx_generators.discussion.clean_from_cdata", + return_value=expected_cleaned_cdata_containing_html, + ) as clean_from_cdata_mock: + generator.create_nodes(content) + + clean_from_cdata_mock.assert_called_once() + + def test_discussion_description_is_wrapped_into_cdata(self, cdata_containing_html): + """ + Test that processed HTML content is wrapped into CDATA section. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + """ + generator = DiscussionOlxGenerator(Mock()) + content = {"dependencies": [], "title": Mock(), "text": cdata_containing_html} + + discussion_description_html, __ = generator.create_nodes(content) + + assert isinstance(discussion_description_html.childNodes[0], xml.dom.minidom.CDATASection) diff --git a/tests/test_olx_generators/test_html.py b/tests/test_olx_generators/test_html.py new file mode 100644 index 00000000..ad9d4414 --- /dev/null +++ b/tests/test_olx_generators/test_html.py @@ -0,0 +1,65 @@ +import xml.dom.minidom +from unittest.mock import patch + +import lxml + +from cc2olx.dataclasses import OlxGeneratorContext +from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from cc2olx.olx_generators import HtmlOlxGenerator + + +class TestHtmlOlxGenerator: + def test_process_html_for_iframe_provides_video_blocks(self, iframe_content, link_map_csv): + context = OlxGeneratorContext(iframe_link_parser=KalturaIframeLinkParser(link_map_csv), lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + + _, video_olx = generator._process_html_for_iframe(iframe_content) + + assert len(video_olx) == 1 + assert video_olx[0].nodeName == "video" + + def test_process_html_for_iframe_removes_iframes_from_html(self, iframe_content, link_map_csv): + context = OlxGeneratorContext(iframe_link_parser=KalturaIframeLinkParser(link_map_csv), lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + + html_str, _ = generator._process_html_for_iframe(iframe_content) + + html = lxml.html.fromstring(html_str) + iframe = html.xpath("//iframe") + assert len(iframe) == 0 + + def test_html_cleaning_from_cdata(self, cdata_containing_html, expected_cleaned_cdata_containing_html): + """ + Test that CDATA cleaning function is called during HTML 
processing. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + expected_cleaned_cdata_containing_html (str): Expected HTML after + successful cleaning. + """ + context = OlxGeneratorContext(iframe_link_parser=None, lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + content = {"html": cdata_containing_html} + + with patch( + "cc2olx.olx_generators.html.clean_from_cdata", + return_value=expected_cleaned_cdata_containing_html, + ) as clean_from_cdata_mock: + generator.create_nodes(content) + + clean_from_cdata_mock.assert_called_once() + + def test_processed_html_content_is_wrapped_into_cdata(self, cdata_containing_html): + """ + Test that processed HTML content is wrapped into CDATA section. + + Args: + cdata_containing_html (str): HTML that contains CDATA tags. + """ + context = OlxGeneratorContext(iframe_link_parser=None, lti_consumer_ids=set()) + generator = HtmlOlxGenerator(context) + content = {"html": cdata_containing_html} + + result_html, *__ = generator.create_nodes(content) + + assert isinstance(result_html.childNodes[0], xml.dom.minidom.CDATASection) diff --git a/tests/test_olx_generators/test_qti.py b/tests/test_olx_generators/test_qti.py new file mode 100644 index 00000000..0e563106 --- /dev/null +++ b/tests/test_olx_generators/test_qti.py @@ -0,0 +1,17 @@ +from unittest.mock import Mock + +import pytest + +from cc2olx.exceptions import QtiError +from cc2olx.olx_generators import QtiOlxGenerator + + +class TestQtiOlxGenerator: + @pytest.mark.parametrize("cc_profile", ["unknown_profile", "cc.chess.v0p1", "cc.drag_and_drop.v0p1", "123"]) + def test_create_nodes_raises_qti_error_if_cc_profile_is_unknown(self, cc_profile): + generator = QtiOlxGenerator(Mock()) + + with pytest.raises(QtiError) as exc_info: + generator.create_nodes([{"cc_profile": cc_profile}]) + + assert str(exc_info.value) == f'Unknown cc_profile: "{cc_profile}"' diff --git a/tests/test_olx_generators/test_video.py b/tests/test_olx_generators/test_video.py new file mode 100644 index 00000000..b82737f4 --- /dev/null +++ b/tests/test_olx_generators/test_video.py @@ -0,0 +1,14 @@ +from unittest.mock import Mock + +from cc2olx.olx_generators import VideoOlxGenerator + + +class TestVideoOlxGenerator: + def test_nodes_creation(self): + generator = VideoOlxGenerator(Mock()) + expected_video_xml = '