Skip to content

Commit

Permalink
test: [FC-0063] PDF blocks processing is tested
Browse files Browse the repository at this point in the history
  • Loading branch information
myhailo-chernyshov-rg committed Jan 17, 2025
1 parent 462d4bb commit 13c3bef
Show file tree
Hide file tree
Showing 13 changed files with 318 additions and 30 deletions.
28 changes: 24 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
import os
import shutil
import zipfile

from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import List
from xml.dom.minidom import parse

import pytest

from cc2olx.cli import parse_args
from cc2olx.models import Cartridge
from cc2olx.parser import parse_options
from .utils import build_multi_value_args


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -84,13 +85,32 @@ def relative_links_source() -> str:
return "https://relative.source.domain"


@pytest.fixture(scope="session")
def content_types_with_custom_blocks() -> List[str]:
    """
    Supply the content types that are converted using custom blocks.

    Only the ``pdf`` content type is listed.
    """
    custom_block_content_types = ["pdf"]
    return custom_block_content_types


@pytest.fixture
def options(imscc_file, link_map_csv, relative_links_source):
def options(imscc_file, link_map_csv, relative_links_source, content_types_with_custom_blocks):
"""
Basic options fixture.
"""

args = parse_args(["-i", str(imscc_file), "-f", str(link_map_csv), "-s", relative_links_source])
content_types_with_custom_blocks_args = build_multi_value_args("-c", content_types_with_custom_blocks)

args = parse_args(
[
"-i",
str(imscc_file),
"-f",
str(link_map_csv),
"-s",
relative_links_source,
*content_types_with_custom_blocks_args,
]
)

options = parse_options(args)

Expand Down
18 changes: 15 additions & 3 deletions tests/fixtures_data/imscc_file/imsmanifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,18 @@
<item identifier="video" identifierref="resource_9_video">
<title>Video With Other Content</title>
</item>
<item identifier="pdf_outside_resource" identifierref="pdf_dependency">
<item identifier="pdf_web_resource" identifierref="resource_pdf_1">
<title>PDF from Web Resources</title>
</item>
<item identifier="pdf_outside_resource" identifierref="resource_pdf_2">
<title>PDF Outside of Web Resources</title>
</item>
<item identifier="web_link_content" identifierref="resource_8_web_link_content">
<title>Web Link Content</title>
</item>
<item identifier="web_link_to_pdf" identifierref="resource_web_link_to_pdf">
<title>Web Link to PDF file</title>
</item>
</item>
<item identifier="sequence2">
<title>Sequence2</title>
Expand Down Expand Up @@ -155,11 +161,17 @@
<resource identifier="resource_7_canvas_content" type="webcontent" href="canvas_content/canvas_content.html">
<file href="canvas_content/canvas_content.html"/>
</resource>
<resource identifier="pdf_dependency" type="webcontent">
<resource identifier="resource_pdf_1" type="webcontent">
<file href="web_resources/PEP_8.pdf" />
</resource>
<resource identifier="resource_pdf_2" type="webcontent">
<file href="extra_files/example.pdf" />
</resource>
<resource identifier="resource_8_web_link_content" type="imswl_xmlv1p3">
<file href="web_link_content.xml"/>
<file href="weblinks/web_link_content.xml"/>
</resource>
<resource identifier="resource_web_link_to_pdf" type="imswl_xmlv1p3">
<file href="weblinks/web_link_to_pdf.xml"/>
</resource>
<resource identifier="resource_external_lti_tool" type="imsbasiclti_xmlv1p0">
<file href="resource_external_lti_tool.xml"/>
Expand Down
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/fixtures_data/imscc_file/weblinks/web_link_to_pdf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<webLink xmlns="http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.imsglobal.org/xsd/imsccv1p3/imswl_v1p3 http://www.imsglobal.org/profile/cc/ccv1p3/ccv1p3_imswl_v1p3.xsd">
<title>PEP 312 – Simple Implicit Lambda</title>
<url href="https://pdf.storage.com/python/proposals/PEP_312.pdf"/>
</webLink>
18 changes: 7 additions & 11 deletions tests/fixtures_data/studio_course_xml/course.xml
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,18 @@
</html>]]></html>
<video edx_video_id="42d2a5e2-bced-45d6-b8dc-2f5901c9fdd0" display_name="Video With Other Content" url_name="resource_9_video"/>
</vertical>
<vertical display_name="PDF from Web Resources" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<pdf display_name="PDF from Web Resources" url="/static/PEP_8.pdf" url_name="resource_pdf_1"/>
</vertical>
<vertical display_name="PDF Outside of Web Resources" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<html display_name="PDF Outside of Web Resources" url_name="pdf_dependency"><![CDATA[<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>
<body>
<p>
<a href="/static/extra_files/example.pdf" alt="extra_files/example.pdf">extra_files/example.pdf<a>
</p>
</body>
</html>
]]></html>
<pdf display_name="PDF Outside of Web Resources" url="/static/extra_files/example.pdf" url_name="resource_pdf_2"/>
</vertical>
<vertical display_name="Web Link Content" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<html display_name="Web Link Content" url_name="resource_8_web_link_content"><![CDATA[<a href="https://relative.source.domain/web-link">Web Link Content</a>]]></html>
</vertical>
<vertical display_name="Web Link to PDF file" url_name="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
<pdf display_name="Web Link to PDF file" url="https://pdf.storage.com/python/proposals/PEP_312.pdf" url_name="resource_web_link_to_pdf"/>
</vertical>
</sequential>
</chapter>
<chapter display_name="Sequence2" url_name="sequence2">
Expand Down
41 changes: 41 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from cc2olx.cli import parse_args
from .utils import build_multi_value_args


def test_parse_args(imscc_file):
Expand All @@ -26,6 +27,7 @@ def test_parse_args(imscc_file):
passport_file=None,
output="output",
relative_links_source=None,
content_types_with_custom_blocks=[],
)


Expand All @@ -44,6 +46,7 @@ def test_parse_args_csv_file(imscc_file, link_map_csv):
passport_file=None,
output="output",
relative_links_source=None,
content_types_with_custom_blocks=[],
)


Expand All @@ -60,6 +63,7 @@ def test_parse_args_passport_file(imscc_file, passports_csv):
passport_file=passports_csv,
output="output",
relative_links_source=None,
content_types_with_custom_blocks=[],
)


Expand All @@ -79,6 +83,7 @@ def test_parse_args_with_correct_relative_links_source(imscc_file: Path) -> None
passport_file=None,
output="output",
relative_links_source=relative_links_source,
content_types_with_custom_blocks=[],
)


Expand All @@ -90,3 +95,39 @@ def test_parse_args_with_incorrect_relative_links_source(imscc_file: Path) -> No

with pytest.raises(SystemExit):
parse_args(["-i", str(imscc_file), "-s", relative_links_source])


def test_parse_args_with_correct_content_types_with_custom_blocks(imscc_file: Path) -> None:
    """
    Check that a supported content type passed via ``-c`` ends up in the parsed namespace.

    Every other argument is expected to keep the value seen in the rest of the
    parser tests when only the input file is provided.
    """
    custom_block_types = ["pdf"]
    cli_arguments = ["-i", str(imscc_file), *build_multi_value_args("-c", custom_block_types)]
    expected_namespace = Namespace(
        inputs=[imscc_file],
        loglevel="INFO",
        result="folder",
        link_file=None,
        passport_file=None,
        output="output",
        relative_links_source=None,
        content_types_with_custom_blocks=custom_block_types,
    )

    actual_namespace = parse_args(cli_arguments)

    assert actual_namespace == expected_namespace


@pytest.mark.parametrize(
    "content_type_with_custom_block",
    ["word_document", "poll", "survey", "feedback", "image", "audio", "llm"],
)
def test_parse_args_with_incorrect_content_types_with_custom_blocks(
    imscc_file: Path,
    content_type_with_custom_block: str,
) -> None:
    """
    Check that the argument parser exits when ``-c`` receives an unsupported content type.
    """
    cli_arguments = ["-i", str(imscc_file), "-c", content_type_with_custom_block]

    # The parser is expected to terminate via ``SystemExit`` on an invalid value.
    with pytest.raises(SystemExit):
        parse_args(cli_arguments)
163 changes: 163 additions & 0 deletions tests/test_content_parsers/test_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from unittest.mock import Mock, patch

import pytest

from cc2olx.content_parsers import PdfContentParser


class TestPdfContentParser:
    """
    Unit tests for ``PdfContentParser``.

    The parser is always built from two throwaway ``Mock`` objects; the private
    ``_context`` and ``_cartridge`` attributes are then overridden per test so
    every collaborator is fully controlled.
    """

    def test_parse_content_returns_none_if_idref_is_none(self):
        """
        ``_parse_content`` yields ``None`` when it is given a ``None`` idref.
        """
        pdf_parser = PdfContentParser(Mock(), Mock())

        result = pdf_parser._parse_content(None)

        assert result is None

    def test_parse_content_returns_none_if_pdf_content_type_with_custom_block_is_not_used(self):
        """
        ``_parse_content`` yields ``None`` when the context reports the custom block is not used.
        """
        context = Mock()
        context.is_content_type_with_custom_block_used.return_value = False
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._context = context

        result = pdf_parser._parse_content(Mock())

        assert result is None

    def test_parse_content_returns_none_if_resource_is_not_found(self):
        """
        ``_parse_content`` yields ``None`` when the cartridge resolves no resource.
        """
        context = Mock()
        context.is_content_type_with_custom_block_used.return_value = True
        cartridge = Mock()
        cartridge.define_resource.return_value = None
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._context = context
        pdf_parser._cartridge = cartridge

        result = pdf_parser._parse_content(Mock())

        assert result is None

    def test_parse_content_parses_webcontent(self):
        """
        A ``webcontent`` resource is handed to ``_parse_webcontent`` and its result is returned.
        """
        webcontent_resource = {"type": "webcontent"}
        context = Mock()
        context.is_content_type_with_custom_block_used.return_value = True
        cartridge = Mock()
        cartridge.define_resource.return_value = webcontent_resource
        webcontent_parser = Mock()
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._context = context
        pdf_parser._cartridge = cartridge
        pdf_parser._parse_webcontent = webcontent_parser

        result = pdf_parser._parse_content(Mock())

        webcontent_parser.assert_called_once_with(webcontent_resource)
        assert result == webcontent_parser.return_value

    def test_parse_webcontent_transforms_web_link_content_to_pdf(self):
        """
        A web link resource is parsed and then passed to ``_transform_web_link_content_to_pdf``.

        Despite the ``parse_webcontent`` in its name, this test drives ``_parse_content``
        with an ``imswl_xmlv1p3`` resource.
        """
        context = Mock()
        context.is_content_type_with_custom_block_used.return_value = True
        cartridge = Mock()
        cartridge.define_resource.return_value = {"type": "imswl_xmlv1p3"}
        web_link_content = Mock()
        web_link_to_pdf_transformer = Mock()
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._context = context
        pdf_parser._cartridge = cartridge
        pdf_parser._parse_web_link_content = Mock(return_value=web_link_content)
        pdf_parser._transform_web_link_content_to_pdf = web_link_to_pdf_transformer

        result = pdf_parser._parse_content(Mock())

        web_link_to_pdf_transformer.assert_called_once_with(web_link_content)
        assert result == web_link_to_pdf_transformer.return_value

    @pytest.mark.parametrize(
        "file_suffix",
        [".docx", ".mp3", ".mp4", ".png", ".jpeg", ".ods", ".csv", ".xls", ".pptx", ".txt"],
    )
    def test_parse_webcontent_returns_none_if_resource_file_is_not_pdf(self, file_suffix):
        """
        ``_parse_webcontent`` yields ``None`` when the resource file suffix is not ``.pdf``.
        """
        cartridge = Mock()
        resource_file = Mock()
        non_pdf_web_content = Mock(resource_file_path=Mock(suffix=file_suffix))
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._cartridge = cartridge

        with patch(
            "cc2olx.content_parsers.pdf.WebContent",
            return_value=non_pdf_web_content,
        ) as web_content_class:
            result = pdf_parser._parse_webcontent({"children": [resource_file]})

        web_content_class.assert_called_once_with(cartridge, resource_file)
        assert result is None

    @patch(
        "cc2olx.content_parsers.pdf.WebContent",
        return_value=Mock(resource_file_path=Mock(suffix=".pdf"), is_from_web_resources_dir=Mock(return_value=True)),
    )
    def test_parse_webcontent_parses_pdf_from_web_resources_dir(self, web_content_mock):
        """
        A PDF reported as being from the web resources directory goes to the dedicated handler.
        """
        cartridge = Mock()
        resource_file = Mock()
        web_resources_handler = Mock()
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._cartridge = cartridge
        pdf_parser._parse_pdf_webcontent_from_web_resources_dir = web_resources_handler

        result = pdf_parser._parse_webcontent({"children": [resource_file]})

        web_content_mock.assert_called_once_with(cartridge, resource_file)
        web_resources_handler.assert_called_once_with(web_content_mock.return_value)
        assert result == web_resources_handler.return_value

    @patch(
        "cc2olx.content_parsers.pdf.WebContent",
        return_value=Mock(resource_file_path=Mock(suffix=".pdf"), is_from_web_resources_dir=Mock(return_value=False)),
    )
    def test_parse_webcontent_parses_pdf_outside_web_resources_dir(self, web_content_mock):
        """
        A PDF reported as being outside the web resources directory goes to the dedicated handler.
        """
        cartridge = Mock()
        resource_file = Mock()
        outside_web_resources_handler = Mock()
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._cartridge = cartridge
        pdf_parser._parse_pdf_webcontent_outside_web_resources_dir = outside_web_resources_handler

        result = pdf_parser._parse_webcontent({"children": [resource_file]})

        web_content_mock.assert_called_once_with(cartridge, resource_file)
        outside_web_resources_handler.assert_called_once_with(web_content_mock.return_value)
        assert result == outside_web_resources_handler.return_value

    def test_pdf_webcontent_from_web_resources_dir_parsing(self):
        """
        The static path is registered as a web resource path and returned under the ``url`` key.
        """
        web_content = Mock()
        cartridge = Mock()
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._cartridge = cartridge

        result = pdf_parser._parse_pdf_webcontent_from_web_resources_dir(web_content)

        static_paths_registry = cartridge.olx_to_original_static_file_paths
        static_paths_registry.add_web_resource_path.assert_called_once_with(
            web_content.olx_static_path,
            web_content.resource_file_path,
        )
        assert result == {"url": web_content.olx_static_path}

    def test_pdf_webcontent_outside_web_resources_dir_parsing(self):
        """
        The static path is registered as an extra path and returned under the ``url`` key.
        """
        web_content = Mock()
        cartridge = Mock()
        pdf_parser = PdfContentParser(Mock(), Mock())
        pdf_parser._cartridge = cartridge

        result = pdf_parser._parse_pdf_webcontent_outside_web_resources_dir(web_content)

        static_paths_registry = cartridge.olx_to_original_static_file_paths
        static_paths_registry.add_extra_path.assert_called_once_with(
            web_content.olx_static_path,
            web_content.resource_relative_path,
        )
        assert result == {"url": web_content.olx_static_path}

    @pytest.mark.parametrize(
        "web_link_url",
        ["https://example.com/html_content.html", "http://example.com/video.mp4", "/path/to/audio.wav"],
    )
    def test_transform_web_link_content_to_pdf_returns_none_if_web_link_does_not_point_to_pdf_file(self, web_link_url):
        """
        A web link whose ``href`` is not a PDF file is not transformed.
        """
        pdf_parser = PdfContentParser(Mock(), Mock())

        result = pdf_parser._transform_web_link_content_to_pdf({"href": web_link_url})

        assert result is None

    @pytest.mark.parametrize(
        "web_link_url",
        ["https://example.com/PEP_8.pdf", "http://example.com/imscc_profilev1p2-Overview.pdf", "/static/example.pdf"],
    )
    def test_transform_web_link_content_to_pdf_when_web_link_points_to_pdf_file(self, web_link_url):
        """
        A web link whose ``href`` is a PDF file is returned unchanged under the ``url`` key.
        """
        pdf_parser = PdfContentParser(Mock(), Mock())

        result = pdf_parser._transform_web_link_content_to_pdf({"href": web_link_url})

        assert result == {"url": web_link_url}
3 changes: 2 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ def test_convert_one_file(options, imscc_file, studio_course_xml):
Tests, that ``convert_one_file`` call for ``imscc`` file results in
tar.gz archive with olx course.
"""
expected_tgz_members_num = 7
expected_tgz_members_num = 8

convert_one_file(
imscc_file,
options["workspace"],
options["link_file"],
relative_links_source=options["relative_links_source"],
content_types_with_custom_blocks=options["content_types_with_custom_blocks"],
)

tgz_path = str((imscc_file.parent / "output" / imscc_file.stem).with_suffix(".tar.gz"))
Expand Down
Loading

0 comments on commit 13c3bef

Please sign in to comment.