From 809f4cc7f2ffe2489933cfefe404d76efc92312a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 27 Jan 2023 07:19:39 -0500 Subject: [PATCH] Keep track of which files have already been parsed (#1786) This is to avoid re-parsing when the workflow refers to multiple fragments in the same file. * Depend on cwl-utils >= 0.22 * Add git to cwltool-docker * bump minimum schema-salad version to 8.4+ Co-authored-by: Michael R. Crusoe --- .coveragerc | 1 + build-cwltool-docker.sh | 2 +- conformance-test.sh | 2 +- cwltool.Dockerfile | 6 ++--- cwltool/load_tool.py | 44 +++++++++++++++++++++++----------- requirements.txt | 4 ++-- setup.py | 4 ++-- tests/test_load_tool.py | 18 ++++++++++++++ tests/wf/811-12.cwl | 15 ++++++++++++ tests/wf/schemadef-tool-12.cwl | 24 +++++++++++++++++++ 10 files changed, 97 insertions(+), 23 deletions(-) create mode 100644 tests/wf/811-12.cwl create mode 100644 tests/wf/schemadef-tool-12.cwl diff --git a/.coveragerc b/.coveragerc index 1c4b01767..0f0187862 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,6 +9,7 @@ exclude_lines = pragma: no cover raise NotImplementedError if __name__ == .__main__.: + if TYPE_CHECKING: ignore_errors = True omit = tests/* diff --git a/build-cwltool-docker.sh b/build-cwltool-docker.sh index d733b9919..97910069a 100755 --- a/build-cwltool-docker.sh +++ b/build-cwltool-docker.sh @@ -7,4 +7,4 @@ docker run -t -v /var/run/docker.sock:/var/run/docker.sock \ -v /tmp:/tmp \ -v "$PWD":/tmp/cwltool \ quay.io/commonwl/cwltool_module /bin/sh -c \ - "apk add gcc bash && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool" + "apk add gcc bash git && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool" diff --git a/conformance-test.sh b/conformance-test.sh index 3e773bc63..ffd3a45b5 100755 --- a/conformance-test.sh +++ b/conformance-test.sh @@ -73,7 +73,7 @@ fi venv cwl-conformance-venv pip install -U setuptools wheel pip pip uninstall -y cwltool -pip install "${SCRIPT_DIRECTORY}" +pip install "${SCRIPT_DIRECTORY}" -r"${SCRIPT_DIRECTORY}/requirements.txt" pip install cwltest>=2.3 pytest-cov pytest-xdist # Set conformance test filename diff --git a/cwltool.Dockerfile b/cwltool.Dockerfile index 17b41f8cb..ecfbd9e22 100644 --- a/cwltool.Dockerfile +++ b/cwltool.Dockerfile @@ -5,7 +5,7 @@ RUN apk add --no-cache git gcc python3-dev libxml2-dev libxslt-dev libc-dev linu WORKDIR /cwltool COPY . . -RUN pip install toml -rmypy-requirements.txt +RUN pip install toml -rmypy-requirements.txt -rrequirements.txt RUN CWLTOOL_USE_MYPYC=1 MYPYPATH=mypy-stubs pip wheel --no-binary schema-salad --wheel-dir=/wheels .[deps] RUN rm /wheels/schema_salad* RUN pip install black @@ -15,13 +15,13 @@ RUN pip install --force-reinstall --no-index --no-warn-script-location --root=/p # --force-reinstall to install our new mypyc compiled schema-salad package FROM python:3.11-alpine as module -LABEL maintainer peter.amstutz@curri.com +LABEL maintainer peter.amstutz@curii.com RUN apk add --no-cache docker nodejs graphviz libxml2 libxslt COPY --from=builder /pythonroot/ / FROM python:3.11-alpine -LABEL maintainer peter.amstutz@curri.com +LABEL maintainer peter.amstutz@curii.com RUN apk add --no-cache docker nodejs graphviz libxml2 libxslt COPY --from=builder /pythonroot/ / diff --git a/cwltool/load_tool.py b/cwltool/load_tool.py index 5e2a378ba..3e39ed76f 100644 --- a/cwltool/load_tool.py +++ b/cwltool/load_tool.py @@ -32,6 +32,7 @@ ResolveType, json_dumps, ) +from schema_salad.fetcher import Fetcher from ruamel.yaml.comments import CommentedMap, CommentedSeq @@ -318,8 +319,11 @@ def fast_parser( fileuri: Optional[str], uri: str, loadingContext: LoadingContext, + fetcher: Fetcher, ) -> Tuple[Union[CommentedMap, CommentedSeq], CommentedMap]: - lopt = cwl_v1_2.LoadingOptions(idx=loadingContext.codegen_idx, fileuri=fileuri) + lopt = cwl_v1_2.LoadingOptions( + idx=loadingContext.codegen_idx, fileuri=fileuri, fetcher=fetcher + ) if uri not in loadingContext.codegen_idx: cwl_v1_2.load_document_with_metadata( @@ -359,18 +363,26 @@ def fast_parser( # Need to match the document loader's index with the fast parser index # Get the base URI (no fragments) for documents that use $graph nofrag = urllib.parse.urldefrag(uri)[0] - objects, loadopt = loadingContext.codegen_idx[nofrag] - fileobj = cmap( - cast( - Union[int, float, str, Dict[str, Any], List[Any], None], - cwl_v1_2.save(objects, relative_uris=False), + + flag = "fastparser-idx-from:" + nofrag + if not loadingContext.loader.idx.get(flag): + objects, loadopt = loadingContext.codegen_idx[nofrag] + fileobj = cmap( + cast( + Union[int, float, str, Dict[str, Any], List[Any], None], + cwl_v1_2.save(objects, relative_uris=False), + ) ) - ) - visit_class( - fileobj, - ("CommandLineTool", "Workflow", "ExpressionTool"), - partial(update_index, loadingContext.loader), - ) + visit_class( + fileobj, + ("CommandLineTool", "Workflow", "ExpressionTool"), + partial(update_index, loadingContext.loader), + ) + loadingContext.loader.idx[flag] = flag + for u in lopt.imports: + loadingContext.loader.idx["import:" + u] = "import:" + u + for u in lopt.includes: + loadingContext.loader.idx["include:" + u] = "include:" + u return cast( Union[CommentedMap, CommentedSeq], @@ -519,7 +531,9 @@ def resolve_and_validate_document( # processobj, metadata = document_loader.resolve_ref(uri) elif loadingContext.fast_parser: - processobj, metadata = fast_parser(workflowobj, fileuri, uri, loadingContext) + processobj, metadata = fast_parser( + workflowobj, fileuri, uri, loadingContext, document_loader.fetcher + ) else: document_loader.resolve_all(workflowobj, fileuri) processobj, metadata = document_loader.resolve_ref(uri) @@ -594,7 +608,9 @@ def make_tool( and isinstance(uri, str) and not loadingContext.skip_resolve_all ): - resolveduri, metadata = fast_parser(None, None, uri, loadingContext) + resolveduri, metadata = fast_parser( + None, None, uri, loadingContext, loadingContext.loader.fetcher + ) else: resolveduri, metadata = loadingContext.loader.resolve_ref(uri) diff --git a/requirements.txt b/requirements.txt index f485c88f1..0cc0e3725 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ ruamel.yaml>=0.15,<0.17.22 rdflib>=4.2.2,<6.3 rdflib>= 4.2.2, < 6.0.0;python_version<='3.6' shellescape>=3.4.1,<3.9 -schema-salad>=8.2.20211104054942,<9 +schema-salad>=8.4,<9 prov==1.5.1 bagit==1.8.1 mypy-extensions @@ -15,4 +15,4 @@ pydot>=1.4.1 argcomplete>=1.12.0 pyparsing != 3.0.2 # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319 pyparsing < 3;python_version<='3.6' # breaks --print-dot -cwl-utils>=0.19 +cwl-utils>=0.22 diff --git a/setup.py b/setup.py index 49fb9d68a..3ecbbe4e2 100644 --- a/setup.py +++ b/setup.py @@ -110,7 +110,7 @@ "rdflib >= 4.2.2, < 6.3.0", "rdflib >= 4.2.2, < 6.0.0;python_version<='3.6'", "shellescape >= 3.4.1, < 3.9", - "schema-salad >= 8.2.20211104054942, < 9", + "schema-salad >= 8.4, < 9", "mypy-extensions", "psutil >= 5.6.6", "prov == 1.5.1", @@ -121,7 +121,7 @@ "pyparsing != 3.0.2", # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319 "pyparsing < 3 ;python_version<='3.6'", # breaks --print-dot (pydot) "argcomplete", - "cwl-utils >= 0.19", + "cwl-utils >= 0.22", ], extras_require={ "deps": ["galaxy-tool-util >= 22.1.2, <23"], diff --git a/tests/test_load_tool.py b/tests/test_load_tool.py index f087d3e0f..df8f1361a 100644 --- a/tests/test_load_tool.py +++ b/tests/test_load_tool.py @@ -130,3 +130,21 @@ def test_load_graph_fragment_from_packed() -> None: finally: use_standard_schema("v1.0") + + +def test_import_tracked() -> None: + """Test that $import and $include are tracked in the index.""" + + loadingContext = LoadingContext({"fast_parser": True}) + tool = load_tool(get_data("tests/wf/811-12.cwl"), loadingContext) + path = "import:file://%s" % get_data("tests/wf/schemadef-type.yml") + + assert tool.doc_loader is not None + assert path in tool.doc_loader.idx + + loadingContext = LoadingContext({"fast_parser": False}) + tool = load_tool(get_data("tests/wf/811.cwl"), loadingContext) + path = "import:file://%s" % get_data("tests/wf/schemadef-type.yml") + + assert tool.doc_loader is not None + assert path in tool.doc_loader.idx diff --git a/tests/wf/811-12.cwl b/tests/wf/811-12.cwl new file mode 100644 index 000000000..f4403f45b --- /dev/null +++ b/tests/wf/811-12.cwl @@ -0,0 +1,15 @@ +cwlVersion: v1.2 +class: Workflow + +inputs: + - id: hello + type: Any +outputs: [] + +steps: + step: + id: step + run: schemadef-tool-12.cwl + in: + hello: hello + out: [] diff --git a/tests/wf/schemadef-tool-12.cwl b/tests/wf/schemadef-tool-12.cwl new file mode 100644 index 000000000..5c3433e7f --- /dev/null +++ b/tests/wf/schemadef-tool-12.cwl @@ -0,0 +1,24 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.2 +hints: + ResourceRequirement: + ramMin: 8 + +requirements: + - $import: schemadef-type.yml + +inputs: + - id: hello + type: "schemadef-type.yml#HelloType" + inputBinding: + valueFrom: $(self.a)/$(self.b) + +outputs: + - id: output + type: File + outputBinding: + glob: output.txt + +stdout: output.txt +baseCommand: echo