Skip to content

Commit

Permalink
Keep track of which files have already been parsed (#1786)
Browse files Browse the repository at this point in the history
This is to avoid re-parsing when the workflow refers to multiple
fragments in the same file.

* Depend on cwl-utils >= 0.22
* Add git to cwltool-docker
* bump minimum schema-salad version to 8.4+

Co-authored-by: Michael R. Crusoe <michael.crusoe@gmail.com>
  • Loading branch information
tetron and mr-c authored Jan 27, 2023
1 parent cb3160c commit 809f4cc
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 23 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ exclude_lines =
pragma: no cover
raise NotImplementedError
if __name__ == .__main__.:
if TYPE_CHECKING:
ignore_errors = True
omit =
tests/*
2 changes: 1 addition & 1 deletion build-cwltool-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ docker run -t -v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp:/tmp \
-v "$PWD":/tmp/cwltool \
quay.io/commonwl/cwltool_module /bin/sh -c \
"apk add gcc bash && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"
"apk add gcc bash git && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"
2 changes: 1 addition & 1 deletion conformance-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ fi
venv cwl-conformance-venv
pip install -U setuptools wheel pip
pip uninstall -y cwltool
pip install "${SCRIPT_DIRECTORY}"
pip install "${SCRIPT_DIRECTORY}" -r"${SCRIPT_DIRECTORY}/requirements.txt"
pip install cwltest>=2.3 pytest-cov pytest-xdist

# Set conformance test filename
Expand Down
6 changes: 3 additions & 3 deletions cwltool.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN apk add --no-cache git gcc python3-dev libxml2-dev libxslt-dev libc-dev linu
WORKDIR /cwltool
COPY . .

RUN pip install toml -rmypy-requirements.txt
RUN pip install toml -rmypy-requirements.txt -rrequirements.txt
RUN CWLTOOL_USE_MYPYC=1 MYPYPATH=mypy-stubs pip wheel --no-binary schema-salad --wheel-dir=/wheels .[deps]
RUN rm /wheels/schema_salad*
RUN pip install black
Expand All @@ -15,13 +15,13 @@ RUN pip install --force-reinstall --no-index --no-warn-script-location --root=/p
# --force-reinstall to install our new mypyc compiled schema-salad package

FROM python:3.11-alpine as module
LABEL maintainer peter.amstutz@curri.com
LABEL maintainer peter.amstutz@curii.com

RUN apk add --no-cache docker nodejs graphviz libxml2 libxslt
COPY --from=builder /pythonroot/ /

FROM python:3.11-alpine
LABEL maintainer peter.amstutz@curri.com
LABEL maintainer peter.amstutz@curii.com

RUN apk add --no-cache docker nodejs graphviz libxml2 libxslt
COPY --from=builder /pythonroot/ /
Expand Down
44 changes: 30 additions & 14 deletions cwltool/load_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
ResolveType,
json_dumps,
)
from schema_salad.fetcher import Fetcher

from ruamel.yaml.comments import CommentedMap, CommentedSeq

Expand Down Expand Up @@ -318,8 +319,11 @@ def fast_parser(
fileuri: Optional[str],
uri: str,
loadingContext: LoadingContext,
fetcher: Fetcher,
) -> Tuple[Union[CommentedMap, CommentedSeq], CommentedMap]:
lopt = cwl_v1_2.LoadingOptions(idx=loadingContext.codegen_idx, fileuri=fileuri)
lopt = cwl_v1_2.LoadingOptions(
idx=loadingContext.codegen_idx, fileuri=fileuri, fetcher=fetcher
)

if uri not in loadingContext.codegen_idx:
cwl_v1_2.load_document_with_metadata(
Expand Down Expand Up @@ -359,18 +363,26 @@ def fast_parser(
# Need to match the document loader's index with the fast parser index
# Get the base URI (no fragments) for documents that use $graph
nofrag = urllib.parse.urldefrag(uri)[0]
objects, loadopt = loadingContext.codegen_idx[nofrag]
fileobj = cmap(
cast(
Union[int, float, str, Dict[str, Any], List[Any], None],
cwl_v1_2.save(objects, relative_uris=False),

flag = "fastparser-idx-from:" + nofrag
if not loadingContext.loader.idx.get(flag):
objects, loadopt = loadingContext.codegen_idx[nofrag]
fileobj = cmap(
cast(
Union[int, float, str, Dict[str, Any], List[Any], None],
cwl_v1_2.save(objects, relative_uris=False),
)
)
)
visit_class(
fileobj,
("CommandLineTool", "Workflow", "ExpressionTool"),
partial(update_index, loadingContext.loader),
)
visit_class(
fileobj,
("CommandLineTool", "Workflow", "ExpressionTool"),
partial(update_index, loadingContext.loader),
)
loadingContext.loader.idx[flag] = flag
for u in lopt.imports:
loadingContext.loader.idx["import:" + u] = "import:" + u
for u in lopt.includes:
loadingContext.loader.idx["include:" + u] = "include:" + u

return cast(
Union[CommentedMap, CommentedSeq],
Expand Down Expand Up @@ -519,7 +531,9 @@ def resolve_and_validate_document(
#
processobj, metadata = document_loader.resolve_ref(uri)
elif loadingContext.fast_parser:
processobj, metadata = fast_parser(workflowobj, fileuri, uri, loadingContext)
processobj, metadata = fast_parser(
workflowobj, fileuri, uri, loadingContext, document_loader.fetcher
)
else:
document_loader.resolve_all(workflowobj, fileuri)
processobj, metadata = document_loader.resolve_ref(uri)
Expand Down Expand Up @@ -594,7 +608,9 @@ def make_tool(
and isinstance(uri, str)
and not loadingContext.skip_resolve_all
):
resolveduri, metadata = fast_parser(None, None, uri, loadingContext)
resolveduri, metadata = fast_parser(
None, None, uri, loadingContext, loadingContext.loader.fetcher
)
else:
resolveduri, metadata = loadingContext.loader.resolve_ref(uri)

Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ruamel.yaml>=0.15,<0.17.22
rdflib>=4.2.2,<6.3
rdflib>= 4.2.2, < 6.0.0;python_version<='3.6'
shellescape>=3.4.1,<3.9
schema-salad>=8.2.20211104054942,<9
schema-salad>=8.4,<9
prov==1.5.1
bagit==1.8.1
mypy-extensions
Expand All @@ -15,4 +15,4 @@ pydot>=1.4.1
argcomplete>=1.12.0
pyparsing != 3.0.2 # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319
pyparsing < 3;python_version<='3.6' # breaks --print-dot
cwl-utils>=0.19
cwl-utils>=0.22
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
"rdflib >= 4.2.2, < 6.3.0",
"rdflib >= 4.2.2, < 6.0.0;python_version<='3.6'",
"shellescape >= 3.4.1, < 3.9",
"schema-salad >= 8.2.20211104054942, < 9",
"schema-salad >= 8.4, < 9",
"mypy-extensions",
"psutil >= 5.6.6",
"prov == 1.5.1",
Expand All @@ -121,7 +121,7 @@
"pyparsing != 3.0.2", # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319
"pyparsing < 3 ;python_version<='3.6'", # breaks --print-dot (pydot)
"argcomplete",
"cwl-utils >= 0.19",
"cwl-utils >= 0.22",
],
extras_require={
"deps": ["galaxy-tool-util >= 22.1.2, <23"],
Expand Down
18 changes: 18 additions & 0 deletions tests/test_load_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,21 @@ def test_load_graph_fragment_from_packed() -> None:

finally:
use_standard_schema("v1.0")


def test_import_tracked() -> None:
    """Test that a ``$import`` target is tracked in the loader index.

    The same check is made twice: once with the fast parser enabled
    (loading the v1.2 workflow ``811-12.cwl``) and once with it disabled
    (loading ``811.cwl``).  In both cases the document loader's index is
    expected to contain an ``import:``-prefixed key for
    ``schemadef-type.yml``.
    """
    # Index key expected to be recorded for the imported schema file.
    expected_key = "import:file://%s" % get_data("tests/wf/schemadef-type.yml")

    # (fast_parser setting, workflow document to load)
    scenarios = (
        (True, "tests/wf/811-12.cwl"),
        (False, "tests/wf/811.cwl"),
    )

    for use_fast_parser, workflow in scenarios:
        context = LoadingContext({"fast_parser": use_fast_parser})
        loaded = load_tool(get_data(workflow), context)

        assert loaded.doc_loader is not None
        assert expected_key in loaded.doc_loader.idx
15 changes: 15 additions & 0 deletions tests/wf/811-12.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Single-step workflow: forwards the `hello` input to the tool in
# schemadef-tool-12.cwl and declares no outputs.
cwlVersion: v1.2
class: Workflow

inputs:
  - id: hello
    type: Any
outputs: []

steps:
  step:
    id: step
    run: schemadef-tool-12.cwl
    in:
      hello: hello
    out: []
24 changes: 24 additions & 0 deletions tests/wf/schemadef-tool-12.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env cwl-runner
# CommandLineTool whose `hello` input uses the HelloType type referenced from
# schemadef-type.yml, which is pulled in through `$import` under requirements.
class: CommandLineTool
cwlVersion: v1.2
hints:
  ResourceRequirement:
    ramMin: 8

requirements:
  - $import: schemadef-type.yml

inputs:
  - id: hello
    type: "schemadef-type.yml#HelloType"
    inputBinding:
      valueFrom: $(self.a)/$(self.b)

outputs:
  - id: output
    type: File
    outputBinding:
      glob: output.txt

stdout: output.txt
baseCommand: echo

0 comments on commit 809f4cc

Please sign in to comment.