Skip to content

Commit

Permalink
Bugfix/sanitize workspaces (#451)
Browse files Browse the repository at this point in the history
Update script adapters to use same path sanitizer as for workspace creation for script serialization. Resolves #450

Add hypothesis to enable property testing to stress the path sanitizer.

Adds copy button to code listings in documentation
  • Loading branch information
jwhite242 authored Feb 13, 2025
1 parent 0802410 commit 0ebf2c7
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 10 deletions.
3 changes: 2 additions & 1 deletion maestrowf/interfaces/script/fluxscriptadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from maestrowf.abstracts.enums import JobStatusCode, CancelCode
from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord, \
FluxFactory
from maestrowf.utils import make_safe_path

LOGGER = logging.getLogger(__name__)
status_re = re.compile(r"Job \d+ status: (.*)$")
Expand Down Expand Up @@ -338,7 +339,7 @@ def _write_script(self, ws_path, step):
to_be_scheduled, cmd, restart = self.get_scheduler_command(step)

fname = "{}.{}".format(step.name, self._extension)
script_path = os.path.join(ws_path, fname)
script_path = make_safe_path(ws_path, fname)
with open(script_path, "w") as script:
script.write(self.get_header(step))
cmd = "\n\n{}\n".format(cmd)
Expand Down
4 changes: 2 additions & 2 deletions maestrowf/interfaces/script/localscriptadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
CancelCode
from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord
from maestrowf.abstracts.interfaces import ScriptAdapter
from maestrowf.utils import start_process
from maestrowf.utils import make_safe_path, start_process

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -80,7 +80,7 @@ def _write_script(self, ws_path, step):
to_be_scheduled = False

fname = "{}.sh".format(step.name)
script_path = os.path.join(ws_path, fname)
script_path = make_safe_path(ws_path, fname)
with open(script_path, "w") as script:
script.write("#!{0}\n\n{1}\n".format(self._exec, cmd))

Expand Down
5 changes: 3 additions & 2 deletions maestrowf/interfaces/script/lsfscriptadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from maestrowf.abstracts.enums import CancelCode, JobStatusCode, State, \
SubmissionCode
from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord

from maestrowf.utils import make_safe_path

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -463,7 +463,8 @@ def _write_script(self, ws_path, step):
to_be_scheduled, cmd, restart = self.get_scheduler_command(step)

fname = "{}.{}".format(step.name, self._extension)
script_path = os.path.join(ws_path, fname)
script_path = make_safe_path(ws_path, fname)

with open(script_path, "w") as script:
if to_be_scheduled:
script.write(self.get_header(step))
Expand Down
4 changes: 2 additions & 2 deletions maestrowf/interfaces/script/slurmscriptadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from maestrowf.abstracts.enums import JobStatusCode, State, SubmissionCode, \
CancelCode
from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord
from maestrowf.utils import start_process
from maestrowf.utils import make_safe_path, start_process

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -556,7 +556,7 @@ def _write_script(self, ws_path, step):
to_be_scheduled, cmd, restart = self.get_scheduler_command(step)

fname = "{}.slurm.sh".format(step.name)
script_path = os.path.join(ws_path, fname)
script_path = make_safe_path(ws_path, fname)

if to_be_scheduled:
header = self.get_header(step)
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ theme:
- navigation.indexes
# - toc.integrate
- content.code.annotate
- content.code.copy
palette:
- scheme: slate
primary: black
Expand Down
79 changes: 78 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool]
[tool.poetry]
name = "maestrowf"
version = "1.1.11dev2"
version = "1.1.11dev3"
description = "A tool to easily orchestrate general computational workflows both locally and on supercomputers."
license = "MIT License"
classifiers = [
Expand Down Expand Up @@ -55,6 +55,10 @@ pytest-cov = "*"
pre-commit = "*"
tox-travis = "*"
tox-pyenv = "*"
hypothesis = [
{version = "<6.114", python = "3.8"},
{version = ">=6.114", python = ">=3.9"}
]

[tool.poetry.group.docs.dependencies]
mkdocs = { version = "<2.0", python = "^3.9" }
Expand Down
116 changes: 115 additions & 1 deletion tests/utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from contextlib import nullcontext as does_not_raise
from pathlib import Path

import pytest
from pytest import raises
from rich.pretty import pprint
from maestrowf.utils import parse_version

from maestrowf.datastructures.core import ParameterGenerator
from maestrowf.datastructures.core.parameters import Combination
from maestrowf.utils import make_safe_path, parse_version
from packaging.version import Version, InvalidVersion

from hypothesis import given, HealthCheck, settings, strategies

@pytest.mark.parametrize(
"version_string, expected_version, error",
Expand Down Expand Up @@ -58,3 +63,112 @@ def test_version_greater(test_version_string, ref_version, expected, base_expect

ver_cmp_base = test_version.base_version >= ref_version.base_version
assert ver_cmp_base == base_expected


@pytest.mark.parametrize(
"test_base_path, test_param_combos, expected_paths",
[
(
"hello_world",
{
"NAME": {
"values": ["Pam", "Jim", "Michael", "Dwight"],
"labels": "NAME.%%"
},
"GREETING": {
"values": ["Hello", "Ciao", "Hey", "Hi"],
"labels": "GREETING.%%"
},
"FAREWELL": {
"values": ["Goodbye", "Farewell", "So long", "See you later"],
"labels": "FAREWELL.%%"
}
},
['hello_world/FAREWELL.Goodbye.GREETING.Hello.NAME.Pam',
'hello_world/FAREWELL.Farewell.GREETING.Ciao.NAME.Jim',
'hello_world/FAREWELL.So_long.GREETING.Hey.NAME.Michael',
'hello_world/FAREWELL.See_you_later.GREETING.Hi.NAME.Dwight']
),
(
"foo_bar_kwargs",
{
"FOOBAR": {
"values": ["--foobar 9 2 5 1", "--foobar 1 6 4 3"],
"labels": "FOOBAR.%%"
},
},
['foo_bar_kwargs/FOOBAR.--foobar_9_2_5_1',
'foo_bar_kwargs/FOOBAR.--foobar_1_6_4_3']
),
],
)
def test_param_combo_path_sanitizer(test_base_path, test_param_combos, expected_paths):
"""
Test sanitization of combo strings for use in workspace paths.
"""
params = ParameterGenerator()
for param_key, param_dict in test_param_combos.items():
if "name" not in param_dict:
params.add_parameter(param_key, param_dict['values'], param_dict['labels'])
else:
params.add_parameter(param_key, param_dict['values'], param_dict['labels'], param_dict['name'])

for idx, combo in enumerate(params):

combo_str = combo.get_param_string(list(test_param_combos.keys()))
print(f"{combo_str=}")
test_path = make_safe_path(test_base_path, combo_str)
assert test_path == expected_paths[idx]



@given(
strategies.text(
min_size=3,
max_size=20,
alphabet=strategies.characters(
codec='ascii', min_codepoint=32, max_codepoint=126
)
),
)
@settings(max_examples=100, suppress_health_check=(HealthCheck.function_scoped_fixture,))
def test_path_sanitizer(tmpdir, test_path_str):
"""
Test sanitization of misc strings for use in workspace paths.
Similar to combo_path_sanitizer, but uses simpler string inputs for easier random testing
Notes
-----
Explicitly checking for empty paths for now to handle hypothesis' tendency to generate
strings that are all punctuation characters since those characters are simply removed from
from the safe paths... Need better strategy for that kind of replacement long term
(force hashing?)
"""
# print(f"{test_path_str=}")
# print(f"{tmpdir=}") # switch with tmp_path when make_safe_path converted to pathlib
test_path_name = make_safe_path("", test_path_str)

test_path = Path(tmpdir) / test_path_name
# print(f"{test_path=}")
path_found = False
try:
test_path.touch()
for path in Path(tmpdir).iterdir():
if path.absolute() == test_path.absolute() and path.is_file():
path_found = True
break

except FileNotFoundError:
pprint(f"Creating '{test_path}' yielded FileNotFoundError")
path_found = False
except NotADirectoryError:
pprint(f"Creating '{test_path}' yielded NotADirectoryError")
path_found = False


if test_path_name:
assert path_found == True
else:
# Handle case of strings that only contain characters make_safe_path says are invalid,
# leading to empty strings.
assert test_path_name == ""

0 comments on commit 0ebf2c7

Please sign in to comment.