Bugfix/sanitize workspaces (#451)

Update script adapters to use same path sanitizer as for workspace creation for script serialization. Resolves #450 Add hypothesis to enable property testing to stress the path sanitizer. Adds copy button to code listings in documentation
LLNL · Feb 13, 2025 · 0ebf2c7 · 0ebf2c7
1 parent 0802410
commit 0ebf2c7
Show file tree

Hide file tree

Showing 8 changed files with 208 additions and 10 deletions.
diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py
@@ -37,6 +37,7 @@
 from maestrowf.abstracts.enums import JobStatusCode, CancelCode
 from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord, \
     FluxFactory
+from maestrowf.utils import make_safe_path
 
 LOGGER = logging.getLogger(__name__)
 status_re = re.compile(r"Job \d+ status: (.*)$")
@@ -338,7 +339,7 @@ def _write_script(self, ws_path, step):
         to_be_scheduled, cmd, restart = self.get_scheduler_command(step)
 
         fname = "{}.{}".format(step.name, self._extension)
-        script_path = os.path.join(ws_path, fname)
+        script_path = make_safe_path(ws_path, fname)
         with open(script_path, "w") as script:
             script.write(self.get_header(step))
             cmd = "\n\n{}\n".format(cmd)

diff --git a/maestrowf/interfaces/script/localscriptadapter.py b/maestrowf/interfaces/script/localscriptadapter.py
@@ -35,7 +35,7 @@
     CancelCode
 from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord
 from maestrowf.abstracts.interfaces import ScriptAdapter
-from maestrowf.utils import start_process
+from maestrowf.utils import make_safe_path, start_process
 
 LOGGER = logging.getLogger(__name__)
 
@@ -80,7 +80,7 @@ def _write_script(self, ws_path, step):
         to_be_scheduled = False
 
         fname = "{}.sh".format(step.name)
-        script_path = os.path.join(ws_path, fname)
+        script_path = make_safe_path(ws_path, fname)
         with open(script_path, "w") as script:
             script.write("#!{0}\n\n{1}\n".format(self._exec, cmd))
 

diff --git a/maestrowf/interfaces/script/lsfscriptadapter.py b/maestrowf/interfaces/script/lsfscriptadapter.py
@@ -39,7 +39,7 @@
 from maestrowf.abstracts.enums import CancelCode, JobStatusCode, State, \
     SubmissionCode
 from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord
-
+from maestrowf.utils import make_safe_path
 
 LOGGER = logging.getLogger(__name__)
 
@@ -463,7 +463,8 @@ def _write_script(self, ws_path, step):
         to_be_scheduled, cmd, restart = self.get_scheduler_command(step)
 
         fname = "{}.{}".format(step.name, self._extension)
-        script_path = os.path.join(ws_path, fname)
+        script_path = make_safe_path(ws_path, fname)
+
         with open(script_path, "w") as script:
             if to_be_scheduled:
                 script.write(self.get_header(step))

diff --git a/maestrowf/interfaces/script/slurmscriptadapter.py b/maestrowf/interfaces/script/slurmscriptadapter.py
@@ -37,7 +37,7 @@
 from maestrowf.abstracts.enums import JobStatusCode, State, SubmissionCode, \
     CancelCode
 from maestrowf.interfaces.script import CancellationRecord, SubmissionRecord
-from maestrowf.utils import start_process
+from maestrowf.utils import make_safe_path, start_process
 
 LOGGER = logging.getLogger(__name__)
 
@@ -556,7 +556,7 @@ def _write_script(self, ws_path, step):
         to_be_scheduled, cmd, restart = self.get_scheduler_command(step)
 
         fname = "{}.slurm.sh".format(step.name)
-        script_path = os.path.join(ws_path, fname)
+        script_path = make_safe_path(ws_path, fname)
 
         if to_be_scheduled:
             header = self.get_header(step)

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -62,6 +62,7 @@ theme:
     - navigation.indexes
     # - toc.integrate
     - content.code.annotate
+    - content.code.copy
   palette:
     - scheme: slate
       primary: black

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool]
 [tool.poetry]
 name = "maestrowf"
-version = "1.1.11dev2"
+version = "1.1.11dev3"
 description = "A tool to easily orchestrate general computational workflows both locally and on supercomputers."
 license = "MIT License"
 classifiers = [
@@ -55,6 +55,10 @@ pytest-cov = "*"
 pre-commit = "*"
 tox-travis = "*"
 tox-pyenv = "*"
+hypothesis = [
+   {version = "<6.114", python = "3.8"},
+   {version = ">=6.114", python = ">=3.9"}
+]
 
 [tool.poetry.group.docs.dependencies]
 mkdocs = { version = "<2.0", python = "^3.9" }

diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
@@ -1,11 +1,16 @@
 from contextlib import nullcontext as does_not_raise
+from pathlib import Path
 
 import pytest
 from pytest import raises
 from rich.pretty import pprint
-from maestrowf.utils import parse_version
+
+from maestrowf.datastructures.core import ParameterGenerator
+from maestrowf.datastructures.core.parameters import Combination
+from maestrowf.utils import make_safe_path, parse_version
 from packaging.version import Version, InvalidVersion
 
+from hypothesis import given, HealthCheck, settings, strategies
 
 @pytest.mark.parametrize(
     "version_string, expected_version, error",
@@ -58,3 +63,112 @@ def test_version_greater(test_version_string, ref_version, expected, base_expect
 
     ver_cmp_base = test_version.base_version >= ref_version.base_version
     assert ver_cmp_base == base_expected
+
+
+@pytest.mark.parametrize(
+    "test_base_path, test_param_combos, expected_paths",
+    [
+        (
+            "hello_world",
+            {
+                "NAME": {
+                    "values": ["Pam", "Jim", "Michael", "Dwight"],
+                    "labels": "NAME.%%"
+                },
+                "GREETING": {
+                    "values": ["Hello", "Ciao", "Hey", "Hi"],
+                    "labels": "GREETING.%%"
+                },
+                "FAREWELL": {
+                    "values": ["Goodbye", "Farewell", "So long", "See you later"],
+                    "labels": "FAREWELL.%%"
+                }
+            },
+            ['hello_world/FAREWELL.Goodbye.GREETING.Hello.NAME.Pam',
+             'hello_world/FAREWELL.Farewell.GREETING.Ciao.NAME.Jim',
+             'hello_world/FAREWELL.So_long.GREETING.Hey.NAME.Michael',
+             'hello_world/FAREWELL.See_you_later.GREETING.Hi.NAME.Dwight']
+        ),
+        (
+            "foo_bar_kwargs",
+            {
+                "FOOBAR": {
+                    "values": ["--foobar 9 2 5 1", "--foobar 1 6 4 3"],
+                    "labels": "FOOBAR.%%"
+                },
+            },
+            ['foo_bar_kwargs/FOOBAR.--foobar_9_2_5_1',
+             'foo_bar_kwargs/FOOBAR.--foobar_1_6_4_3']
+        ),
+    ],
+)
+def test_param_combo_path_sanitizer(test_base_path, test_param_combos, expected_paths):
+    """
+    Test sanitization of combo strings for use in workspace paths.
+    """
+    params = ParameterGenerator()
+    for param_key, param_dict in test_param_combos.items():
+        if "name" not in param_dict:
+            params.add_parameter(param_key, param_dict['values'], param_dict['labels'])
+        else:
+            params.add_parameter(param_key, param_dict['values'], param_dict['labels'], param_dict['name'])
+
+    for idx, combo in enumerate(params):
+
+        combo_str = combo.get_param_string(list(test_param_combos.keys()))
+        print(f"{combo_str=}")        
+        test_path = make_safe_path(test_base_path, combo_str)
+        assert test_path == expected_paths[idx]
+
+
+
+@given(
+    strategies.text(
+        min_size=3,
+        max_size=20,
+        alphabet=strategies.characters(
+            codec='ascii', min_codepoint=32, max_codepoint=126
+        )
+    ),
+)
+@settings(max_examples=100, suppress_health_check=(HealthCheck.function_scoped_fixture,))
+def test_path_sanitizer(tmpdir, test_path_str):
+    """
+    Test sanitization of misc strings for use in workspace paths.
+    Similar to combo_path_sanitizer, but uses simpler string inputs for easier random testing
+
+    Notes
+    -----
+    Explicitly checking for empty paths for now to handle hypothesis' tendency to generate
+    strings that are all punctuation characters since those characters are simply removed from
+    from the safe paths...   Need better strategy for that kind of replacement long term
+    (force hashing?)
+    """
+    # print(f"{test_path_str=}")
+    # print(f"{tmpdir=}")         # switch with tmp_path when make_safe_path converted to pathlib
+    test_path_name = make_safe_path("", test_path_str)
+
+    test_path = Path(tmpdir) / test_path_name
+    # print(f"{test_path=}")
+    path_found = False
+    try:
+        test_path.touch()
+        for path in Path(tmpdir).iterdir():
+            if path.absolute() == test_path.absolute() and path.is_file():
+                path_found = True
+                break
+
+    except FileNotFoundError:
+        pprint(f"Creating '{test_path}' yielded FileNotFoundError")
+        path_found = False
+    except NotADirectoryError:
+        pprint(f"Creating '{test_path}' yielded NotADirectoryError")
+        path_found = False
+
+
+    if test_path_name:
+        assert path_found == True
+    else:
+        # Handle case of strings that only contain characters make_safe_path says are invalid,
+        # leading to empty strings.
+        assert test_path_name == ""