From bec4218b64c19f1bff2732c57b09f13ae9d3dd88 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 19 Apr 2024 15:15:28 +0200 Subject: [PATCH 1/8] Update minimum python version. --- .copier-answers.yml | 4 ++-- .github/workflows/python-version-ci | 2 +- pyproject.toml | 6 +----- requirements/dev.in | 3 --- tox.ini | 2 +- 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/.copier-answers.yml b/.copier-answers.yml index 431e78b..a2d9e9f 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -1,10 +1,10 @@ # Changes here will be overwritten by Copier; NEVER EDIT MANUALLY -_commit: ec8da94 +_commit: d91bf92 _src_path: gh:scipp/copier_template description: A daemon that creates a raw dataset using scicat interface whenever a new file is written by a file-writer. max_python: '3.12' -min_python: '3.9' +min_python: '3.12' namespace_package: '' nightly_deps: '' orgname: ScicatProject diff --git a/.github/workflows/python-version-ci b/.github/workflows/python-version-ci index bd28b9c..e4fba21 100644 --- a/.github/workflows/python-version-ci +++ b/.github/workflows/python-version-ci @@ -1 +1 @@ -3.9 +3.12 diff --git a/pyproject.toml b/pyproject.toml index 39063d4..9478972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,14 +18,11 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering", "Typing :: Typed", ] -requires-python = ">=3.9" +requires-python = ">=3.12" # IMPORTANT: # Run 'tox -e deps' after making changes here. This will update requirement files. @@ -82,5 +79,4 @@ enable_error_code = [ "redundant-expr", "truthy-bool", ] -show_error_codes = true warn_unreachable = true diff --git a/requirements/dev.in b/requirements/dev.in index 8e2cb06..9efd9d0 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -6,6 +6,3 @@ copier pip-compile-multi pre-commit - -# See https://github.com/copier-org/copier/issues/1568 -pyyaml-include<2 diff --git a/tox.ini b/tox.ini index 6f5eab7..731c297 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py39 +envlist = py312 isolated_build = true [testenv] From 0c1b24b3d96e5340abe9e6b1deb7782d7be81b0e Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 19 Apr 2024 15:22:44 +0200 Subject: [PATCH 2/8] Add minimum test. --- tests/minimum_test.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/minimum_test.py diff --git a/tests/minimum_test.py b/tests/minimum_test.py new file mode 100644 index 0000000..790739b --- /dev/null +++ b/tests/minimum_test.py @@ -0,0 +1,2 @@ +def test_package() -> None: + import scicat_filewriter_ingest # noqa: F401 From 7dd4c014a4892880f748dec6d600a777f660276a Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 5 Apr 2024 12:57:37 +0200 Subject: [PATCH 3/8] Update command name and configuration instruction. --- README.md | 11 +++++ pyproject.toml | 2 +- resources/config.sample.json | 42 +++++++++++++++++++ ...ilewriter_ingest.py => scicat_ingestor.py} | 0 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 resources/config.sample.json rename src/{scicat_filewriter_ingest.py => scicat_ingestor.py} (100%) diff --git a/README.md b/README.md index 8c43ec0..e946be1 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,14 @@ ## About A daemon that creates a raw dataset using scicat interface whenever a new file is written by a file-writer. + +## Configuration + +You can use a json file to configure options. +There is a template, ``resources/config.sample.json`` you can copy/paste. + +```bash +cp resources/config.sample.json config.20240405.json +``` + +Then ``scicat_ingestor`` will automatically use the configuration file. diff --git a/pyproject.toml b/pyproject.toml index 9478972..acc4d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dynamic = ["version"] "Source" = "https://github.com/ScicatProject/scicat-filewriter-ingest" [project.scripts] -scicat-filewriter-ingest = "scicat_filewriter_ingest:main" +scicat_ingestor = "scicat_ingestor:main" [tool.setuptools_scm] diff --git a/resources/config.sample.json b/resources/config.sample.json new file mode 100644 index 0000000..57f2aae --- /dev/null +++ b/resources/config.sample.json @@ -0,0 +1,42 @@ +{ + "kafka": { + "topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"], + "group_id": "GROUP_ID", + "bootstrap_servers": [ + "HOST:9092" + ], + "enable_auto_commit": true, + "auto_offset_reset": "earliest" + }, + "user_office": { + "host": "https://useroffice.host", + "username": "USERNAME", + "password": "PASSWORD" + }, + "scicat": { + "host": "https://scicat.host", + "username": "USERNAME", + "password": "PASSWORD" + }, + "dataset": { + "instrument_id" : "", + "instrument" : "INSTRUMENT_NAME", + "default_proposal_id" : "714781", + "ownable" : { + "ownerGroup": "ess", + "accessGroups": ["ymir","swap"] + } + }, + "options": { + "config_file" : "config.json", + "verbose" : false, + "file_log" : false, + "file_log_base_name" : ".scicat_ingestor_log", + "file_log_timestamp" : false, + "debug_level" : "INFO", + "system_log" : false, + "system_log_facility" : "mail", + "log_prefix" : " SFI: ", + "check_by_job_id" : true + } +} diff --git a/src/scicat_filewriter_ingest.py b/src/scicat_ingestor.py similarity index 100% rename from src/scicat_filewriter_ingest.py rename to src/scicat_ingestor.py From fd2d60b02471c621c4c548a84a1662c8a895d363 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 5 Apr 2024 14:15:11 +0200 Subject: [PATCH 4/8] Add configuration parsing methods and tests, and update configuration template. --- .gitignore | 3 + resources/config.sample.json | 7 +- src/scicat_configuration.py | 143 +++++++++++++++++++++++++++++ src/scicat_ingestor.py | 6 +- tests/test_scicat_configuration.py | 80 ++++++++++++++++ 5 files changed, 235 insertions(+), 4 deletions(-) create mode 100644 src/scicat_configuration.py create mode 100644 tests/test_scicat_configuration.py diff --git a/.gitignore b/.gitignore index 74ee902..d896434 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ __pycache__/ *.cif *.rcif *.ort + +# User configuration +config.*.json diff --git a/resources/config.sample.json b/resources/config.sample.json index 57f2aae..aec802d 100644 --- a/resources/config.sample.json +++ b/resources/config.sample.json @@ -31,12 +31,13 @@ "config_file" : "config.json", "verbose" : false, "file_log" : false, - "file_log_base_name" : ".scicat_ingestor_log", + "log_file_suffix" : ".scicat_ingestor_log", "file_log_timestamp" : false, - "debug_level" : "INFO", + "log_level" : "INFO", "system_log" : false, "system_log_facility" : "mail", "log_prefix" : " SFI: ", - "check_by_job_id" : true + "check_by_job_id" : true, + "pyscicat": null } } diff --git a/src/scicat_configuration.py b/src/scicat_configuration.py new file mode 100644 index 0000000..958798b --- /dev/null +++ b/src/scicat_configuration.py @@ -0,0 +1,143 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) +import argparse +from dataclasses import dataclass +from typing import Mapping, Optional + + +def build_main_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + + group = parser.add_argument_group('Scicat Ingestor Options') + + group.add_argument( + '-c', + '--cf', + '--config', + '--config-file', + default='config.20240405.json', + dest='config_file', + help='Configuration file name. Default: config.20240405.json', + type=str, + ) + group.add_argument( + '-v', + '--verbose', + dest='verbose', + help='Provide logging on stdout', + action='store_true', + default=False, + ) + group.add_argument( + '--file-log', + dest='file_log', + help='Provide logging on file', + action='store_true', + default=False, + ) + group.add_argument( + '--log-file-suffix', + dest='log_file_suffix', + help='Suffix of the log file name', + default='.scicat_ingestor_log', + ) + group.add_argument( + '--file-log-timestamp', + dest='file_log_timestamp', + help='Provide logging on the system log', + action='store_true', + default=False, + ) + group.add_argument( + '--system-log', + dest='system_log', + help='Provide logging on the system log', + action='store_true', + default=False, + ) + group.add_argument( + '--system-log-facility', + dest='system_log_facility', + help='Facility for system log', + default='mail', + ) + group.add_argument( + '--log-prefix', + dest='log_prefix', + help='Prefix for log messages', + default=' SFI: ', + ) + group.add_argument( + '--log-level', dest='log_level', help='Logging level', default='INFO', type=str + ) + group.add_argument( + '--check-by-job-id', + dest='check_by_job_id', + help='Check the status of a job by job_id', + action='store_true', + default=True, + ) + group.add_argument( + '--pyscicat', + dest='pyscicat', + help='Location where a specific version of pyscicat is available', + default=None, + type=str, + ) + return parser + + +@dataclass +class RunOptions: + config_file: str + verbose: bool + file_log: bool + log_file_suffix: str + file_log_timestamp: bool + system_log: bool + system_log_facility: str + log_prefix: str + log_level: str + check_by_job_id: bool + pyscicat: Optional[str] = None + + +@dataclass +class ScicatConfig: + original_dict: Mapping + """Original configuration dictionary in the json file.""" + run_options: RunOptions + """Merged configuration dictionary with command line arguments.""" + + +def build_scicat_config(input_args: argparse.Namespace) -> ScicatConfig: + """Merge configuration from the configuration file and input arguments.""" + import copy + import json + import pathlib + from types import MappingProxyType + + # Read configuration file + if ( + input_args.config_file + and (config_file_path := pathlib.Path(input_args.config_file)).is_file() + ): + config_dict = json.loads(config_file_path.read_text()) + else: + config_dict = dict() + + # Overwrite deep-copied options with command line arguments + run_option_dict: dict = copy.deepcopy(config_dict.setdefault('options', dict())) + for arg_name, arg_value in vars(input_args).items(): + if arg_value is not None: + run_option_dict[arg_name] = arg_value + + # Protect original configuration by making it read-only + for key, value in config_dict.items(): + config_dict[key] = MappingProxyType(value) + + # Wrap configuration in a dataclass + return ScicatConfig( + original_dict=MappingProxyType(config_dict), + run_options=RunOptions(**run_option_dict), + ) diff --git a/src/scicat_ingestor.py b/src/scicat_ingestor.py index 13aa104..b07e2a8 100644 --- a/src/scicat_ingestor.py +++ b/src/scicat_ingestor.py @@ -1,7 +1,11 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) +from scicat_configuration import build_main_arg_parser, build_scicat_config def main() -> None: """Main entry point of the app.""" - ... + arg_parser = build_main_arg_parser() + arg_namespace = arg_parser.parse_args() + config = build_scicat_config(arg_namespace) + print(config) diff --git a/tests/test_scicat_configuration.py b/tests/test_scicat_configuration.py new file mode 100644 index 0000000..85dbd5b --- /dev/null +++ b/tests/test_scicat_configuration.py @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject) +import argparse + +import pytest + +from scicat_configuration import ScicatConfig + + +@pytest.fixture +def main_arg_parser() -> argparse.ArgumentParser: + """Return the namespace of the main argument parser.""" + from scicat_configuration import build_main_arg_parser + + return build_main_arg_parser() + + +def test_scicat_arg_parser_configuration_matches( + main_arg_parser: argparse.ArgumentParser, +) -> None: + """Test if options in the configuration file matches the argument parser.""" + import json + import pathlib + + scicat_namespace = main_arg_parser.parse_args( + ['-c', 'resources/config.sample.json'] + ) + + # Check if the configuration file is the same + assert scicat_namespace.config_file == 'resources/config.sample.json' + config_path = pathlib.Path(scicat_namespace.config_file) + config_from_args: dict = vars(scicat_namespace) + + # Parse the configuration file + assert config_path.exists() + config_from_file: dict = json.loads(config_path.read_text()) + main_options: dict = config_from_file.get('options', dict()) + + # Check if all keys matches + all_keys = set(config_from_args.keys()).union(main_options.keys()) + for key in all_keys: + assert key in config_from_args + assert key in main_options + + +def test_build_scicat_config_default(main_arg_parser: argparse.ArgumentParser) -> None: + """Test if the configuration can be built from default arguments.""" + from scicat_configuration import build_scicat_config + + scicat_namespace = main_arg_parser.parse_args() + scicat_config = build_scicat_config(scicat_namespace) + assert scicat_config.original_dict['options']['config_file'] == 'config.json' + assert scicat_config.run_options.config_file == 'config.20240405.json' + + +@pytest.fixture +def scicat_config(main_arg_parser: argparse.ArgumentParser) -> ScicatConfig: + from scicat_configuration import build_scicat_config + + scicat_namespace = main_arg_parser.parse_args( + ['-c', 'resources/config.sample.json', '--verbose'] + ) + return build_scicat_config(scicat_namespace) + + +def test_build_scicat_config(scicat_config: ScicatConfig) -> None: + """Test if the configuration can be built from arguments.""" + assert scicat_config.original_dict['options']['config_file'] == 'config.json' + assert scicat_config.run_options.config_file == 'resources/config.sample.json' + assert not scicat_config.original_dict['options']['verbose'] + assert scicat_config.run_options.verbose + + +def test_scicat_config_original_dict_read_only(scicat_config: ScicatConfig) -> None: + """Test if the original dictionary is read-only.""" + from types import MappingProxyType + + assert isinstance(scicat_config.original_dict, MappingProxyType) + for sub_option in scicat_config.original_dict.values(): + assert isinstance(sub_option, MappingProxyType) From 4458a928faac3cd9ed1eefa9b9e95d82fd18a24b Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 5 Apr 2024 14:28:02 +0200 Subject: [PATCH 5/8] Fix broken test. --- tests/test_scicat_configuration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_scicat_configuration.py b/tests/test_scicat_configuration.py index 85dbd5b..193ac8e 100644 --- a/tests/test_scicat_configuration.py +++ b/tests/test_scicat_configuration.py @@ -49,7 +49,6 @@ def test_build_scicat_config_default(main_arg_parser: argparse.ArgumentParser) - scicat_namespace = main_arg_parser.parse_args() scicat_config = build_scicat_config(scicat_namespace) - assert scicat_config.original_dict['options']['config_file'] == 'config.json' assert scicat_config.run_options.config_file == 'config.20240405.json' From 5940e1d7413b5f12f26c23e9b6021030e8387cd6 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 5 Apr 2024 14:30:34 +0200 Subject: [PATCH 6/8] Update Readme [ci-skip] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e946be1..0318437 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ A daemon that creates a raw dataset using scicat interface whenever a new file i ## Configuration You can use a json file to configure options. -There is a template, ``resources/config.sample.json`` you can copy/paste. +There is a template, ``resources/config.sample.json`` you can copy/paste to make your own configuration file. ```bash cp resources/config.sample.json config.20240405.json From 57337f7b429e1f333856f538346691b76a08014a Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 5 Apr 2024 14:33:07 +0200 Subject: [PATCH 7/8] Add minimal test of the script. --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5f56a06..4586104 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,3 +59,4 @@ jobs: with: name: CoverageReport path: coverage_html/ + - run: scicat_ingestor --help # Minimal test of the script From c6f7362846fc194927d9ebb5413154708dc520c7 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 5 Apr 2024 14:35:33 +0200 Subject: [PATCH 8/8] Move script test to tox. --- .github/workflows/test.yml | 1 - tox.ini | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4586104..5f56a06 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,4 +59,3 @@ jobs: with: name: CoverageReport path: coverage_html/ - - run: scicat_ingestor --help # Minimal test of the script diff --git a/tox.ini b/tox.ini index 731c297..dff601d 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,7 @@ deps = -r requirements/test.txt setenv = JUPYTER_PLATFORM_DIRS = 1 commands = pytest {posargs} + scicat_ingestor --help # Minimal test of the script [testenv:nightly] deps = -r requirements/nightly.txt