From 56a373ab51472eb5e789a93ac8af2f0cbaa8d196 Mon Sep 17 00:00:00 2001 From: matt garber Date: Wed, 12 Feb 2025 08:12:35 -0500 Subject: [PATCH] Cutover to rich for all console output (#351) * Cutover to rich for all console output * Updated version output check * pragma --- MAINTAINER.md | 16 ++++++++++++++-- cumulus_library/actions/builder.py | 15 ++++++++------- cumulus_library/actions/cleaner.py | 11 ++++++----- cumulus_library/actions/importer.py | 1 - cumulus_library/actions/uploader.py | 8 ++++---- cumulus_library/base_utils.py | 5 +++-- cumulus_library/builders/counts.py | 6 +++--- cumulus_library/builders/valueset/vsac.py | 5 +++-- cumulus_library/cli.py | 17 ++++++++++------- cumulus_library/databases/duckdb.py | 2 +- cumulus_library/databases/utils.py | 3 ++- pyproject.toml | 1 + tests/conftest.py | 12 ++++++------ tests/regression/run_regression.py | 13 +++++++------ tests/test_cli.py | 9 +++++++-- tests/test_counts_builder.py | 10 +++++----- .../study_dynamic_prefix/gen_prefix.py | 4 +++- 17 files changed, 83 insertions(+), 55 deletions(-) diff --git a/MAINTAINER.md b/MAINTAINER.md index 3b37ae52..b35ecf87 100644 --- a/MAINTAINER.md +++ b/MAINTAINER.md @@ -4,6 +4,17 @@ This document is intended for users contributing/maintaining this repository. It is not comprehensive, but aims to capture items relevant to architecture that are not covered in another document. +## Writing to the console + +Since the person writing this paragraph is **extremely** guilty of inserting print debug +statements and then missing them during cleanup, we are using the following rules +for writing console output: + +- When developing CLI UI, use one of two approaches: either use `rich.print()` for +basic output, or `rich.get_console()` if you want access to styling features +- When debugging, use the stdlib `print()` function, which ruff will flag as part of +the precommit hook. + ## Intended usage and database schemas Since these terms are used somewhat haphazardly in different database implementations, @@ -71,15 +82,16 @@ You should print your desired prefix to stdout. Example: ```python import argparse +import rich parser = argparse.ArgumentParser() parser.add_argument("--study") args, _rest = parser.parse_known_args() if args.study: - print(f"data_metrics_{args.study}") + rich.print(f"data_metrics_{args.study}") else: - print("data_metrics") + rich.print("data_metrics") ``` #### Usage diff --git a/cumulus_library/actions/builder.py b/cumulus_library/actions/builder.py index 74975587..0a1445c9 100644 --- a/cumulus_library/actions/builder.py +++ b/cumulus_library/actions/builder.py @@ -9,7 +9,8 @@ import tomllib import zipfile -from rich.progress import Progress, TaskID +import rich +from rich import progress from cumulus_library import ( BaseTableBuilder, @@ -449,14 +450,14 @@ def _query_error( query_and_filename: list, exit_message: str, ) -> None: - print( + rich.print( "An error occurred executing the following query in ", f"{query_and_filename[1]}:", file=sys.stderr, ) - print("--------", file=sys.stderr) - print(query_and_filename[0], file=sys.stderr) - print("--------", file=sys.stderr) + rich.print("--------", file=sys.stderr) + rich.print(query_and_filename[0], file=sys.stderr) + rich.print("--------", file=sys.stderr) log_utils.log_transaction(config=config, manifest=manifest, status=enums.LogStatuses.ERROR) sys.exit(exit_message) @@ -467,8 +468,8 @@ def _execute_build_queries( *, cursor: databases.DatabaseCursor, queries: list, - progress: Progress, - task: TaskID, + progress: progress.Progress, + task: progress.TaskID, ) -> None: """Handler for executing create table queries and displaying console output. diff --git a/cumulus_library/actions/cleaner.py b/cumulus_library/actions/cleaner.py index 131de0e6..1333bc8b 100644 --- a/cumulus_library/actions/cleaner.py +++ b/cumulus_library/actions/cleaner.py @@ -1,6 +1,7 @@ import sys -from rich.progress import Progress, TaskID +import rich +from rich import progress from cumulus_library import base_utils, databases, enums, errors, study_manifest from cumulus_library.template_sql import base_templates @@ -10,8 +11,8 @@ def _execute_drop_queries( cursor: databases.DatabaseCursor, verbose: bool, view_table_list: list, - progress: Progress, - task: TaskID, + progress: progress.Progress, + task: progress.TaskID, ) -> None: """Handler for executing drop view/table queries and displaying console output. @@ -138,9 +139,9 @@ def clean_study( ): view_table_list.remove(view_table) if prefix: - print("The following views/tables were selected by prefix:") + rich.print("The following views/tables were selected by prefix:") for view_table in view_table_list: - print(f" {view_table[0]}") + rich.print(f" {view_table[0]}") confirm = input("Remove these tables? (y/N)") if confirm.lower() not in ("y", "yes"): sys.exit("Table cleaning aborted") diff --git a/cumulus_library/actions/importer.py b/cumulus_library/actions/importer.py index 160f9e73..3a5f94ac 100644 --- a/cumulus_library/actions/importer.py +++ b/cumulus_library/actions/importer.py @@ -46,7 +46,6 @@ def import_archive(config: base_utils.StudyConfig, *, archive_path: pathlib.Path """ # Ensure we've got something that looks like a valid database export - print(archive_path) if not archive_path.exists(): raise errors.StudyImportError(f"File {archive_path} not found.") try: diff --git a/cumulus_library/actions/uploader.py b/cumulus_library/actions/uploader.py index 4e89be60..614b050b 100644 --- a/cumulus_library/actions/uploader.py +++ b/cumulus_library/actions/uploader.py @@ -4,15 +4,15 @@ from pathlib import Path import requests +import rich from pandas import read_parquet -from rich import console, progress from cumulus_library import base_utils def upload_data( - progress_bar: progress.Progress, - file_upload_progress: progress.TaskID, + progress_bar: rich.progress.Progress, + file_upload_progress: rich.progress.TaskID, file_path: Path, version: str, args: dict, @@ -20,7 +20,7 @@ def upload_data( """Fetches presigned url and uploads file to aggregator""" study = file_path.parts[-2] file_name = file_path.parts[-1] - c = console.Console() + c = rich.get_console() progress_bar.update(file_upload_progress, description=f"Uploading {study}/{file_name}") data_package = file_name.split(".")[0] prefetch_res = requests.post( diff --git a/cumulus_library/base_utils.py b/cumulus_library/base_utils.py index adf06955..5bfb91db 100644 --- a/cumulus_library/base_utils.py +++ b/cumulus_library/base_utils.py @@ -7,6 +7,7 @@ import zipfile from contextlib import contextmanager +import rich from rich import progress from cumulus_library import databases, study_manifest @@ -80,8 +81,8 @@ def query_console_output( ): """Convenience context manager for handling console output""" if verbose: - print() - print(query) + rich.print() + rich.print(query) yield if not verbose: progress_bar.advance(task) diff --git a/cumulus_library/builders/counts.py b/cumulus_library/builders/counts.py index 84fedd7c..50a33d8a 100644 --- a/cumulus_library/builders/counts.py +++ b/cumulus_library/builders/counts.py @@ -2,7 +2,7 @@ import pathlib -from rich import console +import rich from cumulus_library import BaseTableBuilder, errors, study_manifest from cumulus_library.builders.statistics_templates import counts_templates @@ -21,7 +21,7 @@ def __init__( if manifest: self.study_prefix = manifest.get_study_prefix() elif study_prefix: - c = console.Console() + c = rich.get_console() c.print( "[yellow]Warning: providing study_prefix to a CountsBuilder is deprecated" " and will be removed in a future version" @@ -50,7 +50,7 @@ def get_table_name(self, table_name: str, duration=None) -> str: # This tries to non-destructively get a table to be named something like # 'study__count_resource_[everything else]' # """ - # c = console.Console() + # c = rich.get_console() # if f"__count_{fhir_resource}" in table_name: # return table_name # name_parts = table_name.split("__")[-1].split("_") diff --git a/cumulus_library/builders/valueset/vsac.py b/cumulus_library/builders/valueset/vsac.py index 43ae42f8..fc4ac9be 100644 --- a/cumulus_library/builders/valueset/vsac.py +++ b/cumulus_library/builders/valueset/vsac.py @@ -3,6 +3,7 @@ import pathlib import pandas +import rich from cumulus_library import base_utils from cumulus_library.apis import umls @@ -32,9 +33,9 @@ def download_oid_data( path = pathlib.Path(__file__).parent.parent / "data" # pragma: no cover path.mkdir(exist_ok=True, parents=True) if not (force_upload) and (path / f"{steward}.parquet").exists(): - print(f"{steward} data present at {path}, skipping download.") + rich.print(f"{steward} data present at {path}, skipping download.") return False - print(f"Downloading {steward} to {path}") + rich.print(f"Downloading {steward} to {path}") api = umls.UmlsApi(api_key=api_key or api_key) output = [] diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py index 281f98c1..7e897e53 100755 --- a/cumulus_library/cli.py +++ b/cumulus_library/cli.py @@ -287,12 +287,12 @@ def get_studies_by_manifest_path(path: pathlib.Path) -> dict[str, pathlib.Path]: def run_cli(args: dict): """Controls which library tasks are run based on CLI arguments""" - console = rich.console.Console() + console = rich.get_console() if args["action"] == "upload": try: uploader.upload_files(args) except requests.RequestException as e: - print(str(e)) + rich.print(str(e)) sys.exit() # all other actions require connecting to the database @@ -404,7 +404,7 @@ def main(cli_args=None): parser = cli_parser.get_parser() args = vars(parser.parse_args(cli_args)) - + console = rich.get_console() arg_env_pairs = ( ("data_path", "CUMULUS_LIBRARY_DATA_PATH"), ("db_type", "CUMULUS_LIBRARY_DB_TYPE"), @@ -441,7 +441,9 @@ def main(cli_args=None): sys.exit(1) if args["action"] == "version": - print(f"cumulus-library version: {__version__}\nInstalled studies:") + table = rich.table.Table(title=f"cumulus-library version: {__version__}") + table.add_column("Study Name", style="green") + table.add_column("Version", style="blue") studies = get_study_dict(args.get("study_dir")) for study in sorted(studies.keys()): try: @@ -451,9 +453,10 @@ def main(cli_args=None): study_init = importlib.util.module_from_spec(spec) sys.modules["study_init"] = study_init spec.loader.exec_module(study_init) - print(f" {study}: {study_init.__version__}") + table.add_row(study, study_init.__version__) except Exception: - print(f" {study}: no version defined") + table.add_row(study, "No version defined") + console.print(table) sys.exit(0) if len(read_env_vars) > 0: @@ -465,7 +468,7 @@ def main(cli_args=None): table.add_row(row[0], "#########") # pragma: no cover else: table.add_row(row[0], row[1]) - console = rich.console.Console() + console.print(table) options = {} diff --git a/cumulus_library/databases/duckdb.py b/cumulus_library/databases/duckdb.py index 1d29e345..1175b2c2 100644 --- a/cumulus_library/databases/duckdb.py +++ b/cumulus_library/databases/duckdb.py @@ -129,7 +129,7 @@ def _compat_to_utf8(value: str | None) -> datetime.date | None: """See the create_function() call for to_utf8 for more background""" # This is exercised locally on unit tests but is not in CI. Not sure why, # and not sure it's worth debugging - return value + return value # pragma: no cover @staticmethod def _compat_from_iso8601_timestamp( diff --git a/cumulus_library/databases/utils.py b/cumulus_library/databases/utils.py index a5f7d8ac..c6ba7431 100644 --- a/cumulus_library/databases/utils.py +++ b/cumulus_library/databases/utils.py @@ -6,6 +6,7 @@ import pyarrow import pyarrow.dataset import pyarrow.json +import rich from cumulus_library import base_utils, db_config, errors from cumulus_library.databases import athena, base, duckdb @@ -140,7 +141,7 @@ def create_db_backend(args: dict[str, str]) -> (base.DatabaseBackend, str): # TODO: reevaluate as DuckDB's local schema support evolves. # https://duckdb.org/docs/sql/statements/set.html#syntax if not (args.get("schema_name") is None or args["schema_name"] == "main"): - print( # pragma: no cover + rich.print( # pragma: no cover "Warning - local schema names are not yet supported by duckDB's " "python library - using 'main' instead" ) diff --git a/pyproject.toml b/pyproject.toml index 7ac2863d..e826bded 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ select = [ "PLE", # pylint errors "RUF", # the ruff developer's own rules "S", # bandit security warnings + "T201", #checks for print statements "UP", # alert you when better syntax is available in your python version ] [tool.ruff.lint.per-file-ignores] diff --git a/tests/conftest.py b/tests/conftest.py index 03b3dc12..0d5c78ec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ import numpy import pandas import pytest -from rich import console, table +import rich from cumulus_library import base_utils, cli from cumulus_library.databases import create_db_backend @@ -161,7 +161,7 @@ def debug_table_schema(cursor, table): f"select column_name, data_type from information_schema.columns where table_name='{table}'" ).fetchall() for line in table_schema: - print(line) + rich.print(line) def debug_table_head(cursor, table, rows=3, cols="*"): @@ -172,8 +172,8 @@ def debug_table_head(cursor, table, rows=3, cols="*"): for field in cursor.description: col_names.append(field[0]) for line in table_schema: - print(line) - print() + rich.print(line) + rich.print() def debug_diff_tables(cols, found, ref, pos=0): @@ -181,7 +181,7 @@ def debug_diff_tables(cols, found, ref, pos=0): found = found[pos] if len(found) > pos else [] ref = ref[pos] if len(ref) > pos else [] max_size = max(len(found), len(ref)) - diff_table = table.Table(title=f"Row {pos} delta") + diff_table = rich.table.Table(title=f"Row {pos} delta") diff_table.add_column("DB Column") diff_table.add_column("Found in DB") diff_table.add_column("Reference") @@ -191,7 +191,7 @@ def debug_diff_tables(cols, found, ref, pos=0): str(found[i]) if i < len(found) else "**None**", str(ref[i]) if i < len(ref) else "**None**", ) - output = console.Console() + output = rich.get_console() output.print(diff_table) diff --git a/tests/regression/run_regression.py b/tests/regression/run_regression.py index b2636beb..6ec79e73 100644 --- a/tests/regression/run_regression.py +++ b/tests/regression/run_regression.py @@ -15,17 +15,18 @@ this approach.""" import os +import pathlib import sys -from pathlib import Path +import rich from pandas import read_parquet VOCAB_ICD_ROW_COUNT = 403231 def regress_core(): - ref_path = f"{Path(__file__).resolve().parent}/reference" - export_path = f"{Path(__file__).resolve().parent}/data_export/core" + ref_path = f"{pathlib.Path(__file__).resolve().parent}/reference" + export_path = f"{pathlib.Path(__file__).resolve().parent}/data_export/core" references = set(os.listdir(ref_path)) exports = set(os.listdir(export_path)) @@ -79,10 +80,10 @@ def regress_core(): ) if len(diffs) > 0: for row in diffs: - print(f"--- {row[0]} ---") - print(row[1]) + rich.print(f"--- {row[0]} ---") + rich.print(row[1]) sys.exit(f"❌ Found {len(diffs)} difference(s) in core study. ❌") - print("✅ Core study reference and export matched ✅") + rich.print("✅ Core study reference and export matched ✅") if __name__ == "__main__": diff --git a/tests/test_cli.py b/tests/test_cli.py index 75fb8093..75b428c9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -866,8 +866,13 @@ def test_version(capfd): ) out, _ = capfd.readouterr() assert f"cumulus-library version: {__version__}" in out - assert "study_valid: 1.0.0" in out - assert "study_invalid_bad_query: no version defined" in out + out = out.split("\n") + valid = list(filter(lambda x: " study_valid " in x, out)) + assert len(valid) == 1 + assert " 1.0.0 " in valid[0] + invalid = list(filter(lambda x: " study_invalid_bad_query " in x, out)) + assert len(invalid) == 1 + assert " No version defined " in invalid[0] @mock.patch.dict( diff --git a/tests/test_counts_builder.py b/tests/test_counts_builder.py index dd745dfe..26ec9b41 100644 --- a/tests/test_counts_builder.py +++ b/tests/test_counts_builder.py @@ -78,12 +78,12 @@ def test_get_where_clauses(clause, min_subject, expected, raises): # name = builder.coerce_table_name(table_name, fhir_resource) # assert name == new_name # for thing in mock_console.mock_calls: -# print(thing) -# print(type(thing)) -# print("------") +# rich.print(thing) +# rich.print(type(thing)) +# rich.print("------") # for thing in mock_console.print().mock_calls: -# print(thing) -# print(type(thing)) +# rich.print(thing) +# rich.print(type(thing)) # assert mock_console.print.called == warns diff --git a/tests/test_data/study_dynamic_prefix/gen_prefix.py b/tests/test_data/study_dynamic_prefix/gen_prefix.py index 5cbb8470..45734ab6 100644 --- a/tests/test_data/study_dynamic_prefix/gen_prefix.py +++ b/tests/test_data/study_dynamic_prefix/gen_prefix.py @@ -1,7 +1,9 @@ import argparse +import rich + parser = argparse.ArgumentParser() parser.add_argument("--prefix", default="dynamic") args, _rest = parser.parse_known_args() -print(args.prefix) +rich.print(args.prefix)