From 56a373ab51472eb5e789a93ac8af2f0cbaa8d196 Mon Sep 17 00:00:00 2001
From: matt garber <Matthew.Garber@childrens.harvard.edu>
Date: Wed, 12 Feb 2025 08:12:35 -0500
Subject: [PATCH] Cutover to rich for all console output (#351)

* Cutover to rich for all console output

* Updated version output check

* pragma
---
 MAINTAINER.md                                   | 16 ++++++++++++++--
 cumulus_library/actions/builder.py              | 15 ++++++++-------
 cumulus_library/actions/cleaner.py              | 11 ++++++-----
 cumulus_library/actions/importer.py             |  1 -
 cumulus_library/actions/uploader.py             |  8 ++++----
 cumulus_library/base_utils.py                   |  5 +++--
 cumulus_library/builders/counts.py              |  6 +++---
 cumulus_library/builders/valueset/vsac.py       |  5 +++--
 cumulus_library/cli.py                          | 17 ++++++++++-------
 cumulus_library/databases/duckdb.py             |  2 +-
 cumulus_library/databases/utils.py              |  3 ++-
 pyproject.toml                                  |  1 +
 tests/conftest.py                               | 12 ++++++------
 tests/regression/run_regression.py              | 13 +++++++------
 tests/test_cli.py                               |  9 +++++++--
 tests/test_counts_builder.py                    | 10 +++++-----
 .../study_dynamic_prefix/gen_prefix.py          |  4 +++-
 17 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/MAINTAINER.md b/MAINTAINER.md
index 3b37ae52..b35ecf87 100644
--- a/MAINTAINER.md
+++ b/MAINTAINER.md
@@ -4,6 +4,17 @@ This document is intended for users contributing/maintaining this repository.
 It is not comprehensive, but aims to capture items relevant to architecture
 that are not covered in another document.
 
+## Writing to the console
+
+Since the person writing this paragraph is **extremely** guilty of inserting print debug
+statements and then missing them during cleanup, we are using the following rules 
+for writing console output:
+
+- When developing CLI UI, use one of two approaches: either use `rich.print()` for
+basic output, or `rich.get_console()` if you want access to styling features
+- When debugging, use the stdlib `print()` function, which ruff will flag as part of
+the precommit hook. 
+
 ## Intended usage and database schemas
 
 Since these terms are used somewhat haphazardly in different database implementations, 
@@ -71,15 +82,16 @@ You should print your desired prefix to stdout.
 Example:
 ```python
 import argparse
+import rich
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--study")
 args, _rest = parser.parse_known_args()
 
 if args.study:
-  print(f"data_metrics_{args.study}")
+  rich.print(f"data_metrics_{args.study}")
 else:
-  print("data_metrics")
+  rich.print("data_metrics")
 ```
 
 #### Usage
diff --git a/cumulus_library/actions/builder.py b/cumulus_library/actions/builder.py
index 74975587..0a1445c9 100644
--- a/cumulus_library/actions/builder.py
+++ b/cumulus_library/actions/builder.py
@@ -9,7 +9,8 @@
 import tomllib
 import zipfile
 
-from rich.progress import Progress, TaskID
+import rich
+from rich import progress
 
 from cumulus_library import (
     BaseTableBuilder,
@@ -449,14 +450,14 @@ def _query_error(
     query_and_filename: list,
     exit_message: str,
 ) -> None:
-    print(
+    rich.print(
         "An error occurred executing the following query in ",
         f"{query_and_filename[1]}:",
         file=sys.stderr,
     )
-    print("--------", file=sys.stderr)
-    print(query_and_filename[0], file=sys.stderr)
-    print("--------", file=sys.stderr)
+    rich.print("--------", file=sys.stderr)
+    rich.print(query_and_filename[0], file=sys.stderr)
+    rich.print("--------", file=sys.stderr)
     log_utils.log_transaction(config=config, manifest=manifest, status=enums.LogStatuses.ERROR)
     sys.exit(exit_message)
 
@@ -467,8 +468,8 @@ def _execute_build_queries(
     *,
     cursor: databases.DatabaseCursor,
     queries: list,
-    progress: Progress,
-    task: TaskID,
+    progress: progress.Progress,
+    task: progress.TaskID,
 ) -> None:
     """Handler for executing create table queries and displaying console output.
 
diff --git a/cumulus_library/actions/cleaner.py b/cumulus_library/actions/cleaner.py
index 131de0e6..1333bc8b 100644
--- a/cumulus_library/actions/cleaner.py
+++ b/cumulus_library/actions/cleaner.py
@@ -1,6 +1,7 @@
 import sys
 
-from rich.progress import Progress, TaskID
+import rich
+from rich import progress
 
 from cumulus_library import base_utils, databases, enums, errors, study_manifest
 from cumulus_library.template_sql import base_templates
@@ -10,8 +11,8 @@ def _execute_drop_queries(
     cursor: databases.DatabaseCursor,
     verbose: bool,
     view_table_list: list,
-    progress: Progress,
-    task: TaskID,
+    progress: progress.Progress,
+    task: progress.TaskID,
 ) -> None:
     """Handler for executing drop view/table queries and displaying console output.
 
@@ -138,9 +139,9 @@ def clean_study(
         ):
             view_table_list.remove(view_table)
     if prefix:
-        print("The following views/tables were selected by prefix:")
+        rich.print("The following views/tables were selected by prefix:")
         for view_table in view_table_list:
-            print(f"  {view_table[0]}")
+            rich.print(f"  {view_table[0]}")
         confirm = input("Remove these tables? (y/N)")
         if confirm.lower() not in ("y", "yes"):
             sys.exit("Table cleaning aborted")
diff --git a/cumulus_library/actions/importer.py b/cumulus_library/actions/importer.py
index 160f9e73..3a5f94ac 100644
--- a/cumulus_library/actions/importer.py
+++ b/cumulus_library/actions/importer.py
@@ -46,7 +46,6 @@ def import_archive(config: base_utils.StudyConfig, *, archive_path: pathlib.Path
     """
 
     # Ensure we've got something that looks like a valid database export
-    print(archive_path)
     if not archive_path.exists():
         raise errors.StudyImportError(f"File {archive_path} not found.")
     try:
diff --git a/cumulus_library/actions/uploader.py b/cumulus_library/actions/uploader.py
index 4e89be60..614b050b 100644
--- a/cumulus_library/actions/uploader.py
+++ b/cumulus_library/actions/uploader.py
@@ -4,15 +4,15 @@
 from pathlib import Path
 
 import requests
+import rich
 from pandas import read_parquet
-from rich import console, progress
 
 from cumulus_library import base_utils
 
 
 def upload_data(
-    progress_bar: progress.Progress,
-    file_upload_progress: progress.TaskID,
+    progress_bar: rich.progress.Progress,
+    file_upload_progress: rich.progress.TaskID,
     file_path: Path,
     version: str,
     args: dict,
@@ -20,7 +20,7 @@ def upload_data(
     """Fetches presigned url and uploads file to aggregator"""
     study = file_path.parts[-2]
     file_name = file_path.parts[-1]
-    c = console.Console()
+    c = rich.get_console()
     progress_bar.update(file_upload_progress, description=f"Uploading {study}/{file_name}")
     data_package = file_name.split(".")[0]
     prefetch_res = requests.post(
diff --git a/cumulus_library/base_utils.py b/cumulus_library/base_utils.py
index adf06955..5bfb91db 100644
--- a/cumulus_library/base_utils.py
+++ b/cumulus_library/base_utils.py
@@ -7,6 +7,7 @@
 import zipfile
 from contextlib import contextmanager
 
+import rich
 from rich import progress
 
 from cumulus_library import databases, study_manifest
@@ -80,8 +81,8 @@ def query_console_output(
 ):
     """Convenience context manager for handling console output"""
     if verbose:
-        print()
-        print(query)
+        rich.print()
+        rich.print(query)
     yield
     if not verbose:
         progress_bar.advance(task)
diff --git a/cumulus_library/builders/counts.py b/cumulus_library/builders/counts.py
index 84fedd7c..50a33d8a 100644
--- a/cumulus_library/builders/counts.py
+++ b/cumulus_library/builders/counts.py
@@ -2,7 +2,7 @@
 
 import pathlib
 
-from rich import console
+import rich
 
 from cumulus_library import BaseTableBuilder, errors, study_manifest
 from cumulus_library.builders.statistics_templates import counts_templates
@@ -21,7 +21,7 @@ def __init__(
         if manifest:
             self.study_prefix = manifest.get_study_prefix()
         elif study_prefix:
-            c = console.Console()
+            c = rich.get_console()
             c.print(
                 "[yellow]Warning: providing study_prefix to a CountsBuilder is deprecated"
                 " and will be removed in a future version"
@@ -50,7 +50,7 @@ def get_table_name(self, table_name: str, duration=None) -> str:
     #     This tries to non-destructively get a table to be named something like
     #     'study__count_resource_[everything else]'
     #     """
-    #     c = console.Console()
+    #     c = rich.get_console()
     #     if f"__count_{fhir_resource}" in table_name:
     #         return table_name
     #     name_parts = table_name.split("__")[-1].split("_")
diff --git a/cumulus_library/builders/valueset/vsac.py b/cumulus_library/builders/valueset/vsac.py
index 43ae42f8..fc4ac9be 100644
--- a/cumulus_library/builders/valueset/vsac.py
+++ b/cumulus_library/builders/valueset/vsac.py
@@ -3,6 +3,7 @@
 import pathlib
 
 import pandas
+import rich
 
 from cumulus_library import base_utils
 from cumulus_library.apis import umls
@@ -32,9 +33,9 @@ def download_oid_data(
         path = pathlib.Path(__file__).parent.parent / "data"  # pragma: no cover
     path.mkdir(exist_ok=True, parents=True)
     if not (force_upload) and (path / f"{steward}.parquet").exists():
-        print(f"{steward} data present at {path}, skipping download.")
+        rich.print(f"{steward} data present at {path}, skipping download.")
         return False
-    print(f"Downloading {steward} to {path}")
+    rich.print(f"Downloading {steward} to {path}")
     api = umls.UmlsApi(api_key=api_key or api_key)
     output = []
 
diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py
index 281f98c1..7e897e53 100755
--- a/cumulus_library/cli.py
+++ b/cumulus_library/cli.py
@@ -287,12 +287,12 @@ def get_studies_by_manifest_path(path: pathlib.Path) -> dict[str, pathlib.Path]:
 
 def run_cli(args: dict):
     """Controls which library tasks are run based on CLI arguments"""
-    console = rich.console.Console()
+    console = rich.get_console()
     if args["action"] == "upload":
         try:
             uploader.upload_files(args)
         except requests.RequestException as e:
-            print(str(e))
+            rich.print(str(e))
             sys.exit()
 
     # all other actions require connecting to the database
@@ -404,7 +404,7 @@ def main(cli_args=None):
 
     parser = cli_parser.get_parser()
     args = vars(parser.parse_args(cli_args))
-
+    console = rich.get_console()
     arg_env_pairs = (
         ("data_path", "CUMULUS_LIBRARY_DATA_PATH"),
         ("db_type", "CUMULUS_LIBRARY_DB_TYPE"),
@@ -441,7 +441,9 @@ def main(cli_args=None):
         sys.exit(1)
 
     if args["action"] == "version":
-        print(f"cumulus-library version: {__version__}\nInstalled studies:")
+        table = rich.table.Table(title=f"cumulus-library version: {__version__}")
+        table.add_column("Study Name", style="green")
+        table.add_column("Version", style="blue")
         studies = get_study_dict(args.get("study_dir"))
         for study in sorted(studies.keys()):
             try:
@@ -451,9 +453,10 @@ def main(cli_args=None):
                 study_init = importlib.util.module_from_spec(spec)
                 sys.modules["study_init"] = study_init
                 spec.loader.exec_module(study_init)
-                print(f"  {study}: {study_init.__version__}")
+                table.add_row(study, study_init.__version__)
             except Exception:
-                print(f"  {study}: no version defined")
+                table.add_row(study, "No version defined")
+        console.print(table)
         sys.exit(0)
 
     if len(read_env_vars) > 0:
@@ -465,7 +468,7 @@ def main(cli_args=None):
                 table.add_row(row[0], "#########")  # pragma: no cover
             else:
                 table.add_row(row[0], row[1])
-        console = rich.console.Console()
+
         console.print(table)
 
     options = {}
diff --git a/cumulus_library/databases/duckdb.py b/cumulus_library/databases/duckdb.py
index 1d29e345..1175b2c2 100644
--- a/cumulus_library/databases/duckdb.py
+++ b/cumulus_library/databases/duckdb.py
@@ -129,7 +129,7 @@ def _compat_to_utf8(value: str | None) -> datetime.date | None:
         """See the create_function() call for to_utf8 for more background"""
         # This is exercised locally on unit tests but is not in CI. Not sure why,
         # and not sure it's worth debugging
-        return value
+        return value  # pragma: no cover
 
     @staticmethod
     def _compat_from_iso8601_timestamp(
diff --git a/cumulus_library/databases/utils.py b/cumulus_library/databases/utils.py
index a5f7d8ac..c6ba7431 100644
--- a/cumulus_library/databases/utils.py
+++ b/cumulus_library/databases/utils.py
@@ -6,6 +6,7 @@
 import pyarrow
 import pyarrow.dataset
 import pyarrow.json
+import rich
 
 from cumulus_library import base_utils, db_config, errors
 from cumulus_library.databases import athena, base, duckdb
@@ -140,7 +141,7 @@ def create_db_backend(args: dict[str, str]) -> (base.DatabaseBackend, str):
         # TODO: reevaluate as DuckDB's local schema support evolves.
         # https://duckdb.org/docs/sql/statements/set.html#syntax
         if not (args.get("schema_name") is None or args["schema_name"] == "main"):
-            print(  # pragma: no cover
+            rich.print(  # pragma: no cover
                 "Warning - local schema names are not yet supported by duckDB's "
                 "python library - using 'main' instead"
             )
diff --git a/pyproject.toml b/pyproject.toml
index 7ac2863d..e826bded 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,7 @@ select = [
     "PLE",  # pylint errors
     "RUF",  # the ruff developer's own rules
     "S",  # bandit security warnings
+    "T201", #checks for print statements
     "UP",  # alert you when better syntax is available in your python version
 ]
 [tool.ruff.lint.per-file-ignores]
diff --git a/tests/conftest.py b/tests/conftest.py
index 03b3dc12..0d5c78ec 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
 import numpy
 import pandas
 import pytest
-from rich import console, table
+import rich
 
 from cumulus_library import base_utils, cli
 from cumulus_library.databases import create_db_backend
@@ -161,7 +161,7 @@ def debug_table_schema(cursor, table):
         f"select column_name, data_type from information_schema.columns where table_name='{table}'"
     ).fetchall()
     for line in table_schema:
-        print(line)
+        rich.print(line)
 
 
 def debug_table_head(cursor, table, rows=3, cols="*"):
@@ -172,8 +172,8 @@ def debug_table_head(cursor, table, rows=3, cols="*"):
     for field in cursor.description:
         col_names.append(field[0])
     for line in table_schema:
-        print(line)
-    print()
+        rich.print(line)
+    rich.print()
 
 
 def debug_diff_tables(cols, found, ref, pos=0):
@@ -181,7 +181,7 @@ def debug_diff_tables(cols, found, ref, pos=0):
     found = found[pos] if len(found) > pos else []
     ref = ref[pos] if len(ref) > pos else []
     max_size = max(len(found), len(ref))
-    diff_table = table.Table(title=f"Row {pos} delta")
+    diff_table = rich.table.Table(title=f"Row {pos} delta")
     diff_table.add_column("DB Column")
     diff_table.add_column("Found in DB")
     diff_table.add_column("Reference")
@@ -191,7 +191,7 @@ def debug_diff_tables(cols, found, ref, pos=0):
             str(found[i]) if i < len(found) else "**None**",
             str(ref[i]) if i < len(ref) else "**None**",
         )
-    output = console.Console()
+    output = rich.get_console()
     output.print(diff_table)
 
 
diff --git a/tests/regression/run_regression.py b/tests/regression/run_regression.py
index b2636beb..6ec79e73 100644
--- a/tests/regression/run_regression.py
+++ b/tests/regression/run_regression.py
@@ -15,17 +15,18 @@
 this approach."""
 
 import os
+import pathlib
 import sys
-from pathlib import Path
 
+import rich
 from pandas import read_parquet
 
 VOCAB_ICD_ROW_COUNT = 403231
 
 
 def regress_core():
-    ref_path = f"{Path(__file__).resolve().parent}/reference"
-    export_path = f"{Path(__file__).resolve().parent}/data_export/core"
+    ref_path = f"{pathlib.Path(__file__).resolve().parent}/reference"
+    export_path = f"{pathlib.Path(__file__).resolve().parent}/data_export/core"
 
     references = set(os.listdir(ref_path))
     exports = set(os.listdir(export_path))
@@ -79,10 +80,10 @@ def regress_core():
                 )
     if len(diffs) > 0:
         for row in diffs:
-            print(f"--- {row[0]} ---")
-            print(row[1])
+            rich.print(f"--- {row[0]} ---")
+            rich.print(row[1])
         sys.exit(f"❌ Found {len(diffs)} difference(s) in core study. ❌")
-    print("✅ Core study reference and export matched ✅")
+    rich.print("✅ Core study reference and export matched ✅")
 
 
 if __name__ == "__main__":
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 75fb8093..75b428c9 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -866,8 +866,13 @@ def test_version(capfd):
         )
     out, _ = capfd.readouterr()
     assert f"cumulus-library version: {__version__}" in out
-    assert "study_valid: 1.0.0" in out
-    assert "study_invalid_bad_query: no version defined" in out
+    out = out.split("\n")
+    valid = list(filter(lambda x: " study_valid " in x, out))
+    assert len(valid) == 1
+    assert " 1.0.0 " in valid[0]
+    invalid = list(filter(lambda x: " study_invalid_bad_query " in x, out))
+    assert len(invalid) == 1
+    assert " No version defined " in invalid[0]
 
 
 @mock.patch.dict(
diff --git a/tests/test_counts_builder.py b/tests/test_counts_builder.py
index dd745dfe..26ec9b41 100644
--- a/tests/test_counts_builder.py
+++ b/tests/test_counts_builder.py
@@ -78,12 +78,12 @@ def test_get_where_clauses(clause, min_subject, expected, raises):
 #     name = builder.coerce_table_name(table_name, fhir_resource)
 #     assert name == new_name
 #     for thing in mock_console.mock_calls:
-#         print(thing)
-#         print(type(thing))
-#     print("------")
+#         rich.print(thing)
+#         rich.print(type(thing))
+#     rich.print("------")
 #     for thing in mock_console.print().mock_calls:
-#         print(thing)
-#         print(type(thing))
+#         rich.print(thing)
+#         rich.print(type(thing))
 #     assert mock_console.print.called == warns
 
 
diff --git a/tests/test_data/study_dynamic_prefix/gen_prefix.py b/tests/test_data/study_dynamic_prefix/gen_prefix.py
index 5cbb8470..45734ab6 100644
--- a/tests/test_data/study_dynamic_prefix/gen_prefix.py
+++ b/tests/test_data/study_dynamic_prefix/gen_prefix.py
@@ -1,7 +1,9 @@
 import argparse
 
+import rich
+
 parser = argparse.ArgumentParser()
 parser.add_argument("--prefix", default="dynamic")
 args, _rest = parser.parse_known_args()
 
-print(args.prefix)
+rich.print(args.prefix)