Added archival mode (#183)
* Added archival mode

* test tweaks

* PR feedback

* moved zip to utils
dogversioning authored Feb 13, 2024
1 parent daf23d9 commit a36f705
Showing 18 changed files with 543 additions and 165 deletions.
17 changes: 17 additions & 0 deletions cumulus_library/base_utils.py
@@ -3,6 +3,8 @@
import datetime
import json
import os
import shutil
import zipfile
from contextlib import contextmanager

from rich import progress
@@ -82,3 +84,18 @@ def get_tablename_safe_iso_timestamp() -> str:
iso_timestamp = get_utc_datetime().isoformat()
safe_timestamp = iso_timestamp.replace(":", "_").replace("-", "_").replace("+", "_")
return safe_timestamp


def zip_dir(read_path, write_path, archive_name):
    """Moves a directory to an archive"""
    file_list = [file for file in read_path.glob("**/*") if file.is_file()]
    timestamp = get_utc_datetime().isoformat().replace("+00:00", "Z")
    with zipfile.ZipFile(
        f"{write_path}/{archive_name}_{timestamp}.zip",
        "w",
        zipfile.ZIP_DEFLATED,
    ) as f:
        for file in file_list:
            f.write(file, file.relative_to(read_path))
            file.unlink()
    shutil.rmtree(read_path)
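
For orientation, a minimal usage sketch of the new helper (the paths and study name below are illustrative, not part of the commit):

import pathlib

from cumulus_library.base_utils import zip_dir

# Hypothetical layout: exported study tables live under ./data_export/core
export_dir = pathlib.Path("./data_export/core")
archive_dir = pathlib.Path("./data_export")

# Writes ./data_export/core_<UTC timestamp>Z.zip containing every file under
# export_dir (stored relative to export_dir), deleting each source file as it
# is added, then removing the emptied export_dir tree.
zip_dir(export_dir, archive_dir, "core")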
34 changes: 27 additions & 7 deletions cumulus_library/cli.py
@@ -193,20 +193,22 @@ def clean_and_build_all(self, study_dict: dict, stats_build: bool) -> None:
self.clean_and_build_study(study_dict[key], stats_build=stats_build)

### Data exporters
def export_study(self, target: pathlib.Path, data_path: pathlib.Path) -> None:
def export_study(
self, target: pathlib.Path, data_path: pathlib.Path, archive: bool
) -> None:
"""Exports aggregates defined in a manifest
:param target: A path to the study directory
"""
if data_path is None:
sys.exit("Missing destination - please provide a path argument.")
studyparser = study_parser.StudyManifestParser(target, data_path)
studyparser.export_study(self.db, data_path)
studyparser.export_study(self.db, self.schema_name, data_path, archive)

def export_all(self, study_dict: dict, data_path: pathlib.Path):
def export_all(self, study_dict: dict, data_path: pathlib.Path, archive: bool):
"""Exports all defined count tables to disk"""
for key in study_dict.keys():
self.export_study(study_dict[key], data_path)
self.export_study(study_dict[key], data_path, archive)

def generate_study_sql(
self,
@@ -294,6 +296,7 @@ def get_studies_by_manifest_path(path: pathlib.Path) -> dict:

def run_cli(args: dict):
"""Controls which library tasks are run based on CLI arguments"""
console = rich.console.Console()
if args["action"] == "create":
create_template(args["create_dir"])

@@ -307,7 +310,7 @@ def run_cli(args: dict):
runner = StudyRunner(db_backend, data_path=args.get("data_path"))
if args.get("verbose"):
runner.verbose = True
print("Testing connection to database...")
console.print("[italic] Connecting to database...")
runner.cursor.execute("SHOW DATABASES")
study_dict = get_study_dict(args["study_dir"])
if "prefix" not in args.keys():
@@ -344,11 +347,28 @@ def run_cli(args: dict):
)

elif args["action"] == "export":
if args["archive"]:
warning_text = (
"🚨[bold red] This will export all study tables [/bold red]🚨"
"\n\nDepending on your study definition, this data may contain "
"data that would be characterized as a [italic]limited data "
"set[/italic], primarily dates, on a per patient level.\n\n"
"[bold]By doing this, you are assuming the responsibility for "
"meeting your organization's security requirements for "
"storing this data in a secure manager.[/bold]\n\n"
"Type Y to proceed, or any other value to quit.\n"
)
console.print(warning_text)
response = input()
if response.lower() != "y":
sys.exit()
if "all" in args["target"]:
runner.export_all(study_dict, args["data_path"])
runner.export_all(study_dict, args["data_path"], args["archive"])
else:
for target in args["target"]:
runner.export_study(study_dict[target], args["data_path"])
runner.export_study(
study_dict[target], args["data_path"], args["archive"]
)
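
For context, a rough sketch of how the new archive parameter flows into the runner (a hypothetical wrapper, not code from this commit; the StudyRunner and study_dict construction follow the CLI wiring shown above, and "core" is an illustrative study name):

import pathlib

def archive_export(runner, study_dict, data_path):
    """Hypothetical wrapper mirroring the CLI's new archive export path.

    `runner` is a StudyRunner and `study_dict` maps study names to manifest
    paths, both built by run_cli() as shown above.
    """
    data_path = pathlib.Path(data_path)
    # archive=True exports all of the study's tables, ignoring the manifest's
    # export list (per the --archive help text below); the output directory
    # is then expected to be archived via the new zip_dir() helper.
    runner.export_study(study_dict["core"], data_path, True)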

elif args["action"] == "generate-sql":
for target in args["target"]:
21 changes: 5 additions & 16 deletions cumulus_library/cli_parser.py
@@ -127,21 +127,6 @@ def get_parser() -> argparse.ArgumentParser:
dest="action",
)

# Study creation

create = actions.add_parser(
"create", help="Create a study instance from a template"
)
create.add_argument(
"create_dir",
default="./",
nargs="?",
help=(
"The the directory the study will be created in. Default is "
"the current directory."
),
)

# Database cleaning

clean = actions.add_parser(
@@ -204,7 +189,11 @@ def get_parser() -> argparse.ArgumentParser:
add_data_path_argument(export)
add_verbose_argument(export)
add_db_config(export)

export.add_argument(
"--archive",
action="store_true",
help="Generates archive of :all: study tables, ignoring manifest export list.",
)
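
To see what the flag yields at parse time, here is a self-contained stand-in parser (simplified and hypothetical; only the --archive argument is copied from this diff, and the target/help handling is illustrative):

import argparse

# Simplified stand-in for get_parser(), showing only the pieces relevant to
# the new flag; the real parser defines many more arguments.
parser = argparse.ArgumentParser(prog="cumulus-library")
actions = parser.add_subparsers(dest="action")
export = actions.add_parser("export", help="Export study data")
export.add_argument("target", nargs="*", default=[])
export.add_argument(
    "--archive",
    action="store_true",
    help="Generates archive of :all: study tables, ignoring manifest export list.",
)

# store_true means the flag surfaces as a plain boolean in the args dict
# consumed by run_cli().
args = vars(parser.parse_args(["export", "core", "--archive"]))
print(args["action"], args["target"], args["archive"])  # -> export ['core'] True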
# Aggregator upload

upload = actions.add_parser(
@@ -83,7 +83,6 @@ temp_encounter AS (

SELECT DISTINCT
e.id,
e.class AS enc_class,
ac.code AS enc_class_code,
ac.display AS enc_class_display,
e.status,
110 changes: 100 additions & 10 deletions cumulus_library/studies/core/reference_sql/builder_condition.sql
@@ -1,8 +1,8 @@
-- noqa: disable=all
-- This sql was autogenerated as a reference example using the library CLI.
-- Its format is tied to the specific database it was run against, and it may not
-- be correct for all databases. Use the CLI's build option to derive the best SQL
-- for your dataset.
-- This sql was autogenerated as a reference example using the library
-- CLI. Its format is tied to the specific database it was run against,
-- and it may not be correct for all databases. Use the CLI's build
-- option to derive the best SQL for your dataset.

-- ###########################################################

@@ -20,7 +20,7 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://snomed.info/sct'
u.codeable_concept.system LIKE 'http://snomed.info/sct'
), --noqa: LT07

system_code_1 AS (
@@ -34,7 +34,7 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-10-cm'
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-10-cm'
), --noqa: LT07

system_code_2 AS (
@@ -48,7 +48,63 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-9-cm'
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-9-cm'
), --noqa: LT07

system_code_3 AS (
SELECT DISTINCT
s.id AS id,
'3' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-9-cm/diagnosis'
), --noqa: LT07

system_code_4 AS (
SELECT DISTINCT
s.id AS id,
'4' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'urn:oid:1.2.840.114350.1.13.71.2.7.2.728286'
), --noqa: LT07

system_code_5 AS (
SELECT DISTINCT
s.id AS id,
'5' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'urn:oid:1.2.840.114350.1.13.71.2.7.4.698084.10375'
), --noqa: LT07

system_code_6 AS (
SELECT DISTINCT
s.id AS id,
'6' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'http://terminology.hl7.org/CodeSystem/data-absent-reason'
), --noqa: LT07

union_table AS (
@@ -75,6 +131,39 @@ CREATE TABLE core__condition_codable_concepts_display AS (
code,
display
FROM system_code_2
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_3
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_4
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_5
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_6

),

partitioned_table AS (
@@ -127,6 +216,7 @@ CREATE TABLE core__condition_codable_concepts_all AS (
code,
display
FROM system_code_0

)
SELECT
id,
@@ -154,11 +244,11 @@ WITH temp_condition AS (
cca.code_system,
cca.display,
date(from_iso8601_timestamp(c.recordeddate)) AS recordeddate,
date_trunc('week', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('week', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_week,
date_trunc('month', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('month', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_month,
date_trunc('year', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('year', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_year
FROM condition AS c
LEFT JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id
@@ -1,8 +1,8 @@
-- noqa: disable=all
-- This sql was autogenerated as a reference example using the library CLI.
-- Its format is tied to the specific database it was run against, and it may not
-- be correct for all databases. Use the CLI's build option to derive the best SQL
-- for your dataset.
-- This sql was autogenerated as a reference example using the library
-- CLI. Its format is tied to the specific database it was run against,
-- and it may not be correct for all databases. Use the CLI's build
-- option to derive the best SQL for your dataset.

-- ###########################################################

@@ -27,6 +27,7 @@ CREATE TABLE core__documentreference_dn_type AS (
code,
display
FROM system_type_0

)
SELECT
id,
@@ -51,13 +52,13 @@ WITH temp_documentreference AS (
dr.context,
dr.subject.reference AS subject_ref,
dr.context.period.start AS author_date,
date_trunc('day', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('day', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_day,
date_trunc('week', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('week', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_week,
date_trunc('month', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('month', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_month,
date_trunc('year', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('year', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_year,
cdrt.code,
cdrt.code_system,