Added archival mode (#183)
* Added archival mode

* test tweaks

* PR feedback

* moved zip to utils
dogversioning authored Feb 13, 2024
1 parent daf23d9 commit a36f705
Showing 18 changed files with 543 additions and 165 deletions.
17 changes: 17 additions & 0 deletions cumulus_library/base_utils.py
@@ -3,6 +3,8 @@
import datetime
import json
import os
import shutil
import zipfile
from contextlib import contextmanager

from rich import progress
@@ -82,3 +84,18 @@ def get_tablename_safe_iso_timestamp() -> str:
iso_timestamp = get_utc_datetime().isoformat()
safe_timestamp = iso_timestamp.replace(":", "_").replace("-", "_").replace("+", "_")
return safe_timestamp


def zip_dir(read_path, write_path, archive_name):
    """Moves a directory to an archive"""
    file_list = [file for file in read_path.glob("**/*") if file.is_file()]
    timestamp = get_utc_datetime().isoformat().replace("+00:00", "Z")
    with zipfile.ZipFile(
        f"{write_path}/{archive_name}_{timestamp}.zip",
        "w",
        zipfile.ZIP_DEFLATED,
    ) as f:
        for file in file_list:
            f.write(file, file.relative_to(read_path))
            file.unlink()
    shutil.rmtree(read_path)
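
For orientation, a minimal usage sketch of the new helper (the paths and study name below are illustrative, not part of the commit):

import pathlib

from cumulus_library.base_utils import zip_dir

# Hypothetical layout: exported study tables live under ./data_export/core
export_dir = pathlib.Path("./data_export/core")
archive_dir = pathlib.Path("./data_export")

# Writes ./data_export/core_<UTC timestamp>Z.zip containing every file under
# export_dir (stored relative to export_dir), deleting each source file as it
# is added, then removing the emptied export_dir tree.
zip_dir(export_dir, archive_dir, "core")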
34 changes: 27 additions & 7 deletions cumulus_library/cli.py
@@ -193,20 +193,22 @@ def clean_and_build_all(self, study_dict: dict, stats_build: bool) -> None:
self.clean_and_build_study(study_dict[key], stats_build=stats_build)

### Data exporters
def export_study(self, target: pathlib.Path, data_path: pathlib.Path) -> None:
def export_study(
self, target: pathlib.Path, data_path: pathlib.Path, archive: bool
) -> None:
"""Exports aggregates defined in a manifest
:param target: A path to the study directory
"""
if data_path is None:
sys.exit("Missing destination - please provide a path argument.")
studyparser = study_parser.StudyManifestParser(target, data_path)
studyparser.export_study(self.db, data_path)
studyparser.export_study(self.db, self.schema_name, data_path, archive)

def export_all(self, study_dict: dict, data_path: pathlib.Path):
def export_all(self, study_dict: dict, data_path: pathlib.Path, archive: bool):
"""Exports all defined count tables to disk"""
for key in study_dict.keys():
self.export_study(study_dict[key], data_path)
self.export_study(study_dict[key], data_path, archive)

def generate_study_sql(
self,
@@ -294,6 +296,7 @@ def get_studies_by_manifest_path(path: pathlib.Path) -> dict:

def run_cli(args: dict):
"""Controls which library tasks are run based on CLI arguments"""
console = rich.console.Console()
if args["action"] == "create":
create_template(args["create_dir"])

@@ -307,7 +310,7 @@ def run_cli(args: dict):
runner = StudyRunner(db_backend, data_path=args.get("data_path"))
if args.get("verbose"):
runner.verbose = True
print("Testing connection to database...")
console.print("[italic] Connecting to database...")
runner.cursor.execute("SHOW DATABASES")
study_dict = get_study_dict(args["study_dir"])
if "prefix" not in args.keys():
@@ -344,11 +347,28 @@ def run_cli(args: dict):
)

elif args["action"] == "export":
if args["archive"]:
warning_text = (
"🚨[bold red] This will export all study tables [/bold red]🚨"
"\n\nDepending on your study definition, this data may contain "
"data that would be characterized as a [italic]limited data "
"set[/italic], primarily dates, on a per patient level.\n\n"
"[bold]By doing this, you are assuming the responsibility for "
"meeting your organization's security requirements for "
"storing this data in a secure manager.[/bold]\n\n"
"Type Y to proceed, or any other value to quit.\n"
)
console.print(warning_text)
response = input()
if response.lower() != "y":
sys.exit()
if "all" in args["target"]:
runner.export_all(study_dict, args["data_path"])
runner.export_all(study_dict, args["data_path"], args["archive"])
else:
for target in args["target"]:
runner.export_study(study_dict[target], args["data_path"])
runner.export_study(
study_dict[target], args["data_path"], args["archive"]
)
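
For context, a rough sketch of how the new archive parameter flows into the runner (a hypothetical wrapper, not code from this commit; the StudyRunner and study_dict construction follow the CLI wiring shown above, and "core" is an illustrative study name):

import pathlib

def archive_export(runner, study_dict, data_path):
    """Hypothetical wrapper mirroring the CLI's new archive export path.

    `runner` is a StudyRunner and `study_dict` maps study names to manifest
    paths, both built by run_cli() as shown above.
    """
    data_path = pathlib.Path(data_path)
    # archive=True exports all of the study's tables, ignoring the manifest's
    # export list (per the --archive help text below); the output directory
    # is then expected to be archived via the new zip_dir() helper.
    runner.export_study(study_dict["core"], data_path, True)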

elif args["action"] == "generate-sql":
for target in args["target"]:
21 changes: 5 additions & 16 deletions cumulus_library/cli_parser.py
@@ -127,21 +127,6 @@ def get_parser() -> argparse.ArgumentParser:
dest="action",
)

# Study creation

create = actions.add_parser(
"create", help="Create a study instance from a template"
)
create.add_argument(
"create_dir",
default="./",
nargs="?",
help=(
"The the directory the study will be created in. Default is "
"the current directory."
),
)

# Database cleaning

clean = actions.add_parser(
@@ -204,7 +189,11 @@ def get_parser() -> argparse.ArgumentParser:
add_data_path_argument(export)
add_verbose_argument(export)
add_db_config(export)

export.add_argument(
"--archive",
action="store_true",
help="Generates archive of :all: study tables, ignoring manifest export list.",
)
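
To see what the flag yields at parse time, here is a self-contained stand-in parser (simplified and hypothetical; only the --archive argument is copied from this diff, and the target/help handling is illustrative):

import argparse

# Simplified stand-in for get_parser(), showing only the pieces relevant to
# the new flag; the real parser defines many more arguments.
parser = argparse.ArgumentParser(prog="cumulus-library")
actions = parser.add_subparsers(dest="action")
export = actions.add_parser("export", help="Export study data")
export.add_argument("target", nargs="*", default=[])
export.add_argument(
    "--archive",
    action="store_true",
    help="Generates archive of :all: study tables, ignoring manifest export list.",
)

# store_true means the flag surfaces as a plain boolean in the args dict
# consumed by run_cli().
args = vars(parser.parse_args(["export", "core", "--archive"]))
print(args["action"], args["target"], args["archive"])  # -> export ['core'] True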
# Aggregator upload

upload = actions.add_parser(
@@ -83,7 +83,6 @@ temp_encounter AS (

SELECT DISTINCT
e.id,
e.class AS enc_class,
ac.code AS enc_class_code,
ac.display AS enc_class_display,
e.status,
110 changes: 100 additions & 10 deletions cumulus_library/studies/core/reference_sql/builder_condition.sql
@@ -1,8 +1,8 @@
-- noqa: disable=all
-- This sql was autogenerated as a reference example using the library CLI.
-- Its format is tied to the specific database it was run against, and it may not
-- be correct for all databases. Use the CLI's build option to derive the best SQL
-- for your dataset.
-- This sql was autogenerated as a reference example using the library
-- CLI. Its format is tied to the specific database it was run against,
-- and it may not be correct for all databases. Use the CLI's build
-- option to derive the best SQL for your dataset.

-- ###########################################################

@@ -20,7 +20,7 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://snomed.info/sct'
u.codeable_concept.system LIKE 'http://snomed.info/sct'
), --noqa: LT07

system_code_1 AS (
@@ -34,7 +34,7 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-10-cm'
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-10-cm'
), --noqa: LT07

system_code_2 AS (
@@ -48,7 +48,63 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-9-cm'
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-9-cm'
), --noqa: LT07

system_code_3 AS (
SELECT DISTINCT
s.id AS id,
'3' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-9-cm/diagnosis'
), --noqa: LT07

system_code_4 AS (
SELECT DISTINCT
s.id AS id,
'4' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'urn:oid:1.2.840.114350.1.13.71.2.7.2.728286'
), --noqa: LT07

system_code_5 AS (
SELECT DISTINCT
s.id AS id,
'5' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'urn:oid:1.2.840.114350.1.13.71.2.7.4.698084.10375'
), --noqa: LT07

system_code_6 AS (
SELECT DISTINCT
s.id AS id,
'6' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'http://terminology.hl7.org/CodeSystem/data-absent-reason'
), --noqa: LT07

union_table AS (
@@ -75,6 +131,39 @@ CREATE TABLE core__condition_codable_concepts_display AS (
code,
display
FROM system_code_2
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_3
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_4
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_5
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_6

),

partitioned_table AS (
@@ -127,6 +216,7 @@ CREATE TABLE core__condition_codable_concepts_all AS (
code,
display
FROM system_code_0

)
SELECT
id,
@@ -154,11 +244,11 @@ WITH temp_condition AS (
cca.code_system,
cca.display,
date(from_iso8601_timestamp(c.recordeddate)) AS recordeddate,
date_trunc('week', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('week', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_week,
date_trunc('month', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('month', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_month,
date_trunc('year', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('year', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_year
FROM condition AS c
LEFT JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id
@@ -1,8 +1,8 @@
-- noqa: disable=all
-- This sql was autogenerated as a reference example using the library CLI.
-- Its format is tied to the specific database it was run against, and it may not
-- be correct for all databases. Use the CLI's build option to derive the best SQL
-- for your dataset.
-- This sql was autogenerated as a reference example using the library
-- CLI. Its format is tied to the specific database it was run against,
-- and it may not be correct for all databases. Use the CLI's build
-- option to derive the best SQL for your dataset.

-- ###########################################################

@@ -27,6 +27,7 @@ CREATE TABLE core__documentreference_dn_type AS (
code,
display
FROM system_type_0

)
SELECT
id,
@@ -51,13 +52,13 @@ WITH temp_documentreference AS (
dr.context,
dr.subject.reference AS subject_ref,
dr.context.period.start AS author_date,
date_trunc('day', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('day', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_day,
date_trunc('week', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('week', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_week,
date_trunc('month', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('month', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_month,
date_trunc('year', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('year', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_year,
cdrt.code,
cdrt.code_system,