Skip to content

Commit

Permalink
feat: add c_attachment_count and c_content_type_use
Browse files Browse the repository at this point in the history
These measure attachment & content type usage.
  • Loading branch information
mikix committed Nov 27, 2024
1 parent aacc0ea commit bf26f02
Show file tree
Hide file tree
Showing 22 changed files with 648 additions and 3 deletions.
2 changes: 1 addition & 1 deletion cumulus_library_data_metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Data Metrics study for Cumulus Library"""

__version__ = "6.0.0"
__version__ = "6.1.0"
187 changes: 187 additions & 0 deletions cumulus_library_data_metrics/attachment_utils.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
{% import 'utils.jinja' as utils %}


{% macro _extract_attachment_info(src, unnest_field, access_field, attachment_schema, extra_field_names, extra_fields) -%}
{% set has_data_ext = attachment_schema["_data"]["extension"]["url"] and attachment_schema["_data"]["extension"]["valueCode"] %}
{% set has_url_ext = attachment_schema["_url"]["extension"]["url"] and attachment_schema["_url"]["extension"]["valueCode"] %}
(
WITH
tmp_rows AS (
SELECT
id,
ROW_NUMBER() OVER (PARTITION BY id) AS row,

{% for extra_field in extra_fields %}
{{ extra_field }},
{% endfor %}

{{ utils.nullable(
attachment_schema["contentType"],
"u." + access_field + ".contentType",
"content_type",
) }},
{{ utils.nullable(
attachment_schema["language"],
"u." + access_field + ".language",
"language",
) }},
{{ utils.nullable(
attachment_schema["data"],
"u." + access_field + ".data",
"data",
) }},
{{ utils.nullable(
has_data_ext,
"u." + access_field + "._data",
"_data",
) }},
{{ utils.nullable(
attachment_schema["url"],
"u." + access_field + ".url",
"url",
) }},
{{ utils.nullable(
has_url_ext,
"u." + access_field + "._url",
"_url",
) }}

FROM {{ src }},
UNNEST({{ unnest_field }}) AS u ({{ unnest_field }})
),

data_absent_reasons AS (
{% if has_data_ext %}
SELECT
id,
row,
BOOL_OR(
u.extension.url = 'http://hl7.org/fhir/StructureDefinition/data-absent-reason'
AND u.extension.valueCode = 'masked'
) AS was_masked
FROM tmp_rows,
UNNEST(_data.extension) AS u (extension)
GROUP BY id, row

{% else %}
SELECT id, row, FALSE AS was_masked FROM tmp_rows WHERE 1=0 -- return an empty table
{% endif %}
),

url_absent_reasons AS (
{% if has_url_ext %}
SELECT
id,
row,
BOOL_OR(
u.extension.url = 'http://hl7.org/fhir/StructureDefinition/data-absent-reason'
AND u.extension.valueCode = 'masked'
) AS was_masked
FROM tmp_rows,
UNNEST(_url.extension) AS u (extension)
GROUP BY id, row

{% else %}
SELECT id, row, FALSE AS was_masked FROM tmp_rows WHERE 1=0 -- return an empty table
{% endif %}
)

SELECT
id,
row,
language,

CASE WHEN content_type IS NULL THEN NULL
ELSE lower(split_part(content_type, ';', 1)) -- chop off any encoding arg
END as content_type,

{% for name in extra_field_names %}
{{ name }},
{% endfor %}

(
{{ utils.is_string_valid('data') }}
OR (data_absent_reasons.was_masked IS NOT NULL AND data_absent_reasons.was_masked)
) AS has_data,

(
{{ utils.is_string_valid('url') }}
OR (url_absent_reasons.was_masked IS NOT NULL AND url_absent_reasons.was_masked)
) AS has_url

FROM tmp_rows
LEFT JOIN url_absent_reasons USING(id, row)
LEFT JOIN data_absent_reasons USING(id, row)
)
{% endmacro %}


{% macro _extract_diagnostic_report_attachments(schema) -%}
(
WITH
att_info AS {{ _extract_attachment_info(
"diagnosticreport", "presentedForm", "presentedForm", schema["presentedForm"], [], []
) }}

SELECT
id,
att.row,
dr.status,
CAST(NULL AS VARCHAR) AS doc_status,
att.content_type,
att.language,
CAST(NULL AS VARCHAR) AS format_code,
CAST(NULL AS VARCHAR) AS format_system,
att.has_data,
att.has_url

FROM att_info AS att
JOIN diagnosticreport AS dr USING(id)
)
{% endmacro %}


{% macro _extract_document_reference_attachments(schema) -%}
(
WITH
att_info AS {{ _extract_attachment_info(
"documentreference", "content", "content.attachment", schema["content"]["attachment"],
["format_system", "format_code"],
[
utils.nullable(
schema["content"]["format"]["system"],
"u.content.format.system",
"format_system",
),
utils.nullable(
schema["content"]["format"]["code"],
"u.content.format.code",
"format_code",
),
]
) }}

SELECT
id,
att.row,
dr.status,
dr.docStatus AS doc_status,
att.content_type,
att.language,
att.format_code,
att.format_system,
att.has_data,
att.has_url

FROM att_info AS att
JOIN documentreference AS dr USING(id)
)
{% endmacro %}

{% macro extract_attachments(src, schema) -%}
{% if src == "DiagnosticReport" %}
{{ _extract_diagnostic_report_attachments(schema) }}
{% elif src == "DocumentReference" %}
{{ _extract_document_reference_attachments(schema) }}
{% endif %}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
{% import 'utils.jinja' as utils %}
{% import 'attachment_utils.jinja' as attachment_utils %}

CREATE TABLE {{ study_prefix }}__count_c_attachment_count_{{ src|lower }} AS (
WITH

attachments AS {{ attachment_utils.extract_attachments(src, schema) }},

formats AS (
SELECT
id,
row,

-- https://www.hl7.org/fhir/R4/valueset-formatcodes.html
CASE
WHEN format_code = 'urn:ihe:pcc:xphr:2007'
THEN 'Personal Health Records. Also known as HL7 CCD and HITSP C32'
WHEN format_code = 'urn:ihe:pcc:aps:2007'
THEN 'IHE Antepartum Summary'
WHEN format_code = 'urn:ihe:pcc:xds-ms:2007'
THEN 'XDS Medical Summaries'
WHEN format_code = 'urn:ihe:pcc:edr:2007'
THEN 'Emergency Department Referral (EDR)'
WHEN format_code = 'urn:ihe:pcc:edes:2007'
THEN 'Emergency Department Encounter Summary (EDES)'
WHEN format_code = 'urn:ihe:pcc:apr:handp:2008'
THEN 'Antepartum Record (APR) - History and Physical'
WHEN format_code = 'urn:ihe:pcc:apr:lab:2008'
THEN 'Antepartum Record (APR) - Laboratory'
WHEN format_code = 'urn:ihe:pcc:apr:edu:2008'
THEN 'Antepartum Record (APR) - Education'
WHEN format_code = 'urn:ihe:pcc:crc:2008'
THEN 'Cancer Registry Content (CRC)'
WHEN format_code = 'urn:ihe:pcc:cm:2008'
THEN 'Care Management (CM)'
WHEN format_code = 'urn:ihe:pcc:ic:2008'
THEN 'Immunization Content (IC)'
WHEN format_code = 'urn:ihe:pcc:tn:2007'
THEN 'PCC TN'
WHEN format_code = 'urn:ihe:pcc:nn:2007'
THEN 'PCC NN'
WHEN format_code = 'urn:ihe:pcc:ctn:2007'
THEN 'PCC CTN'
WHEN format_code = 'urn:ihe:pcc:edpn:2007'
THEN 'PCC EDPN'
WHEN format_code = 'urn:ihe:pcc:hp:2008'
THEN 'PCC HP'
WHEN format_code = 'urn:ihe:pcc:ldhp:2009'
THEN 'PCC LDHP'
WHEN format_code = 'urn:ihe:pcc:lds:2009'
THEN 'PCC LDS'
WHEN format_code = 'urn:ihe:pcc:mds:2009'
THEN 'PCC MDS'
WHEN format_code = 'urn:ihe:pcc:nds:2010'
THEN 'PCC NDS'
WHEN format_code = 'urn:ihe:pcc:ppvs:2010'
THEN 'PCC PPVS'
WHEN format_code = 'urn:ihe:pcc:trs:2011'
THEN 'PCC TRS'
WHEN format_code = 'urn:ihe:pcc:ets:2011'
THEN 'PCC ETS'
WHEN format_code = 'urn:ihe:pcc:its:2011'
THEN 'PCC ITS'
WHEN format_code = 'urn:ihe:pcc:ript:2017'
THEN 'Routine Interfacility Patient Transport (RIPT)'
WHEN format_code = 'urn:ihe:iti:bppc:2007'
THEN 'Basic Patient Privacy Consents'
WHEN format_code = 'urn:ihe:iti:bppc-sd:2007'
THEN 'Basic Patient Privacy Consents with Scanned Document'
WHEN format_code = 'urn:ihe:iti:xds-sd:pdf:2008'
THEN 'PDF embedded in CDA per XDS-SD profile'
WHEN format_code = 'urn:ihe:iti:xds-sd:text:2008'
THEN 'Text embedded in CDA per XDS-SD profile'
WHEN format_code = 'urn:ihe:iti:xdw:2011:workflowDoc'
THEN 'XDW Workflow Document'
WHEN format_code = 'urn:ihe:iti:dsg:detached:2014'
THEN 'DSG Detached Document'
WHEN format_code = 'urn:ihe:iti:dsg:enveloping:2014'
THEN 'DSG Enveloping Document'
WHEN format_code = 'urn:ihe:iti:appc:2016:consent'
THEN 'Advanced Patient Privacy Consents'
WHEN format_code = 'urn:ihe:iti:xds:2017:mimeTypeSufficient'
THEN 'mimeType Sufficient'
WHEN format_code = 'urn:ihe:lab:xd-lab:2008'
THEN 'CDA Laboratory Report'
WHEN format_code = 'urn:ihe:rad:TEXT'
THEN 'Radiology XDS-I Text'
WHEN format_code = 'urn:ihe:rad:PDF'
THEN 'Radiology XDS-I PDF'
WHEN format_code = 'urn:ihe:rad:CDA:ImagingReportStructuredHeadings:2013'
THEN 'Radiology XDS-I Structured CDA'
WHEN format_code = 'urn:ihe:card:imaging:2011'
THEN 'Cardiac Imaging Report'
WHEN format_code = 'urn:ihe:card:CRC:2012'
THEN 'Cardiology CRC'
WHEN format_code = 'urn:ihe:card:EPRC-IE:2014'
THEN 'Cardiology EPRC-IE'
WHEN format_code = 'urn:ihe:dent:TEXT'
THEN 'Dental Text'
WHEN format_code = 'urn:ihe:dent:PDF'
THEN 'Dental PDF'
WHEN format_code = 'urn:ihe:dent:CDA:ImagingReportStructuredHeadings:2013'
THEN 'Dental CDA'
WHEN format_code = 'urn:ihe:pat:apsr:all:2010'
THEN 'Anatomic Pathology Structured Report All'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:all:2010'
THEN 'Anatomic Pathology Structured Report Cancer All'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:breast:2010'
THEN 'Anatomic Pathology Structured Report Cancer Breast'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:colon:2010'
THEN 'Anatomic Pathology Structured Report Cancer Colon'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:prostate:2010'
THEN 'Anatomic Pathology Structured Report Cancer Prostate'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:thyroid:2010'
THEN 'Anatomic Pathology Structured Report Cancer Thyroid'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:lung:2010'
THEN 'Anatomic Pathology Structured Report Cancer Lung'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:skin:2010'
THEN 'Anatomic Pathology Structured Report Cancer Skin'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:kidney:2010'
THEN 'Anatomic Pathology Structured Report Cancer Kidney'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:cervix:2010'
THEN 'Anatomic Pathology Structured Report Cancer Cervix'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:endometrium:2010'
THEN 'Anatomic Pathology Structured Report Cancer Endometrium'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:ovary:2010'
THEN 'Anatomic Pathology Structured Report Cancer Ovary'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:esophagus:2010'
THEN 'Anatomic Pathology Structured Report Cancer Esophagus'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:stomach:2010'
THEN 'Anatomic Pathology Structured Report Cancer Stomach'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:liver:2010'
THEN 'Anatomic Pathology Structured Report Cancer Liver'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:pancreas:2010'
THEN 'Anatomic Pathology Structured Report Cancer Pancreas'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:testis:2010'
THEN 'Anatomic Pathology Structured Report Cancer Testis'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:urinary_bladder:2010'
THEN 'Anatomic Pathology Structured Report Cancer Urinary Bladder'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:lip_oral_cavity:2010'
THEN 'Anatomic Pathology Structured Report Cancer Lip Oral Cavity'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:pharynx:2010'
THEN 'Anatomic Pathology Structured Report Cancer Pharynx'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:salivary_gland:2010'
THEN 'Anatomic Pathology Structured Report Cancer Salivary Gland'
WHEN format_code = 'urn:ihe:pat:apsr:cancer:larynx:2010'
THEN 'Anatomic Pathology Structured Report Cancer Larynx'
WHEN format_code = 'urn:ihe:pharm:pre:2010'
THEN 'Pharmacy Pre'
WHEN format_code = 'urn:ihe:pharm:padv:2010'
THEN 'Pharmacy PADV'
WHEN format_code = 'urn:ihe:pharm:dis:2010'
THEN 'Pharmacy DIS'
WHEN format_code = 'urn:ihe:pharm:pml:2013'
THEN 'Pharmacy PML'
WHEN format_code = 'urn:hl7-org:sdwg:ccda-structuredBody:1.1'
THEN 'For documents following C-CDA 1.1 constraints using a structured body.'
WHEN format_code = 'urn:hl7-org:sdwg:ccda-nonXMLBody:1.1'
THEN 'For documents following C-CDA 1.1 constraints using a non structured body.'
WHEN format_code = 'urn:hl7-org:sdwg:ccda-structuredBody:2.1'
THEN 'For documents following C-CDA 2.1 constraints using a structured body.'
WHEN format_code = 'urn:hl7-org:sdwg:ccda-nonXMLBody:2.1'
THEN 'For documents following C-CDA 2.1 constraints using a non structured body.'
ELSE NULL -- this system binding only holds the above values, ignore unexpected codes
END AS display

FROM attachments
-- This system binding is marked as extensible, but for the sake of reliably providing
-- consinstent & readable display names, we'll only support the preferred system for now.
WHERE format_system = 'http://ihe.net/fhir/ValueSet/IHE.FormatCode.codesystem'
),

simplified AS (
SELECT
id,
CAST(row AS VARCHAR) AS row,
{{ utils.coalesce_missing('status') }} AS status,
{{ utils.coalesce_missing('content_type') }} AS content_type,
{{ utils.coalesce_missing('language') }} AS language,
{{ utils.coalesce_missing('formats.display') }} AS format,
CASE
WHEN has_data and has_url
THEN 'Data and URL'
WHEN has_data
THEN 'Data'
WHEN has_url
THEN 'URL'
ELSE 'Not available'
END AS content

FROM attachments
LEFT JOIN formats USING(id, row)
)

{% call utils.make_counts('simplified', output_mode, unique_ids=['id', 'row']) %}
status,
content_type,
language,
format,
content
{% endcall %}
);
Loading

0 comments on commit bf26f02

Please sign in to comment.