Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added ICD10 description tiers #12

Merged
merged 4 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[sqlfluff]
templater = jinja
dialect = athena
sql_file_exts = .sql,.sql.jinja
exclude_rules=
# UMLS uses reserved keywords as column names, tsk tsk
references.keywords
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,18 @@ schema by another means.
The following tables are a derived from the primary tables, and are included here as a
convenience to avoid having to compute these on a repeated basis

- **mrrel_is_a** a subset of the relationships in mrrel, including only those that define
- **mrrel_drug_is_a** a subset of the relationships in mrrel, including only those that define
that concept A is a member of concept B (i.e. is a child, or is explicitly marked as
being a tradename/member belonging to the parent concept).
being a tradename/member belonging to the parent concept), for drugs and drug-related topics.
- **mrconso_drugs** a subset of the entity list in mrconso, limited to vocabularies
specifically dealing with drug identifiers (i.e. SNOMED, RxNorm, etc.)
- **mrconso_icd10cm**/**mrrel__icd10cm** are slices of the respective main tables,
only containing records from the ICD10 coding system
- **icd10_(type)** are slices of a given coding system at the relevant level of the
ICD10 hierarchy (category,block,chapter,code)
- **icd10_tree** provides a relation-navigable code hierarchy of the individual levels
in the ICD10 hierarchy


## Licensing details

Expand Down
Binary file removed cumulus_library_umls/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion cumulus_library_umls/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.0"
__version__ = "2.0.0"
349 changes: 349 additions & 0 deletions cumulus_library_umls/ancilary_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
-- A collection of convenience views for UMLS data

-- Selecting only child relationships FROM mrrel
CREATE TABLE IF NOT EXISTS umls__mrrel_drug_is_a AS
SELECT
cui1,
aui1,
stype1,
rel,
cui2,
aui2,
stype2,
rela,
rui,
srui,
sab,
sl,
rg,
dir,
suppress,
cvf
FROM umls__mrrel
WHERE (
rel = 'CHD'
OR rela IN (
'isa',
'tradename_of',
'has_tradename',
'has_basis_of_strength_substance'
)
)
AND rel NOT IN ('RB', 'PAR');

-- Selecting only drug-related concepts FROM mrconso
CREATE TABLE IF NOT EXISTS umls__mrconso_drugs AS
SELECT
cui,
lat,
ts,
lui,
stt,
sui,
ispref,
aui,
saui,
scui,
sdui,
sab,
tty,
code,
str
FROM umls__mrconso
WHERE sab IN (
'ATC',
'CVX',
'DRUGBANK',
'GS',
'MED-RT',
'MMSL',
'MMX',
'MTHCMSFRF',
'MTHSPL',
'NDDF', 'RXNORM', 'SNOMEDCT_US', 'USP', 'VANDF'
);

-- ICD-10CM convenience views

CREATE TABLE IF NOT EXISTS umls__mrconso_icd10cm AS
SELECT
cui,
lat,
ts,
lui,
stt,
sui,
ispref,
aui,
saui,
scui,
sdui,
sab,
tty,
code,
str,
length(code) AS code_len
FROM umls__mrconso
WHERE sab = 'ICD10CM';

CREATE TABLE IF NOT EXISTS umls__mrrel_icd10cm AS
SELECT
cui1,
aui1,
stype1,
rel,
cui2,
aui2,
stype2,
rela,
rui,
srui,
sab,
sl,
rg,
dir,
suppress,
cvf
FROM umls__mrrel
WHERE sab = 'ICD10CM';

-- The following views slice out individual ICD layers.
-- This lines up with how a human might traverse the nomenclature to find
-- a set of codes related to a specific condition.

CREATE TABLE IF NOT EXISTS umls__icd10_chapter AS
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c
WHERE
c.tty IN ('HT')
AND c.code LIKE '%-%'
AND r.cui1 = 'C2880081'
AND r.cui2 = c.cui
ORDER BY c.code ASC;

CREATE TABLE IF NOT EXISTS umls__icd10_block AS
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c,
umls__icd10_chapter AS par
WHERE
c.tty IN ('HT')
AND r.rel = 'CHD'
AND c.code LIKE '%-%'
AND r.cui1 = par.cui2
AND r.cui2 = c.cui
ORDER BY c.code ASC;

CREATE TABLE IF NOT EXISTS umls__icd10_category AS
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c,
umls__icd10_block AS par
WHERE
c.tty IN ('HT', 'PT')
AND c.code_len = 3
AND r.rel = 'CHD'
AND r.cui1 = par.cui2
AND r.cui2 = c.cui
ORDER BY c.code ASC;

CREATE TABLE IF NOT EXISTS umls__icd10_code AS

WITH code_5 AS (
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c,
umls__icd10_category AS par
WHERE
c.tty IN ('HT', 'PT')
AND c.code LIKE '%.%'
AND (c.code_len = 5 OR c.code LIKE '%.%X%')
AND r.rel = 'CHD'
AND r.cui1 = par.cui2
AND r.cui2 = c.cui
),

code_6 AS (
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c,
code_5 AS par
WHERE
c.tty IN ('HT', 'PT')
AND c.code LIKE '%.%'
AND (c.code_len = 6 OR c.code LIKE '%.%X%')
AND r.rel = 'CHD'
AND r.cui1 = par.cui2
AND r.cui2 = c.cui
),

code_7 AS (
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c,
code_6 AS par
WHERE
c.tty IN ('HT', 'PT')
AND c.code LIKE '%.%'
AND (c.code_len = 7 OR c.code LIKE '%.%X%')
AND c.code LIKE '%.%'
AND r.rel = 'CHD'
AND r.cui1 = par.cui2
AND r.cui2 = c.cui
ORDER BY c.code ASC
),

code_8 AS (
SELECT DISTINCT
r.rui,
r.cui1,
r.cui2,
c.tty,
c.code_len,
c.code,
c.str
FROM umls__mrrel_icd10cm AS r,
umls__mrconso_icd10cm AS c,
code_7 AS par
WHERE
c.tty IN ('HT', 'PT')
AND c.code LIKE '%.%'
AND (c.code_len = 8 OR c.code LIKE '%.%X%')
AND c.code LIKE '%.%'
AND r.rel = 'CHD'
AND r.cui1 = par.cui2
AND r.cui2 = c.cui
ORDER BY c.code ASC
)

SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
5 AS depth
FROM code_5
UNION ALL
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
6 AS depth
FROM code_6
UNION ALL
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
7 AS depth
FROM code_7
UNION ALL
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
8 AS depth
FROM code_8;

CREATE OR REPLACE VIEW umls__icd10_tree AS
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
2 AS depth
FROM umls__icd10_chapter
UNION ALL
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
3 AS depth
FROM umls__icd10_block
UNION ALL
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
4 AS depth
FROM umls__icd10_category
UNION ALL
SELECT
rui,
cui1,
cui2,
tty,
code_len,
code,
str,
depth
FROM umls__icd10_code
3 changes: 2 additions & 1 deletion cumulus_library_umls/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ study_prefix = "umls"
[table_builder_config]
file_names = [
"umls_builder.py",
"static_builder.py"
"static_builder.py",
"ancilary_tables.sql",
]

[advanced_options]
Expand Down
Loading
Loading