smart-on-fhir · dogversioning · Jul 30, 2024 · Jul 30, 2024 · Jul 30, 2024 · mikix
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -0,0 +1,38 @@
+name: CI
+on:
+  pull_request:
+    paths-ignore:  # skip the workflow when only files under docs/ changed
+      - 'docs/**'
+  push:
+    branches:
+      - main
+    paths-ignore:
+      - 'docs/**'
+
+jobs:
+  unittest:
+    name: unit tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: pip  # let setup-python cache pip downloads between runs
+
+      - name: Get library from main  # no ref in the git URL, so pip takes the default branch, not a PyPI release
+        run: pip install git+https://github.com/smart-on-fhir/cumulus-library.git
+
+      - name: Install dependencies  # installs this package together with its "test" extra
+        run: |
+          python -m pip install --upgrade pip
+          pip install ".[test]"
+      - name: Create mock AWS credentials  # dummy [test] profile only; no real AWS keys are written
+        run: |
+          mkdir ~/.aws && touch ~/.aws/credentials 
+          echo -e "[test]\naws_access_key_id = test\naws_secret_access_key = test" > ~/.aws/credentials
+      - name: Test with pytest
+        run: |
+          python -m pytest tests
diff --git a/cumulus_library_opioid/vocab/additional_rules_builder.py b/cumulus_library_opioid/vocab/additional_rules_builder.py
@@ -85,7 +85,6 @@ def prepare_queries(
                         'r.rui',
                         'r.rel',
                         'r.rela',
-                        'e.rela',
                         'r.str1',
                         'r.str2',
                         'r.keyword',

diff --git a/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py b/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py
@@ -34,12 +34,11 @@ def get_create_view_filter_by(
         ):
             a_schema = a_schema or 'rxnorm.'
             a_join_col = a_join_col or 'a.rxcui'
-            b_join_col = b_join_col or 'b.rxcui'
-            b_table = b_table or f'opioid__{steward}_vsac',
+            b_join_col = b_join_col or 'b.code'
+            b_table = b_table or f'opioid__{steward}_vsac'
             join_clauses = join_clauses or [f"{a_join_col} = {b_join_col}"]
-            view_name = view_name or (
-                    f'{manifest.get_study_prefix()}__{steward}_{a_table}'
-            )
+            view_name = view_name or f'{manifest.get_study_prefix()}__{steward}_{a_table}'
+
 
             return base_templates.get_create_view_from_tables(
                 view_name=view_name,
@@ -111,4 +110,3 @@ def get_create_view_filter_by(
                     b_join_col='b.rxcui1',
                 )
             )
-
diff --git a/cumulus_library_opioid/vocab/static_builder.py b/cumulus_library_opioid/vocab/static_builder.py
@@ -28,56 +28,57 @@ class StaticBuilder(base_table_builder.BaseTableBuilder):
     display_text = "Building static data tables..."
     base_path = pathlib.Path(__file__).resolve().parent
 
-    tables = [  # noqa: RUF012
-        TableConfig(
-            file_path=base_path / "./common/keywords/keywords.tsv",
-            delimiter="\t",
-            table_name="keywords",
-            headers=["STR"],
-            dtypes={"STR": "str"},
-            parquet_types=["STRING"],
-            filtered_path=base_path / "./common/keywords/keywords.filtered.tsv",
-        ),
-        TableConfig(
-            file_path=base_path / "./all_rxcui_str.RXNCONSO_curated.tsv",
-            delimiter="\t",
-            table_name="all_rxnconso_keywords",
-            headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"],
-            dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"},
-            parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"],
-        ),
-        TableConfig(
-            file_path=base_path / "./common/expand_rules/expand_rules.tsv",
-            delimiter="\t",
-            table_name="search_rules",
-            headers=[
-                "TTY1",
-                "RELA",
-                "TTY2",
-                "rule",
-            ],
-            dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"},
-            parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"],
-            ignore_header=True,
-            map_cols=[
-                {
-                    "from": "rule",
-                    "to": "include",
-                    "map_dict": {"yes": True, "no": False},
-                }
-            ],
-        ),
-        # TODO: We should eventually replace this with a source derived from
-        # UMLS directly at some point
-        TableConfig(
-            file_path=base_path / "./common/umls/umls_tty.tsv",
-            delimiter="\t",
-            table_name="umls_tty",
-            headers=["TTY","TTY_STR"],
-            dtypes={"TTY": "str","TTY_STR": "str",},
-            parquet_types=["STRING", "STRING"],
-        ),
-    ]
+    def get_table_configs(self):  # Build and return the list of TableConfig entries, with paths resolved against self.base_path.
+        return [
+            TableConfig(
+                file_path=self.base_path / "./common/keywords/keywords.tsv",
+                delimiter="\t",
+                table_name="keywords",
+                headers=["STR"],
+                dtypes={"STR": "str"},
+                parquet_types=["STRING"],
+                filtered_path=self.base_path / "./common/keywords/keywords.filtered.tsv",  # presumably written by filter_duplicated_meds; confirm
+            ),
+            TableConfig(
+                file_path=self.base_path / "./all_rxcui_str.RXNCONSO_curated.tsv",
+                delimiter="\t",
+                table_name="all_rxnconso_keywords",
+                headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"],
+                dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"},
+                parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"],
+            ),
+            TableConfig(
+                file_path=self.base_path / "./common/expand_rules/expand_rules.tsv",
+                delimiter="\t",
+                table_name="search_rules",
+                headers=[
+                    "TTY1",
+                    "RELA",
+                    "TTY2",
+                    "rule",
+                ],
+                dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"},
+                parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"],  # NOTE(review): 5 types vs. 4 headers; presumably the BOOLEAN is the "include" column from map_cols; confirm
+                ignore_header=True,
+                map_cols=[
+                    {
+                        "from": "rule",
+                        "to": "include",
+                        "map_dict": {"yes": True, "no": False},
+                    }
+                ],
+            ),
+            # TODO: Replace this static TSV with a source derived directly
+            # from UMLS.
+            TableConfig(
+                file_path=self.base_path / "./common/umls/umls_tty.tsv",
+                delimiter="\t",
+                table_name="umls_tty",
+                headers=["TTY","TTY_STR"],
+                dtypes={"TTY": "str","TTY_STR": "str",},
+                parquet_types=["STRING", "STRING"],
+            ),
+        ]
 
     def filter_duplicated_meds(
         self, path: pathlib.Path, delimiter: str, filtered_path: pathlib.Path
@@ -127,6 +128,7 @@ def prepare_queries(
         **kwargs,
     ):
         # fetch and add vsac tables
+        self.tables = self.get_table_configs()
         vsac_stewards = vsac.get_vsac_stewards(config)
         for steward in vsac_stewards:
             vsac.download_oid_data(steward, config=config, path=self.base_path /'data')
@@ -195,4 +197,3 @@ def prepare_queries(
                     )
                 )
                 progress.advance(task)
-
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ version = "1.0"
 requires-python = ">= 3.10"
 # If you need python libraries, add them here
 dependencies = [
-    "cumulus-library >= 2.3.0",
+    "cumulus-library >= 3.0.0",
     "sqlfluff >=3",
     "xlrd",
     "openpyxl",

diff --git a/tests/test_additional_rules_builder.py b/tests/test_additional_rules_builder.py
@@ -36,7 +36,7 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm):
         {
             'name':'opioid__acep_potential_rules', 
             'columns':10,
-            'count':2880,
+            'count':1440,
             'first':(
                 1819, '1151359', 'BN', 'SCDG', 18636093, 'RO', 'has_ingredient', 
                 'Buprenorphine', 'buprenorphine / naloxone Oral Product', 
@@ -50,20 +50,20 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm):
         {
             'name':'opioid__acep_included_rels', 
             'columns':10,
-            'count':28,
+            'count':14,
             'first':(
                 1819, '1431077', 'BN', 'BN', 43028489, 'RN', 'reformulated_to', 
-                'reformulated_to', 'Buprenorphine', 'Zubsolv', 'zubsolv'
+                'Buprenorphine', 'Zubsolv', 'zubsolv'
             ),
             'last': (
                 1819, '904871', 'BN', 'BN', 3764389, 'RN', 'reformulated_to', 
-                'reformulated_to', 'Buprenorphine', 'Butrans', 'butrans'
+                'Buprenorphine', 'Butrans', 'butrans'
             ),
         },
         {
             'name':'opioid__acep_included_keywords', 
             'columns':10,
-            'count':2808,
+            'count':1404,
             'first':(
                 1819, '1151359', 'BN', 'SCDG', 18636093, 'RO', 'has_ingredient', 
                 'Buprenorphine', 'buprenorphine / naloxone Oral Product', 

diff --git a/tests/test_rxnorm_vsac_builder.py b/tests/test_rxnorm_vsac_builder.py
@@ -13,7 +13,7 @@
     clear=True,
 )
 @mock.patch("cumulus_library.apis.umls.UmlsApi")
-def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm,  tmp_path):
+def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm):
     with open(pathlib.Path(__file__).parent / "test_data/vsac_resp.json") as f:
         resp = json.load(f)
     mock_api.return_value.get_vsac_valuesets.return_value = resp
@@ -27,10 +27,10 @@ def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm,  tmp_path):
     builder = rxnorm_vsac_builder.RxNormVsacBuilder()
     builder.execute_queries(config=mock_db_config_rxnorm, manifest=manifest)
     res = cursor.execute('select * from opioid__acep_rela').fetchall()
-    assert len(res) == 1800
+    assert len(res) == 900
     assert res[0] == (
         1819, 'Product containing buprenorphine (medicinal product)', 'FN', 
-        'SNOMEDCT_US', 1818, 'RN', 'tradename_of', 4716626
+        'SNOMEDCT_US', 1818, 'RN', 'reformulated_to', 4716626
     )
     assert res[-1] == (
         1819, 'Buprenorphine', 'IN', 'GS', 1655031, 'RO', 'has_ingredient', 86130850

diff --git a/tests/test_static_builder.py b/tests/test_static_builder.py
@@ -62,7 +62,7 @@ def test_static_tables(
     shutil.copy(test_path / "filtered.csv", tmp_path / "filtered.csv")
     builder = static_builder.StaticBuilder()
     filtered = tmp_path / filtered if filtered else None
-    builder.tables = [
+    builder.get_table_configs = lambda: [
         static_builder.TableConfig(
             file_path=tmp_path / "static_table.csv",
             delimiter=",",