From fcdc61f1be2432b56d5bcb4750bee6e78362f3e6 Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Tue, 30 Jul 2024 11:03:13 -0400 Subject: [PATCH 1/2] Updated tests, add CI hook --- .github/workflows/ci.yaml | 42 +++++++ .../vocab/additional_rules_builder.py | 1 - .../vocab/rxnorm_vsac_builder.py | 10 +- .../vocab/static_builder.py | 103 +++++++++--------- pyproject.toml | 2 +- tests/test_additional_rules_builder.py | 14 ++- tests/test_rxnorm_vsac_builder.py | 6 +- tests/test_static_builder.py | 2 +- 8 files changed, 112 insertions(+), 68 deletions(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..ac3669a --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,42 @@ +name: CI +on: + pull_request: + paths-ignore: + - 'docs/**' + push: + branches: + - main + paths-ignore: + - 'docs/**' + +jobs: + unittest: + name: unit tests + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: pip + + - name: Get library from main + run: | + git clone https://github.com/smart-on-fhir/cumulus-library.git + cd cumulus-library + pip install -e . + cd .. + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[test]" + - name: Create mock AWS credentials + run: | + mkdir ~/.aws && touch ~/.aws/credentials + echo -e "[test]\naws_access_key_id = test\naws_secret_access_key = test" > ~/.aws/credentials + - name: Test with pytest + run: | + python -m pytest tests \ No newline at end of file diff --git a/cumulus_library_opioid/vocab/additional_rules_builder.py b/cumulus_library_opioid/vocab/additional_rules_builder.py index d8e368e..18d5e44 100644 --- a/cumulus_library_opioid/vocab/additional_rules_builder.py +++ b/cumulus_library_opioid/vocab/additional_rules_builder.py @@ -85,7 +85,6 @@ def prepare_queries( 'r.rui', 'r.rel', 'r.rela', - 'e.rela', 'r.str1', 'r.str2', 'r.keyword', diff --git a/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py b/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py index d5d0980..21bcf6f 100644 --- a/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py +++ b/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py @@ -34,12 +34,11 @@ def get_create_view_filter_by( ): a_schema = a_schema or 'rxnorm.' a_join_col = a_join_col or 'a.rxcui' - b_join_col = b_join_col or 'b.rxcui' - b_table = b_table or f'opioid__{steward}_vsac', + b_join_col = b_join_col or 'b.code' + b_table = b_table or f'opioid__{steward}_vsac' join_clauses = join_clauses or [f"{a_join_col} = {b_join_col}"] - view_name = view_name or ( - f'{manifest.get_study_prefix()}__{steward}_{a_table}' - ) + view_name = view_name or f'{manifest.get_study_prefix()}__{steward}_{a_table}' + return base_templates.get_create_view_from_tables( view_name=view_name, @@ -111,4 +110,3 @@ def get_create_view_filter_by( b_join_col='b.rxcui1', ) ) - diff --git a/cumulus_library_opioid/vocab/static_builder.py b/cumulus_library_opioid/vocab/static_builder.py index 924e18c..9848141 100644 --- a/cumulus_library_opioid/vocab/static_builder.py +++ b/cumulus_library_opioid/vocab/static_builder.py @@ -28,56 +28,57 @@ class StaticBuilder(base_table_builder.BaseTableBuilder): display_text = "Building static data tables..." base_path = pathlib.Path(__file__).resolve().parent - tables = [ # noqa: RUF012 - TableConfig( - file_path=base_path / "./common/keywords/keywords.tsv", - delimiter="\t", - table_name="keywords", - headers=["STR"], - dtypes={"STR": "str"}, - parquet_types=["STRING"], - filtered_path=base_path / "./common/keywords/keywords.filtered.tsv", - ), - TableConfig( - file_path=base_path / "./all_rxcui_str.RXNCONSO_curated.tsv", - delimiter="\t", - table_name="all_rxnconso_keywords", - headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"], - dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"}, - parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"], - ), - TableConfig( - file_path=base_path / "./common/expand_rules/expand_rules.tsv", - delimiter="\t", - table_name="search_rules", - headers=[ - "TTY1", - "RELA", - "TTY2", - "rule", - ], - dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"}, - parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"], - ignore_header=True, - map_cols=[ - { - "from": "rule", - "to": "include", - "map_dict": {"yes": True, "no": False}, - } - ], - ), - # TODO: We should eventually replace this with a source derived from - # UMLS directly at some point - TableConfig( - file_path=base_path / "./common/umls/umls_tty.tsv", - delimiter="\t", - table_name="umls_tty", - headers=["TTY","TTY_STR"], - dtypes={"TTY": "str","TTY_STR": "str",}, - parquet_types=["STRING", "STRING"], - ), - ] + def get_table_configs(self): + return [ + TableConfig( + file_path=self.base_path / "./common/keywords/keywords.tsv", + delimiter="\t", + table_name="keywords", + headers=["STR"], + dtypes={"STR": "str"}, + parquet_types=["STRING"], + filtered_path=self.base_path / "./common/keywords/keywords.filtered.tsv", + ), + TableConfig( + file_path=self.base_path / "./all_rxcui_str.RXNCONSO_curated.tsv", + delimiter="\t", + table_name="all_rxnconso_keywords", + headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"], + dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"}, + parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"], + ), + TableConfig( + file_path=self.base_path / "./common/expand_rules/expand_rules.tsv", + delimiter="\t", + table_name="search_rules", + headers=[ + "TTY1", + "RELA", + "TTY2", + "rule", + ], + dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"}, + parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"], + ignore_header=True, + map_cols=[ + { + "from": "rule", + "to": "include", + "map_dict": {"yes": True, "no": False}, + } + ], + ), + # TODO: We should eventually replace this with a source derived from + # UMLS directly at some point + TableConfig( + file_path=self.base_path / "./common/umls/umls_tty.tsv", + delimiter="\t", + table_name="umls_tty", + headers=["TTY","TTY_STR"], + dtypes={"TTY": "str","TTY_STR": "str",}, + parquet_types=["STRING", "STRING"], + ), + ] def filter_duplicated_meds( self, path: pathlib.Path, delimiter: str, filtered_path: pathlib.Path @@ -127,6 +128,7 @@ def prepare_queries( **kwargs, ): # fetch and add vsac tables + self.tables = self.get_table_configs() vsac_stewards = vsac.get_vsac_stewards(config) for steward in vsac_stewards: vsac.download_oid_data(steward, config=config, path=self.base_path /'data') @@ -195,4 +197,3 @@ def prepare_queries( ) ) progress.advance(task) - diff --git a/pyproject.toml b/pyproject.toml index 0143a7b..9e5a3ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "1.0" requires-python = ">= 3.10" # If you need python libraries, add them here dependencies = [ - "cumulus-library >= 2.3.0", + "cumulus-library >= 3.0.0", "sqlfluff >=3", "xlrd", "openpyxl", diff --git a/tests/test_additional_rules_builder.py b/tests/test_additional_rules_builder.py index 208c6ca..993215a 100644 --- a/tests/test_additional_rules_builder.py +++ b/tests/test_additional_rules_builder.py @@ -36,7 +36,7 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm): { 'name':'opioid__acep_potential_rules', 'columns':10, - 'count':2880, + 'count':1440, 'first':( 1819, '1151359', 'BN', 'SCDG', 18636093, 'RO', 'has_ingredient', 'Buprenorphine', 'buprenorphine / naloxone Oral Product', @@ -50,20 +50,20 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm): { 'name':'opioid__acep_included_rels', 'columns':10, - 'count':28, + 'count':14, 'first':( 1819, '1431077', 'BN', 'BN', 43028489, 'RN', 'reformulated_to', - 'reformulated_to', 'Buprenorphine', 'Zubsolv', 'zubsolv' + 'Buprenorphine', 'Zubsolv', 'zubsolv' ), 'last': ( 1819, '904871', 'BN', 'BN', 3764389, 'RN', 'reformulated_to', - 'reformulated_to', 'Buprenorphine', 'Butrans', 'butrans' + 'Buprenorphine', 'Butrans', 'butrans' ), }, { 'name':'opioid__acep_included_keywords', 'columns':10, - 'count':2808, + 'count':1404, 'first':( 1819, '1151359', 'BN', 'SCDG', 18636093, 'RO', 'has_ingredient', 'Buprenorphine', 'buprenorphine / naloxone Oral Product', @@ -95,6 +95,10 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm): f"Select * from {table_conf['name']} order by " f"{','.join([str(x+1) for x in range(table_conf['columns'])])}" ).fetchall() + print(table_conf['name']) + print(res[0]) + print(res[-1]) + print(len(res)) assert len(res) == table_conf['count'] assert res[0] == table_conf['first'] if table_conf['count'] > 1: diff --git a/tests/test_rxnorm_vsac_builder.py b/tests/test_rxnorm_vsac_builder.py index c321a0b..2072467 100644 --- a/tests/test_rxnorm_vsac_builder.py +++ b/tests/test_rxnorm_vsac_builder.py @@ -13,7 +13,7 @@ clear=True, ) @mock.patch("cumulus_library.apis.umls.UmlsApi") -def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm, tmp_path): +def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm): with open(pathlib.Path(__file__).parent / "test_data/vsac_resp.json") as f: resp = json.load(f) mock_api.return_value.get_vsac_valuesets.return_value = resp @@ -27,10 +27,10 @@ def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm, tmp_path): builder = rxnorm_vsac_builder.RxNormVsacBuilder() builder.execute_queries(config=mock_db_config_rxnorm, manifest=manifest) res = cursor.execute('select * from opioid__acep_rela').fetchall() - assert len(res) == 1800 + assert len(res) == 900 assert res[0] == ( 1819, 'Product containing buprenorphine (medicinal product)', 'FN', - 'SNOMEDCT_US', 1818, 'RN', 'tradename_of', 4716626 + 'SNOMEDCT_US', 1818, 'RN', 'reformulated_to', 4716626 ) assert res[-1] == ( 1819, 'Buprenorphine', 'IN', 'GS', 1655031, 'RO', 'has_ingredient', 86130850 diff --git a/tests/test_static_builder.py b/tests/test_static_builder.py index 790c33d..154690d 100644 --- a/tests/test_static_builder.py +++ b/tests/test_static_builder.py @@ -62,7 +62,7 @@ def test_static_tables( shutil.copy(test_path / "filtered.csv", tmp_path / "filtered.csv") builder = static_builder.StaticBuilder() filtered = tmp_path / filtered if filtered else None - builder.tables = [ + builder.get_table_configs = lambda: [ static_builder.TableConfig( file_path=tmp_path / "static_table.csv", delimiter=",", From b0da4f98309f956300bc9a620833a73661969d94 Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Tue, 30 Jul 2024 15:05:00 -0400 Subject: [PATCH 2/2] Cleanup ci pip, trim prints --- .github/workflows/ci.yaml | 8 ++------ tests/test_additional_rules_builder.py | 4 ---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ac3669a..7df4b14 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,7 +12,7 @@ on: jobs: unittest: name: unit tests - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -23,11 +23,7 @@ jobs: cache: pip - name: Get library from main - run: | - git clone https://github.com/smart-on-fhir/cumulus-library.git - cd cumulus-library - pip install -e . - cd .. + run: pip install git+https://github.com/smart-on-fhir/cumulus-library.git - name: Install dependencies run: | diff --git a/tests/test_additional_rules_builder.py b/tests/test_additional_rules_builder.py index 993215a..ea2c158 100644 --- a/tests/test_additional_rules_builder.py +++ b/tests/test_additional_rules_builder.py @@ -95,10 +95,6 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm): f"Select * from {table_conf['name']} order by " f"{','.join([str(x+1) for x in range(table_conf['columns'])])}" ).fetchall() - print(table_conf['name']) - print(res[0]) - print(res[-1]) - print(len(res)) assert len(res) == table_conf['count'] assert res[0] == table_conf['first'] if table_conf['count'] > 1: