diff --git a/DESCRIPTION b/DESCRIPTION index f3e5f67..55475b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,8 @@ Imports: SqlRender, stringr, stringdist, - tidyr + tidyr, + tidyselect Suggests: readr, remotes, diff --git a/NAMESPACE b/NAMESPACE index 9cffce1..b0614e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,8 +9,10 @@ export(getConceptAncestor) export(getConceptDescendant) export(getConceptIdDetails) export(getConceptPrevalenceCounts) +export(getConceptRecordCount) export(getConceptRelationship) export(getConceptSynonym) +export(getCountOfSourceCodesMappedToStandardConcept) export(getDomain) export(getDrugIngredients) export(getExcludedConceptsInConceptSetExpression) diff --git a/R/FindOrphanConcepts.R b/R/FindOrphanConcepts.R index 8542cb1..ef4e65b 100644 --- a/R/FindOrphanConcepts.R +++ b/R/FindOrphanConcepts.R @@ -61,7 +61,7 @@ findOrphanConcepts <- function(connectionDetails = NULL, orphan_concept_table = paste0(tempTableName, "oo") ) DatabaseConnector::executeSql(connection, sql) - + sql <- "SELECT * FROM @orphan_concept_table;" orphanConcepts <- DatabaseConnector::renderTranslateQuerySql( diff --git a/R/GetConceptRecordCount.R b/R/GetConceptRecordCount.R new file mode 100644 index 0000000..1d27c95 --- /dev/null +++ b/R/GetConceptRecordCount.R @@ -0,0 +1,331 @@ +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of ConceptSetDiagnostics +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#' Given conceptId(s) get the record count. +#' +#' @description +#' Given conceptId(s) get the record count. +#' +#' @template Connection +#' +#' @template ConceptIds +#' +#' @template CdmDatabaseSchema +#' +#' @template VocabularyDatabaseSchema +#' +#' @template TempEmulationSchema +#' +#' @param minCellCount The minimum cell count for fields containing person/subject count. +#' +#' @return +#' Returns a tibble data frame. +#' +#' @export +# function: getConceptRecordCount ---- +getConceptRecordCount <- function(conceptIds, + connection = NULL, + connectionDetails = NULL, + cdmDatabaseSchema, + vocabularyDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + minCellCount = 0) { + if (is.null(connection)) { + connection <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection)) + } + + tempTableName <- loadTempConceptTable( + conceptIds = conceptIds, + connection = connection, + tempEmulationSchema = tempEmulationSchema + ) + + domains <- + getDomainInformation(packageName = "ConceptSetDiagnostics") + domains <- domains$wide %>% + dplyr::filter(.data$isEraTable == FALSE) + # filtering out ERA tables because they are supposed to be derived tables, and counting them is double counting + + sqlDdlDrop <- + "DROP TABLE IF EXISTS @concept_count_temp;" + sqlDdlCreate <- " + CREATE TABLE @concept_count_temp ( + concept_id INT, + event_year INT, + event_month INT, + concept_is_standard VARCHAR(1), + concept_count BIGINT, + subject_count BIGINT + );" + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sqlDdlDrop, + tempEmulationSchema = tempEmulationSchema, + concept_count_temp = paste0(tempTableName, "cc"), + progressBar = FALSE, + reportOverallTime = FALSE + ) + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sqlDdlCreate, + tempEmulationSchema = tempEmulationSchema, + concept_count_temp = paste0(tempTableName, "cc"), + progressBar = FALSE, + reportOverallTime = FALSE + ) + # REASON for many SQL --DISTINCT subject_count cannot be computed from aggregation query of calendar month level data + sql1 <- "INSERT INTO @concept_count_temp + SELECT @domain_concept_id concept_id, + YEAR(@domain_start_date) event_year, + MONTH(@domain_start_date) event_month, + 'Y' concept_is_standard, + COUNT_BIG(*) concept_count, + COUNT_BIG(DISTINCT person_id) subject_count + FROM @cdm_database_schema.@domain_table dt + WHERE @domain_concept_id IN ( + SELECT DISTINCT concept_id + FROM @concept_id_universe + ) + AND YEAR(@domain_start_date) > 0 + AND @domain_concept_id > 0 + GROUP BY @domain_concept_id, + YEAR(@domain_start_date), + MONTH(@domain_start_date);" + sql2 <- " INSERT INTO @concept_count_temp + SELECT @domain_concept_id concept_id, + YEAR(@domain_start_date) event_year, + 0 AS event_month, + 'Y' concept_is_standard, + COUNT_BIG(*) concept_count, + COUNT_BIG(DISTINCT person_id) subject_count + FROM @cdm_database_schema.@domain_table + WHERE @domain_concept_id IN ( + SELECT DISTINCT concept_id + FROM @concept_id_universe + ) + AND YEAR(@domain_start_date) > 0 + AND @domain_concept_id > 0 + GROUP BY @domain_concept_id, + YEAR(@domain_start_date);" + sql3 <- "INSERT INTO @concept_count_temp + SELECT @domain_concept_id concept_id, + 0 as event_year, + 0 as event_month, + 'Y' concept_is_standard, + COUNT_BIG(*) concept_count, + COUNT_BIG(DISTINCT person_id) subject_count + FROM @cdm_database_schema.@domain_table dt + WHERE @domain_concept_id IN ( + SELECT DISTINCT concept_id + FROM @concept_id_universe + ) + AND YEAR(@domain_start_date) > 0 + AND @domain_concept_id > 0 + GROUP BY @domain_concept_id;" + + + sql4 <- "INSERT INTO @concept_count_temp + SELECT @domain_concept_id concept_id, + YEAR(@domain_start_date) event_year, + MONTH(@domain_start_date) event_month, + 'N' concept_is_standard, + COUNT_BIG(*) concept_count, + COUNT_BIG(DISTINCT person_id) subject_count + FROM @cdm_database_schema.@domain_table dt + LEFT JOIN ( + SELECT concept_id + FROM @vocabulary_database_schema.CONCEPT + WHERE standard_concept = 'S' + ) std + ON @domain_concept_id = std.concept_id + WHERE @domain_concept_id IN ( + SELECT DISTINCT concept_id + FROM @concept_id_universe + ) + AND YEAR(@domain_start_date) > 0 + AND @domain_concept_id > 0 + AND std.concept_id IS NULL + GROUP BY @domain_concept_id, + YEAR(@domain_start_date), + MONTH(@domain_start_date);" + sql5 <- " INSERT INTO @concept_count_temp + SELECT @domain_concept_id concept_id, + YEAR(@domain_start_date) event_year, + 0 AS event_month, + 'N' concept_is_standard, + COUNT_BIG(*) concept_count, + COUNT_BIG(DISTINCT person_id) subject_count + FROM @cdm_database_schema.@domain_table dt + LEFT JOIN ( + SELECT concept_id + FROM @vocabulary_database_schema.CONCEPT + WHERE standard_concept = 'S' + ) std ON @domain_concept_id = std.concept_id + WHERE @domain_concept_id IN ( + SELECT DISTINCT concept_id + FROM @concept_id_universe + ) + AND YEAR(@domain_start_date) > 0 + AND @domain_concept_id > 0 + AND std.concept_id IS NULL + GROUP BY @domain_concept_id, + YEAR(@domain_start_date);" + sql6 <- " INSERT INTO @concept_count_temp + SELECT @domain_concept_id concept_id, + 0 AS event_year, + 0 AS event_month, + 'N' concept_is_standard, + COUNT_BIG(*) concept_count, + COUNT_BIG(DISTINCT person_id) subject_count + FROM @cdm_database_schema.@domain_table dt + LEFT JOIN ( + SELECT concept_id + FROM @vocabulary_database_schema.CONCEPT + WHERE standard_concept = 'S' + ) std ON @domain_concept_id = std.concept_id + WHERE @domain_concept_id IN ( + SELECT DISTINCT concept_id + FROM @concept_id_universe + ) + AND YEAR(@domain_start_date) > 0 + AND @domain_concept_id > 0 + AND std.concept_id IS NULL + GROUP BY @domain_concept_id;" + + for (i in (1:nrow(domains))) { + rowData <- domains[i, ] + + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sql1, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainConceptId, + cdm_database_schema = cdmDatabaseSchema, + domain_start_date = rowData$domainStartDate, + concept_count_temp = paste0(tempTableName, "cc"), + concept_id_universe = tempTableName, + progressBar = FALSE, + reportOverallTime = FALSE + ) + + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sql2, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainConceptId, + cdm_database_schema = cdmDatabaseSchema, + domain_start_date = rowData$domainStartDate, + concept_count_temp = paste0(tempTableName, "cc"), + concept_id_universe = tempTableName, + progressBar = FALSE, + reportOverallTime = FALSE + ) + + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sql3, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainConceptId, + cdm_database_schema = cdmDatabaseSchema, + domain_start_date = rowData$domainStartDate, + concept_count_temp = paste0(tempTableName, "cc"), + concept_id_universe = tempTableName, + progressBar = FALSE, + reportOverallTime = FALSE + ) + } + + for (i in (1:nrow(domains))) { + rowData <- domains[i, ] + if (nchar(rowData$domainSourceConceptId) > 4) { + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sql4, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainSourceConceptId, + cdm_database_schema = cdmDatabaseSchema, + domain_start_date = rowData$domainStartDate, + concept_count_temp = paste0(tempTableName, "cc"), + concept_id_universe = tempTableName, + vocabulary_database_schema = vocabularyDatabaseSchema, + progressBar = FALSE, + reportOverallTime = FALSE + ) + + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sql5, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainSourceConceptId, + cdm_database_schema = cdmDatabaseSchema, + domain_start_date = rowData$domainStartDate, + concept_count_temp = paste0(tempTableName, "cc"), + concept_id_universe = tempTableName, + vocabulary_database_schema = vocabularyDatabaseSchema, + progressBar = FALSE, + reportOverallTime = FALSE + ) + + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sql6, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainSourceConceptId, + cdm_database_schema = cdmDatabaseSchema, + domain_start_date = rowData$domainStartDate, + concept_count_temp = paste0(tempTableName, "cc"), + concept_id_universe = tempTableName, + vocabulary_database_schema = vocabularyDatabaseSchema, + progressBar = FALSE, + reportOverallTime = FALSE + ) + } + } + retrieveSql <- "SELECT concept_id, event_year, event_month, + sum(concept_count) concept_count, + max(subject_count) subject_count + FROM @concept_count_temp c + GROUP BY concept_id, event_year, event_month + ORDER By concept_id, event_year, event_month;" + data <- renderTranslateQuerySql( + connection = connection, + sql = retrieveSql, + concept_count_temp = paste0(tempTableName, "cc"), + snakeCaseToCamelCase = TRUE + ) %>% + dplyr::tibble() %>% + dplyr::filter(.data$subjectCount > minCellCount) + + # i was thinking of keeping counts at the table level - but the file size became too big + # so i decided to not include them + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sqlDdlDrop, + tempEmulationSchema = tempEmulationSchema, + concept_count_temp = paste0(tempTableName, "cc"), + progressBar = FALSE, + reportOverallTime = FALSE + ) + return(data) +} diff --git a/R/GetCountOfSourceCodesMappedToStandardConcept.R b/R/GetCountOfSourceCodesMappedToStandardConcept.R new file mode 100644 index 0000000..6a267cc --- /dev/null +++ b/R/GetCountOfSourceCodesMappedToStandardConcept.R @@ -0,0 +1,170 @@ +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of ConceptSetDiagnostics +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#' Given conceptId(s) get the counts of occurrence with mapping. +#' +#' @description +#' Given conceptId(s) get the counts of occurrence with mapping. +#' +#' @template Connection +#' +#' @template ConceptIds +#' +#' @template CdmDatabaseSchema +#' +#' @template TempEmulationSchema +#' +#' @param minCellCount The minimum cell count for fields containing person/subject count. +#' +#' @return +#' Returns a tibble data frame. +#' +#' @export +# function: getCountOfSourceCodesMappedToStandardConcept ---- +getCountOfSourceCodesMappedToStandardConcept <- function(conceptIds, + connection = NULL, + connectionDetails = NULL, + cdmDatabaseSchema, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + minCellCount = 0) { + if (is.null(connection)) { + connection <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection)) + } + + tempTableName <- loadTempConceptTable( + conceptIds = conceptIds, + connection = connection, + tempEmulationSchema = tempEmulationSchema + ) + + domains <- + getDomainInformation(packageName = "ConceptSetDiagnostics") + domains <- domains$wide %>% + dplyr::filter(nchar(.data$domainSourceConceptId) > 1) + + sqlConceptMapping <- + " DROP TABLE IF EXISTS @concept_mapping_table; + CREATE TABLE @concept_mapping_table (concept_id INT, + source_concept_id INT, + domain_table VARCHAR(20), + concept_count BIGINT, + subject_count BIGINT);" + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sqlConceptMapping, + tempEmulationSchema = tempEmulationSchema, + concept_mapping_table = paste0(tempTableName, "cc"), + progressBar = FALSE, + reportOverallTime = FALSE + ) + + sqlMapping <- " + INSERT INTO @concept_mapping_table + SELECT @domain_concept_id concept_id, + @domain_source_concept_id source_concept_id, + '@domainTableShort' domain_table, + COUNT(*) AS concept_count, + COUNT(DISTINCT person_id) AS subject_count + FROM @cdm_database_schema.@domain_table + WHERE + @domain_source_concept_id IS NOT NULL + AND @domain_source_concept_id > 0 + AND @domain_concept_id IN + (SELECT concept_id FROM @concept_id_table) + GROUP BY @domain_concept_id, + @domain_source_concept_id + ORDER BY @domain_concept_id, + @domain_source_concept_id;" + + conceptMapping <- list() + for (i in (1:nrow(domains))) { + rowData <- domains[i, ] + + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sqlMapping, + tempEmulationSchema = tempEmulationSchema, + domain_table = rowData$domainTable, + domain_concept_id = rowData$domainConceptId, + domain_source_concept_id = rowData$domainSourceConceptId, + cdm_database_schema = cdmDatabaseSchema, + concept_id_table = tempTableName, + concept_mapping_table = paste0(tempTableName, "cc"), + domainTableShort = rowData$domainTableShort, + reportOverallTime = FALSE, + progressBar = FALSE + ) + } + sql <- "SELECT DISTINCT * + FROM @concept_mapping_table + ORDER BY domain_table, + concept_id, + source_concept_id, + concept_count, + subject_count;" + conceptMapping <- + DatabaseConnector::renderTranslateQuerySql( + connection = connection, + sql = sql, + concept_mapping_table = paste0(tempTableName, "cc"), + snakeCaseToCamelCase = TRUE, + tempEmulationSchema = tempEmulationSchema + ) + conceptMapping <- conceptMapping %>% + dplyr::arrange( + .data$domainTable, + .data$conceptId, + .data$sourceConceptId, + .data$conceptCount, + .data$subjectCount + ) %>% + dplyr::tibble() + + if (nrow(conceptMapping) > 0) { + conceptMapping <- dplyr::bind_rows( + conceptMapping, + conceptMapping %>% + dplyr::group_by( + .data$conceptId, + .data$sourceConceptId + ) %>% + dplyr::summarise( + conceptCount = sum(.data$conceptCount), + subjectCount = max(.data$subjectCount), + .groups = "keep" + ) %>% + dplyr::mutate(domainTable = "All") + ) %>% + dplyr::distinct() + } + + conceptMapping <- conceptMapping %>% + dplyr::filter(.data$subjectCount > minCellCount) + + sqlDdlDrop <- + "DROP TABLE IF EXISTS @concept_mapping_table;" + DatabaseConnector::renderTranslateExecuteSql( + connection = connection, + sql = sqlDdlDrop, + concept_mapping_table = paste0(tempTableName, "cc"), + tempEmulationSchema = tempEmulationSchema, + progressBar = FALSE, + reportOverallTime = FALSE + ) + return(conceptMapping) +} diff --git a/R/Private.R b/R/Private.R index ca8f5d7..dd437ce 100644 --- a/R/Private.R +++ b/R/Private.R @@ -118,3 +118,168 @@ dropTempConceptTable <- concept_id_table = tempTableName ) } + + +#' Get domain information +#' +#' @param packageName e.g. 'CohortDiagnostics' +#' +#' @return +#' A list with two tibble data frame objects with domain information represented in wide and long format respectively. +getDomainInformation <- function(packageName = NULL) { + domains <- + readr::read_csv(system.file(file.path("csv", "domains.csv"), + package = "ConceptSetDiagnostics" + ), + col_types = readr::cols() + ) + + domains <- domains %>% + .replaceNaInDataFrameWithEmptyString() %>% + dplyr::mutate(domainTableShort = stringr::str_sub( + string = toupper(.data$domain), + start = 1, + end = 2 + )) %>% + dplyr::mutate( + domainTableShort = dplyr::case_when( + stringr::str_detect(string = tolower(.data$domain), pattern = "era") ~ paste0(.data$domainTableShort, "E"), + TRUE ~ .data$domainTableShort + ) + ) + + domains$domainConceptIdShort <- + stringr::str_replace_all( + string = sapply( + stringr::str_extract_all( + string = camelCaseToTitleCase(snakeCaseToCamelCase(domains$domainConceptId)), + pattern = "[A-Z]" + ), + paste, + collapse = " " + ), + pattern = " ", + replacement = "" + ) + domains$domainSourceConceptIdShort <- + stringr::str_replace_all( + string = sapply( + stringr::str_extract_all( + string = camelCaseToTitleCase(snakeCaseToCamelCase(domains$domainSourceConceptId)), + pattern = "[A-Z]" + ), + paste, + collapse = " " + ), + pattern = " ", + replacement = "" + ) + domains <- domains %>% + dplyr::mutate(isEraTable = stringr::str_detect( + string = .data$domainTable, + pattern = "era" + )) + data <- list() + data$wide <- domains + data$long <- dplyr::bind_rows( + data$wide %>% + dplyr::select( + .data$domainTableShort, + .data$domainTable, + .data$domainConceptIdShort, + .data$domainConceptId + ) %>% + dplyr::rename( + domainFieldShort = .data$domainConceptIdShort, + domainField = .data$domainConceptId + ), + data$wide %>% + dplyr::select( + .data$domainTableShort, + .data$domainSourceConceptIdShort, + .data$domainTable, + .data$domainSourceConceptId + ) %>% + dplyr::rename( + domainFieldShort = .data$domainSourceConceptIdShort, + domainField = .data$domainSourceConceptId + ) + ) %>% + dplyr::distinct() %>% + dplyr::filter(.data$domainFieldShort != "") %>% + dplyr::mutate(eraTable = stringr::str_detect( + string = .data$domainTable, + pattern = "era" + )) %>% + dplyr::mutate(isSourceField = stringr::str_detect( + string = .data$domainField, + pattern = "source" + )) + return(data) +} + +.replaceNaInDataFrameWithEmptyString <- function(data) { + # https://github.com/r-lib/tidyselect/issues/201 + data %>% + dplyr::collect() %>% + dplyr::mutate(dplyr::across( + tidyselect:::where(is.character), + ~ tidyr::replace_na(.x, as.character("")) + )) %>% + dplyr::mutate(dplyr::across( + tidyselect:::where(is.logical), + ~ tidyr::replace_na(.x, as.character("")) + )) %>% + dplyr::mutate(dplyr::across( + tidyselect:::where(is.numeric), + ~ tidyr::replace_na(.x, as.numeric("")) + )) +} + + +# private function - not exported +camelCaseToTitleCase <- function(string) { + string <- gsub("([A-Z])", " \\1", string) + string <- gsub("([a-z])([0-9])", "\\1 \\2", string) + substr(string, 1, 1) <- toupper(substr(string, 1, 1)) + return(string) +} + +# private function - not exported +snakeCaseToCamelCase <- function(string) { + string <- tolower(string) + for (letter in letters) { + string <- + gsub(paste("_", letter, sep = ""), toupper(letter), string) + } + string <- gsub("_([0-9])", "\\1", string) + return(string) +} + +# private function - not exported +camelCaseToSnakeCase <- function(string) { + string <- gsub("([A-Z])", "_\\1", string) + string <- tolower(string) + string <- gsub("([a-z])([0-9])", "\\1_\\2", string) + return(string) +} + +# private function - not exported +titleCaseToCamelCase <- function(string) { + string <- stringr::str_replace_all( + string = string, + pattern = " ", + replacement = "" + ) + substr(string, 1, 1) <- tolower(substr(string, 1, 1)) + return(string) +} + +# private function - not exported +quoteLiterals <- function(x) { + if (is.null(x)) { + return("") + } else { + return(paste0("'", paste(x, collapse = "', '"), "'")) + } +} diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 96731be..eac562e 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,5 +2,5 @@ pandoc: 2.17.1.1 pkgdown: 2.0.6 pkgdown_sha: ~ articles: {} -last_built: 2022-07-22T02:24Z +last_built: 2022-07-22T22:11Z diff --git a/docs/reference/getConceptRecordCount.html b/docs/reference/getConceptRecordCount.html new file mode 100644 index 0000000..795baa7 --- /dev/null +++ b/docs/reference/getConceptRecordCount.html @@ -0,0 +1,128 @@ + +Given conceptId(s) get the record count. — getConceptRecordCount • ConceptSetDiagnostics + + +
+
+ + + +
+
+ + +
+

Given conceptId(s) get the record count.

+
+ +
+
getConceptRecordCount(
+  conceptIds,
+  connection = NULL,
+  connectionDetails = NULL,
+  cdmDatabaseSchema,
+  vocabularyDatabaseSchema = cdmDatabaseSchema,
+  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+  minCellCount = 0
+)
+
+ +
+

Arguments

+
conceptIds
+

An array of Concept ids.

+ + +
connection
+

An object of type connection as created using the +connect function in the +DatabaseConnector package. Can be left NULL if connectionDetails +is provided, in which case a new connection will be opened at the start +of the function, and closed when the function finishes.

+ + +
connectionDetails
+

An object of type connectionDetails as created using the +createConnectionDetails function in the +DatabaseConnector package. Can be left NULL if connection is +provided.

+ + +
cdmDatabaseSchema
+

Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.

+ + +
vocabularyDatabaseSchema
+

The schema name of containing the vocabulary tables.

+ + +
tempEmulationSchema
+

Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.

+ + +
minCellCount
+

The minimum cell count for fields containing person/subject count.

+ +
+
+

Value

+ + +

Returns a tibble data frame.

+
+ +
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/getCountOfSourceCodesMappedToStandardConcept.html b/docs/reference/getCountOfSourceCodesMappedToStandardConcept.html new file mode 100644 index 0000000..d803cc5 --- /dev/null +++ b/docs/reference/getCountOfSourceCodesMappedToStandardConcept.html @@ -0,0 +1,123 @@ + +Given conceptId(s) get the counts of occurrence with mapping. — getCountOfSourceCodesMappedToStandardConcept • ConceptSetDiagnostics + + +
+
+ + + +
+
+ + +
+

Given conceptId(s) get the counts of occurrence with mapping.

+
+ +
+
getCountOfSourceCodesMappedToStandardConcept(
+  conceptIds,
+  connection = NULL,
+  connectionDetails = NULL,
+  cdmDatabaseSchema,
+  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+  minCellCount = 0
+)
+
+ +
+

Arguments

+
conceptIds
+

An array of Concept ids.

+ + +
connection
+

An object of type connection as created using the +connect function in the +DatabaseConnector package. Can be left NULL if connectionDetails +is provided, in which case a new connection will be opened at the start +of the function, and closed when the function finishes.

+ + +
connectionDetails
+

An object of type connectionDetails as created using the +createConnectionDetails function in the +DatabaseConnector package. Can be left NULL if connection is +provided.

+ + +
cdmDatabaseSchema
+

Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.

+ + +
tempEmulationSchema
+

Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.

+ + +
minCellCount
+

The minimum cell count for fields containing person/subject count.

+ +
+
+

Value

+ + +

Returns a tibble data frame.

+
+ +
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/getDomainInformation.html b/docs/reference/getDomainInformation.html new file mode 100644 index 0000000..f4fe5e4 --- /dev/null +++ b/docs/reference/getDomainInformation.html @@ -0,0 +1,86 @@ + +Get domain information — getDomainInformation • ConceptSetDiagnostics + + +
+
+ + + +
+
+ + +
+

Get domain information

+
+ +
+
getDomainInformation(packageName = NULL)
+
+ +
+

Arguments

+
packageName
+

e.g. 'CohortDiagnostics'

+ +
+
+

Value

+ + +

A list with two tibble data frame objects with domain information represented in wide and long format respectively.

+
+ +
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 626d0bb..33371e5 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -77,6 +77,10 @@

All functions getConceptPrevalenceCounts()

get concept id count

+ +

getConceptRecordCount()

+ +

Given conceptId(s) get the record count.

getConceptRelationship()

@@ -85,10 +89,18 @@

All functions getConceptSynonym()

given a list of conceptIds, get their synonyms

+ +

getCountOfSourceCodesMappedToStandardConcept()

+ +

Given conceptId(s) get the counts of occurrence with mapping.

getDomain()

Get all the domain id(s) in the vocabulary schema.

+ +

getDomainInformation()

+ +

Get domain information

getDrugIngredients()

diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 4a3b03a..fe2b9c7 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -48,6 +48,9 @@ /reference/getConceptPrevalenceCountsForConceptIds.html + + /reference/getConceptRecordCount.html + /reference/getConceptRelationship.html @@ -66,12 +69,18 @@ /reference/getConceptSynonym.html + + /reference/getCountOfSourceCodesMappedToStandardConcept.html + /reference/getDeepConceptRelationship.html /reference/getDomain.html + + /reference/getDomainInformation.html + /reference/getDrugIngredients.html diff --git a/extras/ConceptSetDiagnostics.pdf b/extras/ConceptSetDiagnostics.pdf index c48dc7f..3c2d262 100644 Binary files a/extras/ConceptSetDiagnostics.pdf and b/extras/ConceptSetDiagnostics.pdf differ diff --git a/extras/tests/TestEunomia.R b/extras/tests/TestEunomia.R deleted file mode 100644 index d5402ce..0000000 --- a/extras/tests/TestEunomia.R +++ /dev/null @@ -1,229 +0,0 @@ -library(magrittr) -databaseSchema <- "main" -cdmDatabaseSchema <- databaseSchema -connectionDetails <- Eunomia::getEunomiaConnectionDetails() -# DatabaseConnector::insertTable() -connection <- - DatabaseConnector::connect(connectionDetails = connectionDetails) -# dbReadTable(connection,CONCEPT) - -cohortDefinitionSet <- - CohortGenerator::getCohortDefinitionSet( - settingsFileName = "cohorts/CohortsToCreate.csv", - jsonFolder = "cohorts", - sqlFolder = "cohorts", - packageName = "ConceptSetDiagnostics" - ) %>% - dplyr::tibble() - - -#-------------------------------------------------------------Pre-Requisites ---------------------------- -# Get conceptIds to filter from concept_prevalence scheme (Filter Eunomia conceptIds fro Main Database) -concepts <- - DatabaseConnector::renderTranslateQuerySql( - conn = connection, - sql = "select CONCEPT_ID, - concept_name, - vocabulary_id, - domain_id, - standard_concept - from CONCEPT", - snakeCaseToCamelCase = TRUE - ) %>% - dplyr::tibble() - -concepts$rc <- - sample( - x = 1:nrow(concepts) * 2, - size = nrow(concepts), - replace = TRUE - ) -concepts$drc <- concepts$rc * 2 -concepts$dbc <- - sample(x = 1:2, - size = nrow(concepts), - replace = TRUE) -concepts$ddbc <- concepts$dbc * 2 - -DatabaseConnector::insertTable( - connection = connection, - tableName = "universe", - data = concepts, - camelCaseToSnakeCase = TRUE, - dropTableIfExists = TRUE, - createTable = TRUE -) - - - -#----1. getConceptAncestor---- -conceptAncestor <- ConceptSetDiagnostics::getConceptAncestor( - conceptIds = conceptId, - connection = connection, - vocabularyDatabaseSchema = databaseSchema -) -conceptAncestor - -#----2. getConceptDescendant---- -conceptDescendants <- ConceptSetDiagnostics::getConceptDescendant( - conceptIds = conceptAncestor %>% - dplyr::arrange(dplyr::desc(.data$maxLevelsOfSeparation)) %>% - dplyr::slice(1) %>% - dplyr::pull(.data$ancestorConceptId), - connection = connection, - vocabularyDatabaseSchema = databaseSchema -) -conceptDescendants - -#----3. getConceptIdDetails---- -allConceptIds <- ConceptSetDiagnostics::getConceptIdDetails( - conceptIds = c( - conceptDescendants$ancestorConceptId, - conceptDescendants$descendantConceptId, - conceptAncestor$ancestorConceptId, - conceptAncestor$descendantConceptId - ) %>% unique(), - connection = connection, - vocabularyDatabaseSchema = databaseSchema -) %>% - dplyr::arrange(.data$conceptId) -allConceptIds - -#----4. getConceptPrevalenceCountsForConceptIds---- -conceptPrevalenceCount <- - ConceptSetDiagnostics::getConceptPrevalenceCounts( - conceptIds = allConceptIds$conceptId, - connection = connection, - conceptPrevalenceTable = "main.universe" - ) %>% - dplyr::arrange(dplyr::desc(.data$drc)) -conceptPrevalenceCount - - -#----5. getConceptRelationship---- -conceptIdforRelationship <- 40162359 -ConceptSetDiagnostics::getConceptRelationship( - conceptIds = conceptIdforRelationship, - connectionDetails = connectionDetails, - vocabularyDatabaseSchema = databaseSchema -) - -#----6. getConceptSynonym---- -ConceptSetDiagnostics::getConceptSynonym( - conceptIds = conceptId, - connection = connection, - vocabularyDatabaseSchema = databaseSchema -) - -#----7. getDomain---- -ConceptSetDiagnostics::getDomain(connection = connection, - vocabularyDatabaseSchema = databaseSchema) - -#----8. getDrugIngredients -ConceptSetDiagnostics::getDrugIngredients( - connection = connection, - conceptIds = c(1127078, 1127433), - vocabularyDatabaseSchema = databaseSchema -) - -#----9. getExcludedConceptsInConceptSetExpression -conceptSetExpression <- - dplyr::bind_rows( - dplyr::tibble(conceptId = 4274025, - includeDescendants = TRUE), - dplyr::tibble( - conceptId = 4101796, - includeDescendants = TRUE, - isExcluded = TRUE - ) - ) %>% - convertConceptSetDataFrameToExpression() -excludedConcepts <- getExcludedConceptsInConceptSetExpression( - conceptSetExpression = conceptSetExpression, - connectionDetails = connectionDetails, - vocabularyDatabaseSchema = "main" -) - -#----10. getMappedSourceConcepts---- -ConceptSetDiagnostics::getMappedSourceConcepts( - conceptIds = 192671, - connectionDetails = connectionDetails, - vocabularyDatabaseSchema = databaseSchema -) - -#----11.getMappedStandardConcepts---- -ConceptSetDiagnostics::getMappedStandardConcepts( - conceptIds = 35208414, - connectionDetails = connectionDetails, - vocabularyDatabaseSchema = databaseSchema -) - -#----12.getMedraRelationship---- - -#----13. getRelationship---- -ConceptSetDiagnostics::getRelationship(connection = connection, vocabularyDatabaseSchema = databaseSchema) - -#----14. performStringSearchForConcepts---- -searchResultDataFrame <- - ConceptSetDiagnostics::performStringSearchForConcepts( - connectionDetails = connectionDetails, - vocabularyDatabaseSchema = databaseSchema, - searchString = searchKeyword - ) - -searchResultDataFrame$includeDescendants <- TRUE -searchResultDataFrame$includeMapped <- TRUE -searchResultDataFrame$isExcluded <- FALSE - -#----15. performStringSearchForConceptsUsingTsv---- - -#----16. getVocabulary---- -getVocabulary(connectionDetails = connectionDetails, - vocabularyDatabaseSchema = databaseSchema) - -#----17. resolveConceptSetExpression---- - - -#----18. resolveConceptSetsInCohortExpression---- - -#----19. convertConceptSetDataFrameToExpression---- -conceptSetExpression <- - ConceptSetDiagnostics::convertConceptSetDataFrameToExpression(conceptSetExpressionDataFrame = searchResultDataFrame) - - -#----20. convertConceptSetExpressionToDataFrame---- -conceptSetDataFrame <- - ConceptSetDiagnostics::convertConceptSetExpressionToDataFrame( - connectionDetails = connectionDetails, - conceptSetExpression = conceptSetExpression, - vocabularyDatabaseSchema = databaseSchema, - updateVocabularyFields = TRUE - ) - -#----21. resolveConceptSetExpression---- -resolvedConceptsIds <- - ConceptSetDiagnostics::resolveConceptSetExpression( - conceptSetExpression = conceptSetExpression, - connectionDetails = connectionDetails, - vocabularyDatabaseSchema = databaseSchema - ) - -#----22. ResolveConceptSetsInCohortExpression---- - - -#----21. convertConceptSetDataFrameToExpression---- - - -#----22. extractConceptSetsInCohortDefinition---- -ConceptSetDiagnostics::extractConceptSetsInCohortDefinition(cohortExpression = cohortDefinitionSet[1, ]$json %>% - RJSONIO::fromJSON(digits = 23)) - -#----23. extractConceptSetsInCohortDefinitionSet---- - - -#----24. optimizeConceptSetExpression -ConceptSetDiagnostics::optimizeConceptSetExpression( - conceptSetExpression = conceptSetExpression, - connection = connection, - vocabularyDatabaseSchema = databaseSchema -) diff --git a/inst/csv/domains.csv b/inst/csv/domains.csv new file mode 100644 index 0000000..3c4b50b --- /dev/null +++ b/inst/csv/domains.csv @@ -0,0 +1,10 @@ +domain,domainTable,domainStartDate,domainConceptId,domainSourceValue,domainSourceConceptId +DrugExposure,drug_exposure,drug_exposure_start_date,drug_concept_id,drug_source_value,drug_source_concept_id +DrugEra,drug_era,drug_era_start_date,drug_concept_id,, +ConditionOccurrence,condition_occurrence,condition_start_date,condition_concept_id,condition_source_value,condition_source_concept_id +ConditionEra,condition_era,condition_era_start_date,condition_concept_id,, +ProcedureOccurrence,procedure_occurrence,procedure_date,procedure_concept_id,procedure_source_value,procedure_source_concept_id +DeviceExposure,device_exposure,device_exposure_start_date,device_concept_id,device_source_value,device_source_concept_id +Measurement,measurement,measurement_date,measurement_concept_id,measurement_source_value,measurement_source_concept_id +Observation,observation,observation_date,observation_concept_id,observation_source_value,observation_source_concept_id +VisitOccurrence,visit_occurrence,visit_start_date,visit_concept_id,visit_source_value,visit_source_concept_id \ No newline at end of file diff --git a/man/getConceptRecordCount.Rd b/man/getConceptRecordCount.Rd new file mode 100644 index 0000000..82b8811 --- /dev/null +++ b/man/getConceptRecordCount.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GetConceptRecordCount.R +\name{getConceptRecordCount} +\alias{getConceptRecordCount} +\title{Given conceptId(s) get the record count.} +\usage{ +getConceptRecordCount( + conceptIds, + connection = NULL, + connectionDetails = NULL, + cdmDatabaseSchema, + vocabularyDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + minCellCount = 0 +) +} +\arguments{ +\item{conceptIds}{An array of Concept ids.} + +\item{connection}{An object of type \code{connection} as created using the +\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package. Can be left NULL if \code{connectionDetails} +is provided, in which case a new connection will be opened at the start +of the function, and closed when the function finishes.} + +\item{connectionDetails}{An object of type \code{connectionDetails} as created using the +\code{\link[DatabaseConnector]{createConnectionDetails}} function in the +DatabaseConnector package. Can be left NULL if \code{connection} is +provided.} + +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} + +\item{vocabularyDatabaseSchema}{The schema name of containing the vocabulary tables.} + +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} + +\item{minCellCount}{The minimum cell count for fields containing person/subject count.} +} +\value{ +Returns a tibble data frame. +} +\description{ +Given conceptId(s) get the record count. +} diff --git a/man/getCountOfSourceCodesMappedToStandardConcept.Rd b/man/getCountOfSourceCodesMappedToStandardConcept.Rd new file mode 100644 index 0000000..5cd099a --- /dev/null +++ b/man/getCountOfSourceCodesMappedToStandardConcept.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GetCountOfSourceCodesMappedToStandardConcept.R +\name{getCountOfSourceCodesMappedToStandardConcept} +\alias{getCountOfSourceCodesMappedToStandardConcept} +\title{Given conceptId(s) get the counts of occurrence with mapping.} +\usage{ +getCountOfSourceCodesMappedToStandardConcept( + conceptIds, + connection = NULL, + connectionDetails = NULL, + cdmDatabaseSchema, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + minCellCount = 0 +) +} +\arguments{ +\item{conceptIds}{An array of Concept ids.} + +\item{connection}{An object of type \code{connection} as created using the +\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package. Can be left NULL if \code{connectionDetails} +is provided, in which case a new connection will be opened at the start +of the function, and closed when the function finishes.} + +\item{connectionDetails}{An object of type \code{connectionDetails} as created using the +\code{\link[DatabaseConnector]{createConnectionDetails}} function in the +DatabaseConnector package. Can be left NULL if \code{connection} is +provided.} + +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} + +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} + +\item{minCellCount}{The minimum cell count for fields containing person/subject count.} +} +\value{ +Returns a tibble data frame. +} +\description{ +Given conceptId(s) get the counts of occurrence with mapping. +} diff --git a/man/getDomainInformation.Rd b/man/getDomainInformation.Rd new file mode 100644 index 0000000..b0ddec5 --- /dev/null +++ b/man/getDomainInformation.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Private.R +\name{getDomainInformation} +\alias{getDomainInformation} +\title{Get domain information} +\usage{ +getDomainInformation(packageName = NULL) +} +\arguments{ +\item{packageName}{e.g. 'CohortDiagnostics'} +} +\value{ +A list with two tibble data frame objects with domain information represented in wide and long format respectively. +} +\description{ +Get domain information +} diff --git a/tests/testthat/test-conceptPrevalence.R b/tests/testthat/test-conceptPrevalence.R index 6f91950..15170e0 100644 --- a/tests/testthat/test-conceptPrevalence.R +++ b/tests/testthat/test-conceptPrevalence.R @@ -77,3 +77,39 @@ testthat::test_that("Concept Prevalence - connectionDetails", { ) } }) + + +testthat::test_that("Concept Prevalence - table does not exist", { + if (dbms == "postgresql") { + connection <- + DatabaseConnector::connect(connectionDetails = connectionDetails) + + testthat::expect_error( + getConceptPrevalenceCounts( + conceptIds = 0, + connection = connection, + conceptPrevalenceSchema = cdmDatabaseSchema + ) + ) + + testthat::expect_error( + getRecommendedStandard( + conceptIds = 0, + vocabularyDatabaseSchema = cdmDatabaseSchema, + connection = connection, + conceptPrevalenceSchema = cdmDatabaseSchema + ) + ) + + testthat::expect_error( + getRecommendedSource( + conceptIds = 0, + vocabularyDatabaseSchema = cdmDatabaseSchema, + connection = connection, + conceptPrevalenceSchema = cdmDatabaseSchema + ) + ) + + DatabaseConnector::disconnect(connection = connection) + } +}) diff --git a/tests/testthat/test-queryConceptTable.R b/tests/testthat/test-queryConceptTable.R index 9b7eaae..d307808 100644 --- a/tests/testthat/test-queryConceptTable.R +++ b/tests/testthat/test-queryConceptTable.R @@ -159,12 +159,31 @@ testthat::test_that("Map MedDra to Snomed - connection", { testthat::expect_gte(object = nrow(output), expected = 0) }) -# Disconnection ---- -DatabaseConnector::disconnect(connection = connection) - +# getCountOfSourceCodesMappedToStandardConcept 1 ---- +testthat::test_that("Source codes Mapped to Standard Concept - connection", { + output <- + ConceptSetDiagnostics::getCountOfSourceCodesMappedToStandardConcept( + connection = connection, + conceptIds = 0, + cdmDatabaseSchema = cdmDatabaseSchema + ) + testthat::expect_gte(object = nrow(output), expected = 0) +}) +# getConceptRecordCount 1 ---- +testthat::test_that("Source codes Mapped to Standard Concept - connection", { + output <- + ConceptSetDiagnostics::getConceptRecordCount( + connection = connection, + conceptIds = 0, + cdmDatabaseSchema = cdmDatabaseSchema + ) + testthat::expect_gte(object = nrow(output), expected = 0) +}) +# Disconnection ---- +DatabaseConnector::disconnect(connection = connection) @@ -327,3 +346,25 @@ testthat::test_that("Map MedDra to Snomed - connectionDetails", { ) testthat::expect_gte(object = nrow(output), expected = 0) }) + +# getCountOfSourceCodesMappedToStandardConcept 2 ---- +testthat::test_that("Source codes Mapped to Standard Concept - connectionDetails", { + output <- + ConceptSetDiagnostics::getCountOfSourceCodesMappedToStandardConcept( + connectionDetails = connectionDetails, + conceptIds = c(19025280, 19077577), + cdmDatabaseSchema = cdmDatabaseSchema + ) + testthat::expect_gte(object = nrow(output), expected = 0) +}) + +# getConceptRecordCount 2 ---- +testthat::test_that("Source codes Mapped to Standard Concept - connectionDetails", { + output <- + ConceptSetDiagnostics::getConceptRecordCount( + connectionDetails = connectionDetails, + conceptIds = c(19025280, 19077577), + cdmDatabaseSchema = cdmDatabaseSchema + ) + testthat::expect_gte(object = nrow(output), expected = 0) +}) diff --git a/tests/testthat/test-supportingFunctions.R b/tests/testthat/test-supportingFunctions.R index d60080e..67a3bad 100644 --- a/tests/testthat/test-supportingFunctions.R +++ b/tests/testthat/test-supportingFunctions.R @@ -17,3 +17,45 @@ testthat::test_that("Check if cohort definition set", { checkIfCohortDefinitionSet(cohortDefinitionSet = cohortDefinitionSet) testthat::expect_false(object = errorMessage$isEmpty()) }) + +testthat::test_that("Check helper functions", { + testthat::expect_equal( + camelCaseToTitleCase("appleTree"), + "Apple Tree" + ) + testthat::expect_equal( + snakeCaseToCamelCase("apple_tree"), + "appleTree" + ) + testthat::expect_equal( + camelCaseToSnakeCase("appleTree"), + "apple_tree" + ) + testthat::expect_equal( + titleCaseToCamelCase("Apple Tree"), + "appleTree" + ) + testthat::expect_equal(quoteLiterals(NULL), "") +}) + + +testthat::test_that("Get Domain Information", { + domainInformation <- getDomainInformation + testthat::expect_equal( + camelCaseToTitleCase("appleTree"), + "Apple Tree" + ) + testthat::expect_equal( + snakeCaseToCamelCase("apple_tree"), + "appleTree" + ) + testthat::expect_equal( + camelCaseToSnakeCase("appleTree"), + "apple_tree" + ) + testthat::expect_equal( + titleCaseToCamelCase("Apple Tree"), + "appleTree" + ) + testthat::expect_equal(quoteLiterals(NULL), "") +})