From e2552478fa9d02c71aaa7b16f3fe11caaaf9db82 Mon Sep 17 00:00:00 2001 From: Gowtham Rao Date: Fri, 13 Sep 2024 08:48:56 -0400 Subject: [PATCH] main fixes --- DESCRIPTION | 2 + R/ExtractConceptSetsInCohortDefinition.R | 26 +++++----- R/ExtractConceptSetsInCohortDefinitionSet.R | 2 +- R/GetConceptRecordCount.R | 36 ++++++++++---- R/GetConceptSetOccurrenceDate.R | 2 + R/InstantiateCohortFromConceptSetExpression.R | 8 +-- R/MapMedraToSnomedViaVocabulary.R | 49 ++++++------------- R/Private.R | 16 ++---- man/getConceptRecordCount.Rd | 17 +++++++ man/getConceptSetOccurrenceDate.Rd | 2 + ...stantiateCohortFromConceptSetExpression.Rd | 3 ++ 11 files changed, 91 insertions(+), 72 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6627fb1..f2f10c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,8 +11,10 @@ Depends: dplyr, R (>= 4.0.0) Imports: + CohortAlgebra, checkmate, CirceR, + ParallelLogger, purrr, RJSONIO, rlang, diff --git a/R/ExtractConceptSetsInCohortDefinition.R b/R/ExtractConceptSetsInCohortDefinition.R index 9928852..d4ace61 100644 --- a/R/ExtractConceptSetsInCohortDefinition.R +++ b/R/ExtractConceptSetsInCohortDefinition.R @@ -96,7 +96,7 @@ extractConceptSetsInCohortDefinition <- ) conceptSetExpressionMetaData[[j]] <- conceptSetExpression2[[j]][1, ] |> - dplyr::select(dplyr::all_of("conceptSetId")) |> + dplyr::select(dplyr::all_of(c("conceptSetId"))) |> dplyr::mutate( hasStandard = as.integer( conceptSetDataFrame |> @@ -156,7 +156,7 @@ extractConceptSetsInCohortDefinition <- pattern = "condition" ) ) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), hasProcedure = as.integer( @@ -177,7 +177,7 @@ extractConceptSetsInCohortDefinition <- pattern = "procedure" ) ) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), hasDevice = as.integer( @@ -194,7 +194,7 @@ extractConceptSetsInCohortDefinition <- string = tolower(.data$domainId), pattern = "device" )) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), hasDrug = as.integer( @@ -211,7 +211,7 @@ extractConceptSetsInCohortDefinition <- string = tolower(.data$domainId), pattern = "drug" )) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), hasObservation = as.integer( @@ -232,7 +232,7 @@ extractConceptSetsInCohortDefinition <- pattern = "observation" ) ) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), hasVisit = as.integer( @@ -249,7 +249,7 @@ extractConceptSetsInCohortDefinition <- string = tolower(.data$domainId), pattern = "visit" )) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), hasType = as.integer( @@ -266,7 +266,7 @@ extractConceptSetsInCohortDefinition <- string = tolower(.data$domainId), pattern = "type" )) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> nrow(), isSelectedIncludeMapped = max(as.integer(conceptSetDataFrame$includeMapped)), @@ -284,13 +284,13 @@ extractConceptSetsInCohortDefinition <- numberOfUniqueConceptIdsWithoutDescendants = length( conceptSetDataFrame |> dplyr::filter(.data$includeDescendants == FALSE) |> - dplyr::pull(dplyr::all_of("conceptId")) |> + dplyr::pull(dplyr::all_of(c("conceptId"))) |> unique() ), numberOfUniqueConceptIdsWitDescendants = length( conceptSetDataFrame |> dplyr::filter(.data$includeDescendants == TRUE) |> - dplyr::pull(dplyr::all_of("conceptId")) |> + dplyr::pull(dplyr::all_of(c("conceptId"))) |> unique() ), numberOfUniqueConceptIdIsStandard = length( @@ -301,7 +301,7 @@ extractConceptSetsInCohortDefinition <- pattern = "S" ) ) |> - dplyr::pull(dplyr::all_of("conceptId")) |> + dplyr::pull(dplyr::all_of(c("conceptId"))) |> unique() ), numberOfUniqueConceptIdIsNonStandard = length( @@ -313,7 +313,7 @@ extractConceptSetsInCohortDefinition <- negate = TRUE ) ) |> - dplyr::pull(dplyr::all_of("conceptId")) |> + dplyr::pull(dplyr::all_of(c("conceptId"))) |> unique() ) ) @@ -340,7 +340,7 @@ extractConceptSetsInCohortDefinition <- if (length(codeSetsIdsInPrimaryCriteria) > 0) { conceptSetExpression <- conceptSetExpression |> - dplyr::select(-dplyr::all_of("conceptSetUsedInEntryEvent")) |> + dplyr::select(-dplyr::all_of(c("conceptSetUsedInEntryEvent"))) |> dplyr::left_join( dplyr::tibble(conceptSetId = codeSetsIdsInPrimaryCriteria) |> dplyr::distinct() |> diff --git a/R/ExtractConceptSetsInCohortDefinitionSet.R b/R/ExtractConceptSetsInCohortDefinitionSet.R index 0d9c9d1..b88a4cf 100644 --- a/R/ExtractConceptSetsInCohortDefinitionSet.R +++ b/R/ExtractConceptSetsInCohortDefinitionSet.R @@ -52,7 +52,7 @@ extractConceptSetsInCohortDefinitionSet <- !class(conceptSetsInCohortDefinition) == "try-error" )) { conceptSets[[i]] <- conceptSetsInCohortDefinition |> - dplyr::select(dplyr::all_of(uniqueConceptSetId)) |> + dplyr::select(dplyr::all_of(c("uniqueConceptSetId"))) |> dplyr::mutate(cohortId = cohort$cohortId) |> dplyr::relocate(dplyr::all_of(c("cohortId", "conceptSetId"))) } diff --git a/R/GetConceptRecordCount.R b/R/GetConceptRecordCount.R index 2f34988..b90a3b8 100644 --- a/R/GetConceptRecordCount.R +++ b/R/GetConceptRecordCount.R @@ -42,6 +42,23 @@ #' #' @param cohortDefinitionId Optional #' +#' @param domainTableName Vector of strings Domains to look for concept IDs. Supported domains +#' include "drug_exposure", "condition_occurrence", "procedure_occurrence", "measurement", "observation". +#' +#' @param stratifyByGender Logical Whether to stratify the counts by gender. +#' +#' @param stratifyByYear Logical Whether to stratify the counts by year. +#' +#' @param stratifyByYearQuarter Logical Whether to stratify the counts by quarter of the year. +#' +#' @param stratifyByYearMonth Logical Whether to stratify the counts by month of the year. +#' +#' @param stratifyByAgeGroup Logical Whether to stratify the counts by age group. +#' +#' @param stratifyByIncidence Logical Whether to limit the counts to first occurrences (incidence). +#' +#' @param getOverallCounts Logical Whether to include overall counts across all specified stratifications. +#' #' @return #' Returns a tibble data frame. #' @@ -114,15 +131,16 @@ getConceptRecordCount <- function(conceptIds = NULL, {@concept_id_universe != ''} ? { DROP TABLE IF EXISTS #concept_id_unv_2; CREATE TABLE #concept_id_unv_2 as - SELECT DISTINCT u.concept_id - FROM @concept_id_universe u - INNER JOIN ( - SELECT concept_id - FROM @vocabulary_database_schema.CONCEPT - WHERE concept_id > 0 - ) c - ON u.concept_id = c.concept_id - ; + ( + SELECT DISTINCT u.concept_id + FROM @concept_id_universe u + INNER JOIN ( + SELECT concept_id + FROM @vocabulary_database_schema.CONCEPT + WHERE concept_id > 0 + ) c + ON u.concept_id = c.concept_id + ) ; } --PERCENTILES are difficult and will need subqueries diff --git a/R/GetConceptSetOccurrenceDate.R b/R/GetConceptSetOccurrenceDate.R index 054f845..397551b 100644 --- a/R/GetConceptSetOccurrenceDate.R +++ b/R/GetConceptSetOccurrenceDate.R @@ -39,6 +39,8 @@ #' #' @param conceptIds An array of concept ids #' +#' @param subset options are "all", "first", "last" +#' #' @param limitToPersonDate Do you want to limit to person dates #' #' @return diff --git a/R/InstantiateCohortFromConceptSetExpression.R b/R/InstantiateCohortFromConceptSetExpression.R index c6bbc2a..f838c7b 100644 --- a/R/InstantiateCohortFromConceptSetExpression.R +++ b/R/InstantiateCohortFromConceptSetExpression.R @@ -29,6 +29,8 @@ #' #' @template CdmDatabaseSchema #' +#' @template CohortDatabaseSchema +#' #' @template TempEmulationSchema #' #' @param cohortId An integer value to identify the cohort. @@ -66,10 +68,10 @@ instantiateCohortFromConceptSetExpression <- connection = connection, vocabularyDatabaseSchema = vocabularyDatabaseSchema ) |> - dplyr::select(dplyr::all_of("conceptId")) |> + dplyr::select(dplyr::all_of(c("conceptId"))) |> dplyr::distinct() |> - dplyr::arrange(dplyr::all_of("conceptId")) |> - dplyr::pull(dplyr::all_of("conceptId")) + dplyr::arrange(dplyr::all_of(c("conceptId"))) |> + dplyr::pull(dplyr::all_of(c("conceptId"))) tempTableWithConceptDates <- getConceptSetOccurrenceDate( diff --git a/R/MapMedraToSnomedViaVocabulary.R b/R/MapMedraToSnomedViaVocabulary.R index 0d0ec11..f50a887 100644 --- a/R/MapMedraToSnomedViaVocabulary.R +++ b/R/MapMedraToSnomedViaVocabulary.R @@ -115,8 +115,7 @@ mapMedraToSnomedViaVocabulary <- dplyr::inner_join( medDraRelationship$pt |> dplyr::select( - .data$givenConceptId, - .data$ptConceptName + dplyr::all_of(c("givenConceptId", "ptConceptName")) ) |> rename( "medDraConceptId" = .data$givenConceptId, @@ -129,8 +128,7 @@ mapMedraToSnomedViaVocabulary <- dplyr::inner_join( medDraRelationship$llt |> dplyr::select( - .data$givenConceptId, - .data$lltConceptName + dplyr::all_of(c("givenConceptId", "lltConceptName")) ) |> rename( "medDraConceptId" = .data$givenConceptId, @@ -222,8 +220,7 @@ mapMedraToSnomedViaVocabulary <- relatedToSnomed <- medDraRelated |> dplyr::select( - .data$conceptId1, - .data$conceptId2 + dplyr::all_of(c("conceptId1", "conceptId2")) ) |> dplyr::distinct() |> dplyr::inner_join( @@ -255,8 +252,7 @@ mapMedraToSnomedViaVocabulary <- dplyr::inner_join( conceptRelationship |> dplyr::select( - .data$conceptId1, - .data$conceptId2 + dplyr::all_of(c("conceptId1", "conceptId2")) ), by = c("invalidSnomedConceptId" = "conceptId1") ) |> @@ -307,8 +303,7 @@ mapMedraToSnomedViaVocabulary <- dplyr::inner_join( snomedSynonyms |> dplyr::select( - .data$conceptId, - .data$conceptSynonymName + dplyr::all_of(c("conceptId", "conceptSynonymName")) ) |> dplyr::distinct() |> dplyr::rename("conceptName" = .data$conceptSynonymName), @@ -414,9 +409,7 @@ mapMedraToSnomedViaVocabulary <- ) ) |> dplyr::select( - .data$medDraConceptId, - .data$snomedConceptId, - .data$stringDistanceScore + dplyr::all_of(c("medDraConceptId", "snomedConceptId", "stringDistanceScore")) ) |> dplyr::distinct() @@ -430,16 +423,12 @@ mapMedraToSnomedViaVocabulary <- tidyr::replace_na(list(stringDistanceScore = 999)) |> dplyr::group_by(.data$medDraConceptId) |> dplyr::arrange( - .data$minLevelsOfSeparation, - .data$maxLevelsOfSeparation, - .data$stringDistanceScore + dplyr::all_of(c("minLevelsOfSeparation", "maxLevelsOfSeparation", "stringDistanceScore")) ) |> dplyr::mutate(rank = dplyr::row_number()) |> dplyr::ungroup() |> dplyr::select( - .data$medDraConceptId, - .data$snomedConceptId, - .data$rank + dplyr::all_of(c("medDraConceptId", "snomedConceptId", "rank")) ) |> dplyr::distinct() @@ -481,9 +470,7 @@ mapMedraToSnomedViaVocabulary <- ) |> dplyr::mutate(ancestorRank = dplyr::row_number()) |> dplyr::arrange( - .data$medDraConceptId, - .data$descendantConceptId, - .data$ancestorRank + dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorRank")) ) canBeRolledUp <- @@ -497,10 +484,7 @@ mapMedraToSnomedViaVocabulary <- ) |> dplyr::rename("descendantConceptId" = .data$snomedConceptId) |> dplyr::select( - .data$medDraConceptId, - .data$descendantConceptId, - .data$ancestorConceptId, - .data$ancestorRank + dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorConceptId", "ancestorRank")) ) |> dplyr::distinct() @@ -527,9 +511,7 @@ mapMedraToSnomedViaVocabulary <- suppressWarnings( canBeRolledUp |> dplyr::select( - .data$medDraConceptId, - .data$descendantConceptId, - .data$ancestorRank + dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorRank")) ) |> dplyr::group_by( .data$medDraConceptId, @@ -584,7 +566,7 @@ mapMedraToSnomedViaVocabulary <- dplyr::group_by(.data$medDraConceptId) |> dplyr::arrange(.data$rank) |> dplyr::mutate(rn = dplyr::row_number()) |> - dplyr::select(-.data$rank) |> + dplyr::select(-dplyr::all_of(c("rank"))) |> dplyr::rename(rank = .data$rn) |> dplyr::ungroup() |> dplyr::arrange(.data$medDraConceptId) @@ -633,14 +615,11 @@ mapMedraToSnomedViaVocabulary <- by = "medDraConceptId" ) |> dplyr::arrange( - .data$medDraConceptId, - .data$medDraConceptName, - .data$medDraConceptClassId, - .data$rank + dplyr::all_of(c("medDraConceptId", "medDraConceptName", "medDraConceptName", "rank")) ) |> dplyr::select( dplyr::starts_with(c("medDra", "snomed")), - .data$rank + dplyr::all_of(c("rank")) ) return(mappedUsingVocabaulary) diff --git a/R/Private.R b/R/Private.R index cf7b769..2bb7a03 100644 --- a/R/Private.R +++ b/R/Private.R @@ -200,10 +200,7 @@ getDomainInformation <- function() { data$long <- dplyr::bind_rows( data$wide |> dplyr::select( - .data$domainTableShort, - .data$domainTable, - .data$domainConceptIdShort, - .data$domainConceptId + dplyr::all_of(c("domainTableShort", "domainTable", "domainConceptIdShort", "domainConceptId")) ) |> dplyr::rename( domainFieldShort = .data$domainConceptIdShort, @@ -211,10 +208,7 @@ getDomainInformation <- function() { ), data$wide |> dplyr::select( - .data$domainTableShort, - .data$domainSourceConceptIdShort, - .data$domainTable, - .data$domainSourceConceptId + dplyr::all_of(c("domainTableShort", "domainSourceConceptIdShort", "domainTable", "domainSourceConceptId")) ) |> dplyr::rename( domainFieldShort = .data$domainSourceConceptIdShort, @@ -239,15 +233,15 @@ getDomainInformation <- function() { data |> dplyr::collect() |> dplyr::mutate(dplyr::across( - tidyselect:::where(is.character), + tidyselect::where(is.character), ~ tidyr::replace_na(.x, as.character("")) )) |> dplyr::mutate(dplyr::across( - tidyselect:::where(is.logical), + tidyselect::where(is.logical), ~ tidyr::replace_na(.x, as.character("")) )) |> dplyr::mutate(dplyr::across( - tidyselect:::where(is.numeric), + tidyselect::where(is.numeric), ~ tidyr::replace_na(.x, as.numeric("")) )) } diff --git a/man/getConceptRecordCount.Rd b/man/getConceptRecordCount.Rd index a5112be..d042636 100644 --- a/man/getConceptRecordCount.Rd +++ b/man/getConceptRecordCount.Rd @@ -57,6 +57,23 @@ tables, provide a schema with write privileges where temp tables can be created. \item{cohortDefinitionId}{Optional} +\item{stratifyByGender}{Logical Whether to stratify the counts by gender.} + +\item{stratifyByYear}{Logical Whether to stratify the counts by year.} + +\item{stratifyByYearQuarter}{Logical Whether to stratify the counts by quarter of the year.} + +\item{stratifyByYearMonth}{Logical Whether to stratify the counts by month of the year.} + +\item{stratifyByAgeGroup}{Logical Whether to stratify the counts by age group.} + +\item{stratifyByIncidence}{Logical Whether to limit the counts to first occurrences (incidence).} + +\item{getOverallCounts}{Logical Whether to include overall counts across all specified stratifications.} + +\item{domainTableName}{Vector of strings Domains to look for concept IDs. Supported domains +include "drug_exposure", "condition_occurrence", "procedure_occurrence", "measurement", "observation".} + \item{domain}{domains to look for concept id} \item{limitToCohort}{Do you want to limit to a cohort_definition_id?} diff --git a/man/getConceptSetOccurrenceDate.Rd b/man/getConceptSetOccurrenceDate.Rd index 95d1a93..f830bd1 100644 --- a/man/getConceptSetOccurrenceDate.Rd +++ b/man/getConceptSetOccurrenceDate.Rd @@ -27,6 +27,8 @@ schema name, for example 'cdm_data.dbo'.} \item{conceptIds}{An array of concept ids} +\item{subset}{options are "all", "first", "last"} + \item{limitToPersonDate}{Do you want to limit to person dates} \item{restrictToObservationPeriod}{(Default = TRUE) Do you want to restrict to Observation period? i.e diff --git a/man/instantiateCohortFromConceptSetExpression.Rd b/man/instantiateCohortFromConceptSetExpression.Rd index 9531520..4c40aec 100644 --- a/man/instantiateCohortFromConceptSetExpression.Rd +++ b/man/instantiateCohortFromConceptSetExpression.Rd @@ -35,6 +35,9 @@ schema name, for example 'cdm_data.dbo'.} \item{vocabularyDatabaseSchema}{The schema name of containing the vocabulary tables.} +\item{cohortDatabaseSchema}{Schema name where your user has write access (has CRUD privileges). This is the location, +of the cohort tables.} + \item{cohortId}{An integer value to identify the cohort.} \item{cohortTable}{the name of the cohort table}