diff --git a/NAMESPACE b/NAMESPACE index 6b0c56a..ecf0beb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,6 @@ export(convertConceptSetDataFrameToExpression) export(convertConceptSetExpressionToDataFrame) -export(createPheValuatorCohortsFromConceptSet) export(extractConceptSetsInCohortDefinition) export(extractConceptSetsInCohortDefinitionSet) export(findOrphanConcepts) diff --git a/R/ConvertConceptSetDataFrameToExpression.R b/R/ConvertConceptSetDataFrameToExpression.R index f8b37b8..9a45bc4 100644 --- a/R/ConvertConceptSetDataFrameToExpression.R +++ b/R/ConvertConceptSetDataFrameToExpression.R @@ -140,7 +140,7 @@ convertConceptSetDataFrameToExpression <- ) ) } - + if (selectAllDescendants) { conceptSetExpressionDataFrame <- dplyr::bind_rows( diff --git a/R/ConvertConceptSetExpressionToDataFrame.R b/R/ConvertConceptSetExpressionToDataFrame.R index bcf1edb..41628a8 100644 --- a/R/ConvertConceptSetExpressionToDataFrame.R +++ b/R/ConvertConceptSetExpressionToDataFrame.R @@ -56,43 +56,41 @@ convertConceptSetExpressionToDataFrame <- items2 <- list() - errorMessage <- - "Given concept set expression R list object does not conform to expected structure. \n - It is a vector that is more than 3 levels deep." - for (i in (1:length(items))) { df <- as.data.frame(items[[i]]) |> dplyr::tibble() - names(df) <- stringr::str_replace(string = tolower(names(df)), - pattern = "concept.", - replacement = "") - - if ('isExcluded' %in% names(df)) { + names(df) <- stringr::str_replace( + string = tolower(names(df)), + pattern = "concept.", + replacement = "" + ) + + if ("isExcluded" %in% names(df)) { df <- df |> dplyr::rename("is_excluded" = "isExcluded") - } else if ('isexcluded' %in% names(df)) { + } else if ("isexcluded" %in% names(df)) { df <- df |> dplyr::rename("is_excluded" = "isexcluded") } else { df <- df |> dplyr::mutate(is_excluded = FALSE) } - - if ('includeMapped' %in% names(df)) { + + if ("includeMapped" %in% names(df)) { df <- df |> dplyr::rename("include_mapped" = "includeMapped") - } else if ('includemapped' %in% names(df)) { + } else if ("includemapped" %in% names(df)) { df <- df |> dplyr::rename("include_mapped" = "includemapped") } else { df <- df |> dplyr::mutate(include_mapped = FALSE) } - - if ('includeDescendants' %in% names(df)) { + + if ("includeDescendants" %in% names(df)) { df <- df |> dplyr::rename("include_descendants" = "includeDescendants") - } else if ('includedescendants' %in% names(df)) { + } else if ("includedescendants" %in% names(df)) { df <- df |> dplyr::rename("include_descendants" = "includedescendants") } else { @@ -101,9 +99,9 @@ convertConceptSetExpressionToDataFrame <- } items2[[i]] <- df } - - conceptSetExpressionDetails <- dplyr::bind_rows(items2) |> - SqlRender::snakeCaseToCamelCaseNames() |> + + conceptSetExpressionDetails <- dplyr::bind_rows(items2) |> + SqlRender::snakeCaseToCamelCaseNames() |> tidyr::replace_na( replace = list( isExcluded = FALSE, diff --git a/R/ExtractConceptSetsInCohortDefinition.R b/R/ExtractConceptSetsInCohortDefinition.R index b001413..9b92555 100644 --- a/R/ExtractConceptSetsInCohortDefinition.R +++ b/R/ExtractConceptSetsInCohortDefinition.R @@ -34,66 +34,76 @@ extractConceptSetsInCohortDefinition <- } else { expression <- cohortExpression } - + # extract concept set expression from cohort expression conceptSetExpression <- extractConceptSetExpressionsFromCohortExpression(cohortExpression = expression) - + if (is.null(conceptSetExpression)) { stop("No concept set expressions found in cohort expression") } - + # use circe to render cohort sql and extract concept set sql circeRenderedSqlExpression <- - getCohortSqlFromCohortDefinition(cohortExpression = expression, - generateStats = TRUE) - + getCohortSqlFromCohortDefinition( + cohortExpression = expression, + generateStats = TRUE + ) + extractedConceptSetSql <- extractConceptSetsSqlFromCohortSql(cohortSql = circeRenderedSqlExpression) - + primaryCriterias <- expression$PrimaryCriteria$CriteriaList codeSetsIdsInPrimaryCriteria <- c() - + for (i in (1:length(primaryCriterias))) { codesets <- primaryCriterias[[i]][[1]] - + if (typeof(codesets) == "list") { if (!is.null(codesets$CodesetId)) { - codeSetsIdsInPrimaryCriteria <- c(codeSetsIdsInPrimaryCriteria, - codesets$CodesetId) |> + codeSetsIdsInPrimaryCriteria <- c( + codeSetsIdsInPrimaryCriteria, + codesets$CodesetId + ) |> unique() |> sort() } } else { if (names(codesets) == "CodesetId") { - codeSetsIdsInPrimaryCriteria <- c(codeSetsIdsInPrimaryCriteria, - as.double(codesets)) |> + codeSetsIdsInPrimaryCriteria <- c( + codeSetsIdsInPrimaryCriteria, + as.double(codesets) + ) |> unique() |> sort() } } } - + conceptSetExpression2 <- list() conceptSetExpressionMetaData <- list() - + for (j in (1:nrow(conceptSetExpression))) { - conceptSetExpression2[[j]] <- conceptSetExpression[j,] - + conceptSetExpression2[[j]] <- conceptSetExpression[j, ] + conceptSetDataFrame <- - convertConceptSetExpressionToDataFrame(conceptSetExpression = - conceptSetExpression2[[j]][1, ]$conceptSetExpression |> - RJSONIO::fromJSON(digits = 23)) + convertConceptSetExpressionToDataFrame( + conceptSetExpression = + conceptSetExpression2[[j]][1, ]$conceptSetExpression |> + RJSONIO::fromJSON(digits = 23) + ) conceptSetExpressionMetaData[[j]] <- - conceptSetExpression2[[j]][1,] |> + conceptSetExpression2[[j]][1, ] |> dplyr::select(conceptSetId) |> dplyr::mutate( hasStandard = as.integer( conceptSetDataFrame |> dplyr::filter( - stringr::str_detect(string = standardConcept, - pattern = "S") + stringr::str_detect( + string = standardConcept, + pattern = "S" + ) ) |> nrow() > 0 ), @@ -130,37 +140,45 @@ extractConceptSetsInCohortDefinition <- hasCondition = as.integer( conceptSetDataFrame |> dplyr::filter( - stringr::str_detect(string = tolower(domainId), - pattern = "condition") + stringr::str_detect( + string = tolower(domainId), + pattern = "condition" + ) ) |> nrow() > 0 ), countCondition = conceptSetDataFrame |> - dplyr::filter( - stringr::str_detect(string = tolower(domainId), - pattern = "condition") - ) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow(), + dplyr::filter( + stringr::str_detect( + string = tolower(domainId), + pattern = "condition" + ) + ) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), hasProcedure = as.integer( conceptSetDataFrame |> dplyr::filter( - stringr::str_detect(string = tolower(domainId), - pattern = "procedure") + stringr::str_detect( + string = tolower(domainId), + pattern = "procedure" + ) ) |> nrow() > 0 ), countProcedure = conceptSetDataFrame |> - dplyr::filter( - stringr::str_detect(string = tolower(domainId), - pattern = "procedure") - ) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow(), + dplyr::filter( + stringr::str_detect( + string = tolower(domainId), + pattern = "procedure" + ) + ) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), hasDevice = as.integer( conceptSetDataFrame |> dplyr::filter(stringr::str_detect( @@ -171,13 +189,13 @@ extractConceptSetsInCohortDefinition <- ), countDevice = conceptSetDataFrame |> - dplyr::filter(stringr::str_detect( - string = tolower(domainId), - pattern = "device" - )) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow(), + dplyr::filter(stringr::str_detect( + string = tolower(domainId), + pattern = "device" + )) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), hasDrug = as.integer( conceptSetDataFrame |> dplyr::filter(stringr::str_detect( @@ -188,30 +206,34 @@ extractConceptSetsInCohortDefinition <- ), countDrug = conceptSetDataFrame |> - dplyr::filter(stringr::str_detect( - string = tolower(domainId), - pattern = "drug" - )) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow(), + dplyr::filter(stringr::str_detect( + string = tolower(domainId), + pattern = "drug" + )) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), hasObservation = as.integer( conceptSetDataFrame |> dplyr::filter( - stringr::str_detect(string = tolower(domainId), - pattern = "observation") + stringr::str_detect( + string = tolower(domainId), + pattern = "observation" + ) ) |> nrow() > 0 ), countObservation = conceptSetDataFrame |> - dplyr::filter( - stringr::str_detect(string = tolower(domainId), - pattern = "observation") - ) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow(), + dplyr::filter( + stringr::str_detect( + string = tolower(domainId), + pattern = "observation" + ) + ) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), hasVisit = as.integer( conceptSetDataFrame |> dplyr::filter(stringr::str_detect( @@ -222,14 +244,13 @@ extractConceptSetsInCohortDefinition <- ), countVisit = conceptSetDataFrame |> - dplyr::filter(stringr::str_detect( - string = tolower(domainId), - pattern = "visit" - )) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow() - , + dplyr::filter(stringr::str_detect( + string = tolower(domainId), + pattern = "visit" + )) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), hasType = as.integer( conceptSetDataFrame |> dplyr::filter(stringr::str_detect( @@ -240,13 +261,13 @@ extractConceptSetsInCohortDefinition <- ), countType = conceptSetDataFrame |> - dplyr::filter(stringr::str_detect( - string = tolower(domainId), - pattern = "type" - )) |> - dplyr::select(conceptId) |> - dplyr::distinct() |> - nrow(), + dplyr::filter(stringr::str_detect( + string = tolower(domainId), + pattern = "type" + )) |> + dplyr::select(conceptId) |> + dplyr::distinct() |> + nrow(), isSelectedIncludeMapped = max(as.integer(conceptSetDataFrame$includeMapped)), isSelectedIncludeDescendants = max(as.integer( conceptSetDataFrame$includeDescendants @@ -274,8 +295,10 @@ extractConceptSetsInCohortDefinition <- numberOfUniqueConceptIdIsStandard = length( conceptSetDataFrame |> dplyr::filter( - stringr::str_detect(string = standardConcept, - pattern = "S") + stringr::str_detect( + string = standardConcept, + pattern = "S" + ) ) |> dplyr::pull(conceptId) |> unique() @@ -293,7 +316,7 @@ extractConceptSetsInCohortDefinition <- unique() ) ) - + conceptSetExpression2[[j]]$conceptSetExpressionSignature <- conceptSetDataFrame |> dplyr::select( @@ -306,14 +329,14 @@ extractConceptSetsInCohortDefinition <- dplyr::arrange(.data$conceptId) |> RJSONIO::toJSON(digits = 23, pretty = TRUE) } - + conceptSetExpressionMetaData <- dplyr::bind_rows(conceptSetExpressionMetaData) - + conceptSetExpression <- dplyr::bind_rows(conceptSetExpression2) |> dplyr::mutate(conceptSetUsedInEntryEvent = 0) - + if (length(codeSetsIdsInPrimaryCriteria) > 0) { conceptSetExpression <- conceptSetExpression |> dplyr::select(-conceptSetUsedInEntryEvent) |> @@ -324,28 +347,32 @@ extractConceptSetsInCohortDefinition <- by = "conceptSetId" ) } - + uniqueConceptSets <- conceptSetExpression |> dplyr::select(.data$conceptSetExpressionSignature) |> dplyr::distinct() |> dplyr::mutate(uniqueConceptSetId = dplyr::row_number()) - + conceptSetExpression <- conceptSetExpression |> dplyr::left_join(uniqueConceptSets, - by = "conceptSetExpressionSignature") |> + by = "conceptSetExpressionSignature" + ) |> dplyr::select(-.data$conceptSetExpressionSignature) - - data <- dplyr::inner_join(x = conceptSetExpression, - y = extractedConceptSetSql, - by = c("conceptSetId")) - + + data <- dplyr::inner_join( + x = conceptSetExpression, + y = extractedConceptSetSql, + by = c("conceptSetId") + ) + data <- data |> tidyr::replace_na(replace = list(conceptSetUsedInEntryEvent = 0)) - + data <- data |> dplyr::left_join(conceptSetExpressionMetaData, - by = "conceptSetId") - + by = "conceptSetId" + ) + return(data) } @@ -373,30 +400,32 @@ extractConceptSetExpressionsFromCohortExpression <- extractConceptSetsSqlFromCohortSql <- function(cohortSql) { sql <- gsub("with primary_events.*", "", cohortSql) - + # Find opening and closing parentheses: starts <- stringr::str_locate_all(sql, "\\(")[[1]][, 1] ends <- stringr::str_locate_all(sql, "\\)")[[1]][, 1] - + x <- rep(0, nchar(sql)) x[starts] <- 1 x[ends] <- -1 level <- cumsum(x) level0 <- which(level == 0) - + subQueryLocations <- stringr::str_locate_all(sql, "SELECT [0-9]+ as codeset_id")[[1]] subQueryCount <- nrow(subQueryLocations) conceptsetSqls <- vector("character", subQueryCount) conceptSetIds <- vector("integer", subQueryCount) - + temp <- list() for (i in 1:subQueryCount) { startForSubQuery <- min(starts[starts > subQueryLocations[i, 2]]) endForSubQuery <- min(level0[level0 > startForSubQuery]) subQuery <- - paste(stringr::str_sub(sql, subQueryLocations[i, 1], endForSubQuery), - "C") + paste( + stringr::str_sub(sql, subQueryLocations[i, 1], endForSubQuery), + "C" + ) conceptsetSqls[i] <- subQuery conceptSetIds[i] <- stringr::str_replace( subQuery, @@ -409,8 +438,10 @@ extractConceptSetsSqlFromCohortSql <- function(cohortSql) { replacement = "\\1" ) |> utils::type.convert(as.is = TRUE) - temp[[i]] <- tidyr::tibble(conceptSetId = conceptSetIds[i], - conceptSetSql = conceptsetSqls[i]) + temp[[i]] <- tidyr::tibble( + conceptSetId = conceptSetIds[i], + conceptSetSql = conceptsetSqls[i] + ) } return(dplyr::bind_rows(temp)) } @@ -425,11 +456,13 @@ getCohortSqlFromCohortDefinition <- } else { expression <- cohortExpression } - + # use circe to render cohort sql circeRCohortExpressionFromJson <- - CirceR::cohortExpressionFromJson(expressionJson = RJSONIO::toJSON(x = expression, - digits = 23)) + CirceR::cohortExpressionFromJson(expressionJson = RJSONIO::toJSON( + x = expression, + digits = 23 + )) circeRenderedSqlExpression <- CirceR::buildCohortQuery( expression = circeRCohortExpressionFromJson, diff --git a/R/ExtractConceptSetsInCohortDefinitionSet.R b/R/ExtractConceptSetsInCohortDefinitionSet.R index 4e382de..54eca09 100644 --- a/R/ExtractConceptSetsInCohortDefinitionSet.R +++ b/R/ExtractConceptSetsInCohortDefinitionSet.R @@ -42,15 +42,15 @@ extractConceptSetsInCohortDefinitionSet <- ) conceptSetsInCohortDefinition <- NULL conceptSetsInCohortDefinition <- - try(expr = extractConceptSetsInCohortDefinition(cohortExpression = cohortJsonAsList), - silent = TRUE + try( + expr = extractConceptSetsInCohortDefinition(cohortExpression = cohortJsonAsList), + silent = TRUE ) if (all( !is.null(conceptSetsInCohortDefinition), - !class(conceptSetsInCohortDefinition) == 'try-error' + !class(conceptSetsInCohortDefinition) == "try-error" )) { - conceptSets[[i]] <- conceptSetsInCohortDefinition |> dplyr::select(-.data$uniqueConceptSetId) |> dplyr::mutate(cohortId = cohort$cohortId) |> diff --git a/R/FindOrphanConceptsForConceptSetExpression.R b/R/FindOrphanConceptsForConceptSetExpression.R index a9c444b..675db88 100644 --- a/R/FindOrphanConceptsForConceptSetExpression.R +++ b/R/FindOrphanConceptsForConceptSetExpression.R @@ -38,7 +38,6 @@ findOrphanConceptsForConceptSetExpression <- connection = NULL, connectionDetails = NULL, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { - resolvedConceptIds <- resolveConceptSetExpression( conceptSetExpression, diff --git a/R/GetConceptPrevalenceCounts.R b/R/GetConceptPrevalenceCounts.R index fcb9a5f..ae2db32 100644 --- a/R/GetConceptPrevalenceCounts.R +++ b/R/GetConceptPrevalenceCounts.R @@ -42,14 +42,16 @@ getConceptPrevalenceCounts <- function(conceptIds = NULL, connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) } - + conceptPrevalenceTables <- - DatabaseConnector::getTableNames(connection = connection, - databaseSchema = conceptPrevalenceSchema) |> + DatabaseConnector::getTableNames( + connection = connection, + databaseSchema = conceptPrevalenceSchema + ) |> tolower() - + conceptPrevalenceTablesExist <- FALSE - + if (all( "recommender_set" %in% conceptPrevalenceTables, "cp_master" %in% conceptPrevalenceTables, @@ -57,13 +59,13 @@ getConceptPrevalenceCounts <- function(conceptIds = NULL, )) { conceptPrevalenceTablesExist <- TRUE } - + if (!conceptPrevalenceTablesExist) { stop( "Concept Prevalence schema does not have the required concept prevalence tables. recommender_set, cp_master, recommended_blacklist" ) } - + tempTableName <- NULL if (!is.null(conceptIds)) { tempTableName <- loadTempConceptTable( @@ -72,7 +74,7 @@ getConceptPrevalenceCounts <- function(conceptIds = NULL, tempEmulationSchema = tempEmulationSchema ) } - + sql <- "SELECT cp.* FROM @concept_prevalence_schema.cp_master cp @@ -81,7 +83,7 @@ getConceptPrevalenceCounts <- function(conceptIds = NULL, @concept_id_table t ON cp.concept_id = t.concept_id };" - + data <- DatabaseConnector::renderTranslateQuerySql( connection = connection, @@ -90,7 +92,7 @@ getConceptPrevalenceCounts <- function(conceptIds = NULL, sql = sql, snakeCaseToCamelCase = TRUE ) |> dplyr::tibble() - + if (!is.null(conceptIds)) { dropTempConceptTable( connection = connection, diff --git a/R/GetConceptRecordCount.R b/R/GetConceptRecordCount.R index f2d9d37..8298cac 100644 --- a/R/GetConceptRecordCount.R +++ b/R/GetConceptRecordCount.R @@ -75,26 +75,28 @@ getConceptRecordCount <- function(conceptIds = NULL, connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) } - - uploadedConceptTable <- '' + + uploadedConceptTable <- "" if (!is.null(conceptIds)) { uploadedConceptTable <- - loadTempConceptTable(conceptIds = conceptIds, - connection = connection) + loadTempConceptTable( + conceptIds = conceptIds, + connection = connection + ) } - + domainInformation <- getDomainInformation(packageName = "ConceptSetDiagnostics") - + domainsWide <- domainInformation$wide |> dplyr::filter(domainTable %in% c(domainTableName)) |> dplyr::filter(.data$isEraTable == FALSE) - + domainsLong <- domainInformation$long |> dplyr::filter(domainTable %in% c(domainTableName)) |> dplyr::filter(eraTable == FALSE) # filtering out ERA tables because they are supposed to be derived tables, and counting them is double counting - + limitToCohort <- FALSE if (all( !is.null(cohortDatabaseSchema), @@ -103,7 +105,7 @@ getConceptRecordCount <- function(conceptIds = NULL, )) { limitToCohort <- TRUE } - + # REASON for many SQL --DISTINCT subject_count cannot be computed from aggregation query of calendar month level data sql <- " @@ -208,7 +210,7 @@ getConceptRecordCount <- function(conceptIds = NULL, {@use_date_quarter} ? {DATEPART(qq, @domain_start_date),}}; " - + iterations <- domainsLong |> tidyr::crossing(dplyr::tibble(includeConceptId = c("Y", "N", ""))) |> tidyr::crossing(dplyr::tibble(genderConceptId = c(0, 8507, 8532))) |> @@ -244,93 +246,97 @@ getConceptRecordCount <- function(conceptIds = NULL, tidyr::crossing(dplyr::tibble(useAgeGroup = c("Y", "N"))) |> dplyr::arrange() |> dplyr::mutate(combination = dplyr::row_number()) - + if (!stratifyByGender) { - iterations <- iterations |> + iterations <- iterations |> dplyr::filter(!genderConceptId %in% c(8507, 8532)) } - + if (!stratifyByYear) { - iterations <- iterations |> - dplyr::filter(calendarType != 'Y') + iterations <- iterations |> + dplyr::filter(calendarType != "Y") } - + if (!stratifyByYearQuarter) { - iterations <- iterations |> - dplyr::filter(calendarType != 'Q') + iterations <- iterations |> + dplyr::filter(calendarType != "Q") } - + if (!stratifyByYearMonth) { - iterations <- iterations |> - dplyr::filter(calendarType != 'M') + iterations <- iterations |> + dplyr::filter(calendarType != "M") } - + if (!stratifyByAgeGroup) { - iterations <- iterations |> - dplyr::filter(useAgeGroup != 'Y') + iterations <- iterations |> + dplyr::filter(useAgeGroup != "Y") } - + if (!stratifyByIncidence) { - iterations <- iterations |> - dplyr::filter(incidence != 'Y') + iterations <- iterations |> + dplyr::filter(incidence != "Y") } - + if (!getOverallCounts) { - iterations <- iterations |> - dplyr::filter(includeConceptId == 'Y') + iterations <- iterations |> + dplyr::filter(includeConceptId == "Y") } - + existingOutput <- c() - + for (i in (1:nrow(iterations))) { rowData <- iterations[i, ] - + extraMessage <- - paste0("Working on ", - rowData$domainTable, - ".", - rowData$domainField, - ".") + paste0( + "Working on ", + rowData$domainTable, + ".", + rowData$domainField, + "." + ) progress <- (i / nrow(iterations)) * 100 message <- - sprintf("\rProgress: %d/%d (%0.2f%%)", - i, - nrow(iterations), - progress) - + sprintf( + "\rProgress: %d/%d (%0.2f%%)", + i, + nrow(iterations), + progress + ) + ParallelLogger::logInfo(message) - + showProgress( currentIteration = i, totalIterations = nrow(iterations), extraMessage = extraMessage ) - + sqlRendered <- SqlRender::render( sql = sql, cdm_database_schema = cdmDatabaseSchema, vocabulary_database_schema = vocabularyDatabaseSchema, concept_id_universe = uploadedConceptTable, use_group_by = any( - rowData$includeConceptId == 'Y', + rowData$includeConceptId == "Y", rowData$genderConceptId > 0, - rowData$useDateYear == 'Y', - rowData$useDateQuarter == 'Y', - rowData$useDateMonth == 'Y', - rowData$useAgeGroup == 'Y' + rowData$useDateYear == "Y", + rowData$useDateQuarter == "Y", + rowData$useDateMonth == "Y", + rowData$useAgeGroup == "Y" ), - include_concept_id = (rowData$includeConceptId == 'Y'), + include_concept_id = (rowData$includeConceptId == "Y"), domain_concept_id = rowData$domainField, domain_start_date = domainsWide |> dplyr::filter(domainTable == rowData$domainTable) |> dplyr::pull(domainStartDate), domain_table = rowData$domainTable, gender_concept_id = (rowData$genderConceptId > 0), - incidence = (rowData$incidence == 'Y'), + incidence = (rowData$incidence == "Y"), is_source_field = (rowData$isSourceField), - use_date_year = (rowData$useDateYear == 'Y'), - use_date_quarter = (rowData$useDateQuarter == 'Y'), - use_date_month = (rowData$useDateMonth == 'Y'), + use_date_year = (rowData$useDateYear == "Y"), + use_date_quarter = (rowData$useDateQuarter == "Y"), + use_date_month = (rowData$useDateMonth == "Y"), domain_table_short = rowData$domainTableShort, domain_field_short = rowData$domainFieldShort, calendar_type = rowData$calendarType, @@ -338,22 +344,22 @@ getConceptRecordCount <- function(conceptIds = NULL, cohort_database_schema = cohortDatabaseSchema, cohort_table_name = cohortTableName, cohort_definition_id = cohortDefinitionId, - use_age_group = (rowData$useAgeGroup == 'Y') + use_age_group = (rowData$useAgeGroup == "Y") ) - + # Regular expression to find a comma followed by any whitespace (including line breaks) and a semicolon regexPattern <- ",[\\s\\n\\r]*;" - + # Replace the pattern with just a semicolon sqlRendered <- gsub(regexPattern, ";", sqlRendered, perl = TRUE) - + sqlTranslated <- SqlRender::translate( sql = sqlRendered, targetDialect = connection@dbms, tempEmulationSchema = tempEmulationSchema ) - + DatabaseConnector::executeSql( connection = connection, sql = sqlTranslated, @@ -361,18 +367,20 @@ getConceptRecordCount <- function(conceptIds = NULL, reportOverallTime = FALSE, profile = FALSE ) - + output <- DatabaseConnector::querySql( connection = connection, sql = "SELECT * FROM #concept_count_table;", snakeCaseToCamelCase = TRUE ) - - existingOutput <- dplyr::bind_rows(existingOutput, - output) |> + + existingOutput <- dplyr::bind_rows( + existingOutput, + output + ) |> dplyr::tibble() } - + DatabaseConnector::renderTranslateExecuteSql( connection = connection, profile = FALSE, @@ -381,7 +389,7 @@ getConceptRecordCount <- function(conceptIds = NULL, sql = "DROP TABLE IF EXISTS #concept_count_table; DROP TABLE IF EXISTS #concept_id_unv_2;" ) - + existingOutput <- existingOutput |> dplyr::inner_join( domainInformation$long |> @@ -394,11 +402,11 @@ getConceptRecordCount <- function(conceptIds = NULL, by = c("domainTableShort", "domainFieldShort") ) |> dplyr::select(-"domainFieldShort", -"domainTableShort") - + if (!is.null(minCellCount)) { existingOutput <- existingOutput |> dplyr::filter(subjectCount > minCellCount) } - + return(existingOutput) } diff --git a/R/GetConceptSetOccurrenceDate.R b/R/GetConceptSetOccurrenceDate.R index cce229b..d6820fd 100644 --- a/R/GetConceptSetOccurrenceDate.R +++ b/R/GetConceptSetOccurrenceDate.R @@ -30,17 +30,15 @@ #' is provided, in which case a new connection will be opened at the start #' of the function, and closed when the function finishes. #' -#' @template VocabularyDatabaseSchema -#' #' @template CdmDatabaseSchema #' #' @template TempEmulationSchema -#' +#' #' @param restrictToObservationPeriod (Default = TRUE) Do you want to restrict to Observation period? i.e #' Cohort dates are restricted to observation period. #' #' @param conceptIds An array of concept ids -#' +#' #' @param limitToPersonDate Do you want to limit to person dates #' #' @return @@ -49,7 +47,6 @@ #' @export getConceptSetOccurrenceDate <- function(connection, cdmDatabaseSchema, - vocabularyDatabaseSchema = cdmDatabaseSchema, conceptIds, subset = c("all"), limitToPersonDate = TRUE, @@ -58,32 +55,30 @@ getConceptSetOccurrenceDate <- function(connection, subset <- tolower(subset) |> stringr::str_trim() |> stringr::str_squish() - + checkmate::assertChoice( x = subset, choices = c("all", "first", "last"), null.ok = FALSE ) - + checkmate::assertIntegerish( x = conceptIds, lower = 0, any.missing = FALSE, min.len = 1 ) - + tempTableName <- - paste0("#t", (as.numeric(as.POSIXlt(Sys.time( - - )))) * 100000) - - + paste0("#t", (as.numeric(as.POSIXlt(Sys.time()))) * 100000) + + tempConceptTableName <- loadTempConceptTable( conceptIds = conceptIds |> unique(), connection = connection, tempEmulationSchema = tempEmulationSchema ) - + sql <- SqlRender::loadRenderTranslateSql( "GetConceptSetExpressionOccurrenceDates.sql", packageName = utils::packageName(), @@ -103,6 +98,6 @@ getConceptSetOccurrenceDate <- function(connection, progressBar = TRUE, reportOverallTime = FALSE ) - + return(tempTableName) } diff --git a/R/InstantiateCohortFromConceptSetExpression.R b/R/InstantiateCohortFromConceptSetExpression.R index d7b010b..a9b53bb 100644 --- a/R/InstantiateCohortFromConceptSetExpression.R +++ b/R/InstantiateCohortFromConceptSetExpression.R @@ -59,7 +59,7 @@ instantiateCohortFromConceptSetExpression <- connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) } - + conceptIds <- ConceptSetDiagnostics::resolveConceptSetExpression( conceptSetExpression = conceptSetExpression, @@ -70,7 +70,7 @@ instantiateCohortFromConceptSetExpression <- dplyr::distinct() |> dplyr::arrange(conceptId) |> dplyr::pull(conceptId) - + tempTableWithConceptDates <- getConceptSetOccurrenceDate( connection = connection, @@ -80,12 +80,10 @@ instantiateCohortFromConceptSetExpression <- tempEmulationSchema = tempEmulationSchema, conceptIds = conceptIds ) - + tempCohortTableName <- - paste0("#t", (as.numeric(as.POSIXlt(Sys.time( - - )))) * 100000) - + paste0("#t", (as.numeric(as.POSIXlt(Sys.time()))) * 100000) + sql <- SqlRender::loadRenderTranslateSql( "ConvertConceptIdDatesTableToCohort.sql", packageName = utils::packageName(), @@ -103,7 +101,7 @@ instantiateCohortFromConceptSetExpression <- progressBar = TRUE, reportOverallTime = FALSE ) - + CohortAlgebra:::eraFyCohorts( connection = connection, sourceCohortDatabaseSchema = NULL, diff --git a/R/PerformStringSearchForConcepts.R b/R/PerformStringSearchForConcepts.R index 0056a17..a17f13a 100644 --- a/R/PerformStringSearchForConcepts.R +++ b/R/PerformStringSearchForConcepts.R @@ -116,8 +116,8 @@ performStringSearchForConcepts <- connection = connection, snakeCaseToCamelCase = TRUE ) |> - dplyr::tibble() |> - dplyr::mutate(searchString = eligibleToBeSearched[[i]]) |> + dplyr::tibble() |> + dplyr::mutate(searchString = eligibleToBeSearched[[i]]) |> dplyr::relocate(searchString) } @@ -128,10 +128,10 @@ performStringSearchForConcepts <- data <- data |> dplyr::bind_rows() |> dplyr::distinct() - + missingInResults <- setdiff(searchPhrases, data$searchString |> unique()) - + if (length(missingInResults) > 0) { warning(paste0( "The following search phrases did not yield any results: ", diff --git a/R/Private.R b/R/Private.R index 6b48b71..71f7b16 100644 --- a/R/Private.R +++ b/R/Private.R @@ -65,16 +65,16 @@ hasData <- function(data) { getUniqueString <- function(n = 7) { # create a vector of all alphanumeric characters alphanumericChars <- c(letters, 0:9) - + # generate the first character from the set of letters only firstChar <- sample(c(letters), 1) - + # generate the remaining characters from the set of all alphanumeric characters remainingChars <- sample(alphanumericChars, n, replace = TRUE) - + # combine the first character with the remaining characters uniqueString <- paste0(firstChar, paste0(remainingChars, collapse = "")) - + return(tolower(uniqueString)) } @@ -137,11 +137,9 @@ dropTempConceptTable <- #' Get domain information #' -#' @param packageName e.g. 'CohortDiagnostics' -#' #' @return #' A list with two tibble data frame objects with domain information represented in wide and long format respectively. -getDomainInformation <- function(packageName = NULL) { +getDomainInformation <- function() { domains <- readr::read_csv( system.file(file.path("csv", "domains.csv"), @@ -306,11 +304,13 @@ showProgress <- extraMessage = NULL) { progress <- (currentIteration / totalIterations) * 100 message <- - sprintf("\rProgress: %d/%d (%0.2f%%)", - currentIteration, - totalIterations, - progress) - + sprintf( + "\rProgress: %d/%d (%0.2f%%)", + currentIteration, + totalIterations, + progress + ) + if (!is.null(extraMessage)) { message <- paste0(message, ". ", extraMessage) } @@ -330,8 +330,7 @@ appendPrefixToColNames <- function(dataFrame, prefix) { sapply(colnames(dataFrame), function(colName) { paste0(prefix, capitalizeFirstLetter(colName)) }) - - return(dataFrame |> - dplyr::tibble()) -} + return(dataFrame |> + dplyr::tibble()) +} diff --git a/R/ResolveConceptSetExpression.R b/R/ResolveConceptSetExpression.R index b1e3b59..53dfce3 100644 --- a/R/ResolveConceptSetExpression.R +++ b/R/ResolveConceptSetExpression.R @@ -38,7 +38,7 @@ resolveConceptSetExpression <- function(conceptSetExpression, connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) } - + # convert concept set expression R object (list) to data frame conceptSetExpressionDataFrame <- convertConceptSetExpressionToDataFrame( @@ -46,14 +46,14 @@ resolveConceptSetExpression <- function(conceptSetExpression, conceptSetExpression = conceptSetExpression, tempEmulationSchema = tempEmulationSchema ) - + # get all descendant concept ids (as dataframe) for concepts that have # includeDescendants selected in conceptSetExpression conceptIdsWithIncludeDescendants <- conceptSetExpressionDataFrame |> dplyr::filter(.data$includeDescendants == TRUE) |> dplyr::pull(.data$conceptId) - + if (length(conceptIdsWithIncludeDescendants) == 0) { # get all resolved concept Ids resolvedConceptIds <- @@ -72,12 +72,12 @@ resolveConceptSetExpression <- function(conceptSetExpression, tempEmulationSchema = tempEmulationSchema, vocabularyDatabaseSchema = vocabularyDatabaseSchema ) - + # get all conceptIds (as dataframe) that are excluded in concept set expression excludedConceptIds <- conceptSetExpressionDataFrame |> dplyr::filter(.data$isExcluded == TRUE) |> dplyr::select(.data$conceptId) - + # get all conceptIds (as dataframe) that are excluded in concept set expression with descendants excludedConceptIdsWithDescendants <- descendantConcepts |> dplyr::filter(.data$ancestorConceptId %in% ( @@ -92,7 +92,7 @@ resolveConceptSetExpression <- function(conceptSetExpression, )) |> dplyr::select(.data$descendantConceptId) |> dplyr::distinct() - + # conceptIds in conceptSetExpression table conceptIdsInConceptSetExpressionTableToBeIncluded <- union( @@ -103,8 +103,8 @@ resolveConceptSetExpression <- function(conceptSetExpression, dplyr::pull(.data$descendantConceptId) |> unique() ) |> unique() - - + + conceptIdsInConceptSetExpressionTableToBeExcluded <- union( x = excludedConceptIds |> @@ -115,12 +115,14 @@ resolveConceptSetExpression <- function(conceptSetExpression, unique() ) |> unique() - + # removed all excluded conceptIds including those with descendants == TRUE resolvedConceptIdArray <- - setdiff(x = conceptIdsInConceptSetExpressionTableToBeIncluded, - y = conceptIdsInConceptSetExpressionTableToBeExcluded) - + setdiff( + x = conceptIdsInConceptSetExpressionTableToBeIncluded, + y = conceptIdsInConceptSetExpressionTableToBeExcluded + ) + # get all resolved concept Ids resolvedConceptIds <- dplyr::union( conceptSetExpressionDataFrame |> @@ -134,7 +136,7 @@ resolveConceptSetExpression <- function(conceptSetExpression, dplyr::pull(.data$conceptId) |> unique() } - + conceptIdDetails <- getConceptIdDetails( conceptIds = resolvedConceptIds, @@ -142,6 +144,6 @@ resolveConceptSetExpression <- function(conceptSetExpression, tempEmulationSchema = tempEmulationSchema, vocabularyDatabaseSchema = vocabularyDatabaseSchema ) - + return(conceptIdDetails) } diff --git a/R/ResolveConceptSetsInCohortExpression.R b/R/ResolveConceptSetsInCohortExpression.R index ab0ea41..1341e81 100644 --- a/R/ResolveConceptSetsInCohortExpression.R +++ b/R/ResolveConceptSetsInCohortExpression.R @@ -46,15 +46,17 @@ resolveConceptSetsInCohortExpression <- function(cohortExpression, DatabaseConnector::renderTranslateQuerySql( connection = connection, sql = sql, - vocabulary_database_schema = vocabularyDatabaseSchema, + vocabulary_database_schema = vocabularyDatabaseSchema, snakeCaseToCamelCase = TRUE ) } - resolvedConceptSet <- dplyr::bind_rows(resolvedConceptSet) |> - dplyr::arrange(codesetId, - conceptId) |> + resolvedConceptSet <- dplyr::bind_rows(resolvedConceptSet) |> + dplyr::arrange( + codesetId, + conceptId + ) |> dplyr::distinct() - + return(resolvedConceptSet) } diff --git a/R/XcreatePheValuatorCohortsFromConceptSet.R b/R/XcreatePheValuatorCohortsFromConceptSet.R deleted file mode 100644 index 25fa359..0000000 --- a/R/XcreatePheValuatorCohortsFromConceptSet.R +++ /dev/null @@ -1,331 +0,0 @@ - - - -#' @export -#' -createPheValuatorCohortsFromConceptSet <- function(connection, - cdmDatabaseSchema, - vocabularyDatabaseSchema = cdmDatabaseSchema, - conceptSetExpression, - tempEmulationSchema = NULL) { - conceptIds <- ConceptSetDiagnostics::resolveConceptSetExpression(connection = connection, - conceptSetExpression = conceptSetExpress) - - tempTableName <- - paste0("#t", (as.numeric(as.POSIXlt(Sys.time( - )))) * 100000) - - invisible(utils::capture.output( - DatabaseConnector::insertTable( - connection = connection, - tableName = tempTableName, - dropTableIfExists = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema, - data = conceptIdTable, - camelCaseToSnakeCase = TRUE, - bulkLoad = TRUE, - progressBar = TRUE, - createTable = TRUE - ), - file = nullfile() - )) - - ParallelLogger::logInfo("Creating event cohorts for concept set expression -1") - ConceptSetDiagnostics::instantiateCohortFromConceptSetExpression( - connection = connection, - cdmDatabaseSchema = cdmDatabaseSchema, - vocabularyDatabaseSchema = vocabularyDatabaseSchema, - cohortDatabaseSchema = targetCohortDatabaseSchema, - cohortId = -1, - cohortTable = targetCohortTableName, - restrictToObservationPeriod = TRUE, - conceptSetExpression = RJSONIO::fromJSON(content = conceptSetExpression, - digits = 23) - ) - - ParallelLogger::logInfo("Creating prevalence cohort 2") - ### Prevalence cohort is defined as first occurrence of eventCohort - ## prevalenceCohort (2) "First occurrence of conceptSetExpression till end of continuous observation" - sql <- "DELETE FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id IN (2); - - INSERT INTO @cohort_database_schema.@cohort_table (cohort_definition_id, subject_id, - cohort_start_date, cohort_end_date) - SELECT 2 cohort_definition_id, - c.subject_id, - min(c.cohort_start_date) cohort_start_date, - max(op.observation_period_end_date) cohort_end_date - FROM @cohort_database_schema.@cohort_table c - INNER JOIN @cdm_database_schema.observation_period op - ON c.subject_id = op.person_id - AND c.cohort_start_date >= op.observation_period_start_date - AND c.cohort_end_date <= op.observation_period_end_date - WHERE cohort_definition_id IN (-1) - GROUP BY subject_id;" - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = sql, - cohort_database_schema = targetCohortDatabaseSchema, - cdm_database_schema = cdmDatabaseSchema, - cohort_table = targetCohortTableName, - progressBar = FALSE, - reportOverallTime = FALSE - ) - - ParallelLogger::logInfo("Creating xSens 3") - ### xSens cohort is an event cohort of visit dates that started all days after eventCohort - sql <- " DELETE FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id = -3; - - INSERT INTO @cohort_database_schema.@cohort_table ( - cohort_definition_id, - subject_id, - cohort_start_date, - cohort_end_date - ) - - SELECT -3 cohort_definition_id, - subject_id, - min(cohort_start_date) cohort_start_date, - cohort_end_date - FROM ( - SELECT v.person_id subject_id, - v.visit_start_date cohort_start_date, - max(CASE - WHEN DATEADD(d, 365, v.visit_start_date) > op.observation_period_end_date - THEN op.observation_period_end_date - ELSE DATEADD(d, 365, v.visit_start_date) - END) cohort_end_date - FROM @cdm_database_schema.visit_occurrence v - INNER JOIN ( - SELECT subject_id, - min(cohort_start_date) cohort_start_date - FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id IN (- 1) - GROUP BY subject_id - ) p - ON v.person_id = p.subject_id - AND v.visit_start_date > p.cohort_start_date - INNER JOIN @cdm_database_schema.observation_period op - ON v.person_id = op.person_id - AND v.visit_start_date >= op.observation_period_start_date - AND v.visit_start_date <= op.observation_period_end_date - GROUP BY v.person_id, - v.visit_start_date - ) f - GROUP BY subject_id, - cohort_end_date;" - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = sql, - cdm_database_schema = cdmDatabaseSchema, - cohort_database_schema = targetCohortDatabaseSchema, - cohort_table = targetCohortTableName, - progressBar = FALSE, - reportOverallTime = FALSE - ) - CohortAlgebra::unionCohorts( - connection = connection, - sourceCohortDatabaseSchema = targetCohortDatabaseSchema, - sourceCohortTable = targetCohortTableName, - targetCohortDatabaseSchema = targetCohortDatabaseSchema, - targetCohortTable = targetCohortTableName, - oldToNewCohortId = dplyr::tibble(oldCohortId = -3, - newCohortId = 3), - tempEmulationSchema = tempEmulationSchema, - purgeConflicts = TRUE - ) - - ParallelLogger::logInfo("Creating base population 4") - # xBasePopulation (4) - ## LogicDescription - ### evaluationPopulation or Base Population is an event cohort indexed on any visit in a subject who - ### never belonged to the xSens cohort, OR indexed on visit that starts after - ### the first occurrence of event cohort. All events will have atleast 365days of observation days after and - ### cohort_end_date is cohort_start_date + 365. - sql <- " DELETE FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id = -4; - - INSERT INTO @cohort_database_schema.@cohort_table (cohort_definition_id, - subject_id, - cohort_start_date, - cohort_end_date) - SELECT DISTINCT -4 cohort_definition_id, - subject_id, - min(cohort_start_date) cohort_start_date, - cohort_end_date - FROM - ( - SELECT v.person_id subject_id, - visit_start_date cohort_start_date, - max(CASE - WHEN DATEADD(d, 365, v.visit_start_date) > op.observation_period_end_date - THEN op.observation_period_end_date - ELSE DATEADD(d, 365, v.visit_start_date) - END) cohort_end_date - FROM - @cdm_database_schema.visit_occurrence v - LEFT JOIN - ( - SELECT DISTINCT subject_id - FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id = -1 - ) rm - ON v.person_id = rm.subject_id - INNER JOIN @cdm_database_schema.observation_period op - ON v.person_id = op.person_id - AND v.visit_start_date >= op.observation_period_start_date - AND v.visit_end_date <= op.observation_period_end_date - AND DATEADD(d, 365, v.visit_start_date) <= op.observation_period_end_date - WHERE rm.subject_id IS NULL - GROUP BY v.person_id, - v.visit_start_date - - UNION ALL - - SELECT v.person_id subject_id, - v.visit_start_date cohort_start_date, - max(CASE - WHEN DATEADD(d, 365, v.visit_start_date) > op.observation_period_end_date - THEN op.observation_period_end_date - ELSE DATEADD(d, 365, v.visit_start_date) - END) cohort_end_date - FROM - @cdm_database_schema.visit_occurrence v - INNER JOIN - ( - SELECT - subject_id, - min(cohort_start_date) cohort_start_date - FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id IN (-1) - GROUP BY subject_id - ) p - ON v.person_id = p.subject_id - AND v.visit_start_date = p.cohort_start_date - INNER JOIN @cdm_database_schema.observation_period op - ON v.person_id = op.person_id - AND v.visit_start_date >= op.observation_period_start_date - AND v.visit_end_date <= op.observation_period_end_date - GROUP BY v.person_id, - v.visit_start_date - ) f - GROUP BY subject_id, - cohort_end_date;" - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = sql, - cdm_database_schema = cdmDatabaseSchema, - cohort_database_schema = targetCohortDatabaseSchema, - cohort_table = targetCohortTableName, - # condition on the same day as visit with 365 days post observation days - progressBar = FALSE, - reportOverallTime = FALSE - ) - CohortAlgebra::unionCohorts( - connection = connection, - sourceCohortDatabaseSchema = targetCohortDatabaseSchema, - sourceCohortTable = targetCohortTableName, - targetCohortDatabaseSchema = targetCohortDatabaseSchema, - targetCohortTable = targetCohortTableName, - oldToNewCohortId = dplyr::tibble(oldCohortId = -4, - newCohortId = 4), - tempEmulationSchema = tempEmulationSchema, - purgeConflicts = TRUE - ) - - ParallelLogger::logInfo("Creating xSpec 5 - 10") - # xSpec1 (> =5 <= 10 ) - # "First visit after the first event + satisfies additional criteria such as two (or more) events of eventCohort - # in a window of time prior (generally 21 to 1)" - - sql <- "DELETE - FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id = -@xSpec_id; - - INSERT INTO @cohort_database_schema.@cohort_table ( - cohort_definition_id, - subject_id, - cohort_start_date, - cohort_end_date - ) - SELECT -@xSpec_id cohort_definition_id, - subject_id, - min(cohort_start_date) cohort_start_date, - cohort_end_date - FROM ( - SELECT v.person_id subject_id, - v.visit_start_date cohort_start_date, - max(CASE - WHEN DATEADD(d, 365, v.visit_end_date) > op.observation_period_end_date - THEN op.observation_period_end_date - ELSE DATEADD(d, 365, v.visit_end_date) - END) cohort_end_date - FROM @cdm_database_schema.visit_occurrence v - INNER JOIN @cdm_database_schema.observation_period op - ON v.person_id = op.person_id - AND v.visit_start_date >= op.observation_period_start_date - AND v.visit_start_date <= op.observation_period_end_date - AND DATEADD(d, 365, v.visit_start_date) <= op.observation_period_end_date - INNER JOIN ( - SELECT subject_id, - min(cohort_start_date) cohort_start_date - FROM @cohort_database_schema.@cohort_table - WHERE cohort_definition_id IN (- 1) - GROUP BY subject_id - ) p - ON v.person_id = p.subject_id - AND DATEADD(DAY, @offset, v.visit_start_date) <= p.cohort_start_date - AND v.visit_start_date > p.cohort_start_date - INNER JOIN @cohort_database_schema.@cohort_table e - ON v.person_id = e.subject_id - AND DATEADD(DAY, @offset, v.visit_start_date) <= e.cohort_start_date - AND v.visit_start_date > e.cohort_start_date - WHERE e.cohort_definition_id = - 1 - GROUP BY v.person_id, - v.visit_start_date - HAVING count(DISTINCT e.cohort_start_date) >= @min_days_overlap - ) f - GROUP BY subject_id, - cohort_end_date;" - - xSpecIds <- c(5:10) |> sort() - for (j in (1:length(xSpecIds))) { - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = sql, - cdm_database_schema = cdmDatabaseSchema, - cohort_database_schema = targetCohortDatabaseSchema, - cohort_table = targetCohortTableName, - offset = offsetCohortStartDate, - xSpec_id = xSpecIds[[j]], - min_days_overlap = j + 1, - progressBar = FALSE, - reportOverallTime = FALSE - ) - CohortAlgebra::unionCohorts( - connection = connection, - sourceCohortDatabaseSchema = targetCohortDatabaseSchema, - sourceCohortTable = targetCohortTableName, - targetCohortDatabaseSchema = targetCohortDatabaseSchema, - targetCohortTable = targetCohortTableName, - oldToNewCohortId = dplyr::tibble(oldCohortId = xSpecIds[[j]] * -1, - newCohortId = xSpecIds[[j]]), - tempEmulationSchema = tempEmulationSchema, - purgeConflicts = TRUE - ) - } - - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = " DELETE FROM @target_cohort_database_schema.@target_cohort_table - WHERE cohort_definition_id < 0; - UPDATE STATISTICS @target_cohort_database_schema.@target_cohort_table;", - target_cohort_database_schema = targetCohortDatabaseSchema, - target_cohort_table = targetCohortTableName, - progressBar = FALSE, - reportOverallTime = FALSE - ) - DatabaseConnector::disconnect(connection = connection) -} diff --git a/man/getConceptSetOccurrenceDate.Rd b/man/getConceptSetOccurrenceDate.Rd index e9336ee..95d1a93 100644 --- a/man/getConceptSetOccurrenceDate.Rd +++ b/man/getConceptSetOccurrenceDate.Rd @@ -7,7 +7,6 @@ getConceptSetOccurrenceDate( connection, cdmDatabaseSchema, - vocabularyDatabaseSchema = cdmDatabaseSchema, conceptIds, subset = c("all"), limitToPersonDate = TRUE, @@ -26,8 +25,6 @@ of the function, and closed when the function finishes.} Note that for SQL Server, this should include both the database and schema name, for example 'cdm_data.dbo'.} -\item{vocabularyDatabaseSchema}{The schema name of containing the vocabulary tables.} - \item{conceptIds}{An array of concept ids} \item{limitToPersonDate}{Do you want to limit to person dates} diff --git a/man/getDomainInformation.Rd b/man/getDomainInformation.Rd index b0ddec5..6dab22e 100644 --- a/man/getDomainInformation.Rd +++ b/man/getDomainInformation.Rd @@ -4,10 +4,7 @@ \alias{getDomainInformation} \title{Get domain information} \usage{ -getDomainInformation(packageName = NULL) -} -\arguments{ -\item{packageName}{e.g. 'CohortDiagnostics'} +getDomainInformation() } \value{ A list with two tibble data frame objects with domain information represented in wide and long format respectively.