Skip to content

Commit

Permalink
j
Browse files Browse the repository at this point in the history
  • Loading branch information
gowthamrao committed Nov 18, 2024
1 parent 188cb28 commit 302e719
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 98 deletions.
112 changes: 53 additions & 59 deletions R/GetConceptRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,30 @@ getConceptRecordCount <- function(conceptIds = NULL,
)
on.exit(DatabaseConnector::disconnect(connection), add = TRUE)
}

uploadedConceptTable <- ""
if (!is.null(conceptIds)) {
uploadedConceptTable <-
loadTempConceptTable(
conceptIds = conceptIds,
connection = connection
connection = connection,
bulkLoad = Sys.getenv("DATABASE_CONNECTOR_BULK_UPLOAD"),
tempEmulationSchema = tempEmulationSchema
)
}

domainInformation <-
getDomainInformation()

domainsWide <- domainInformation$wide |>
dplyr::filter(.data$domainTable %in% c(domainTableName)) |>
dplyr::filter(.data$isEraTable == FALSE)

domainsLong <- domainInformation$long |>
dplyr::filter(.data$domainTable %in% c(domainTableName)) |>
dplyr::filter(.data$eraTable == FALSE)
# filtering out ERA tables because they are supposed to be derived tables, and counting them is double counting

limitToCohort <- FALSE
if (all(
!is.null(cohortDatabaseSchema),
Expand All @@ -121,25 +123,24 @@ getConceptRecordCount <- function(conceptIds = NULL,
)) {
limitToCohort <- TRUE
}

# REASON for many SQL --DISTINCT subject_count cannot be computed from aggregation query of calendar month level data
sql <- "
DROP TABLE IF EXISTS #concept_count_table;
{@concept_id_universe != ''} ? {
DROP TABLE IF EXISTS #concept_id_unv_2;
CREATE TABLE #concept_id_unv_2 as
(
SELECT DISTINCT u.concept_id
FROM @concept_id_universe u
INNER JOIN (
SELECT concept_id
FROM @vocabulary_database_schema.CONCEPT
WHERE concept_id > 0
) c
ON u.concept_id = c.concept_id
) ;
SELECT DISTINCT u.concept_id
INTO #concept_id_unv_2
FROM @concept_id_universe u
INNER JOIN (
SELECT concept_id
FROM @vocabulary_database_schema.CONCEPT
WHERE concept_id > 0
) c
ON u.concept_id = c.concept_id;
}
--PERCENTILES are difficult and will need subqueries
Expand Down Expand Up @@ -227,7 +228,7 @@ getConceptRecordCount <- function(conceptIds = NULL,
{@use_date_quarter} ? {DATEPART(quarter, @domain_start_date),}};
"

iterations <- domainsLong |>
tidyr::crossing(dplyr::tibble(includeConceptId = c("Y", "N", ""))) |>
tidyr::crossing(dplyr::tibble(genderConceptId = c(0, 8507, 8532))) |>
Expand Down Expand Up @@ -263,72 +264,68 @@ getConceptRecordCount <- function(conceptIds = NULL,
tidyr::crossing(dplyr::tibble(useAgeGroup = c("Y", "N"))) |>
dplyr::arrange() |>
dplyr::mutate(combination = dplyr::row_number())

if (!stratifyByGender) {
iterations <- iterations |>
dplyr::filter(!.data$genderConceptId %in% c(8507, 8532))
}

if (!stratifyByYear) {
iterations <- iterations |>
dplyr::filter(.data$calendarType != "Y")
}

if (!stratifyByYearQuarter) {
iterations <- iterations |>
dplyr::filter(.data$calendarType != "Q")
}

if (!stratifyByYearMonth) {
iterations <- iterations |>
dplyr::filter(.data$calendarType != "M")
}

if (!stratifyByAgeGroup) {
iterations <- iterations |>
dplyr::filter(.data$useAgeGroup != "Y")
}

if (!stratifyByIncidence) {
iterations <- iterations |>
dplyr::filter(.data$incidence != "Y")
}

if (!getOverallCounts) {
iterations <- iterations |>
dplyr::filter(.data$includeConceptId == "Y")
}

existingOutput <- c()

for (i in (1:nrow(iterations))) {
rowData <- iterations[i, ]

extraMessage <-
paste0(
"Working on ",
rowData$domainTable,
".",
rowData$domainField,
"."
)
paste0("Working on ",
rowData$domainTable,
".",
rowData$domainField,
".")
progress <- (i / nrow(iterations)) * 100
message <-
sprintf(
"\rProgress: %d/%d (%0.2f%%)",
i,
nrow(iterations),
progress
)

sprintf("\rProgress: %d/%d (%0.2f%%)",
i,
nrow(iterations),
progress)

ParallelLogger::logInfo(message)

showProgress(
currentIteration = i,
totalIterations = nrow(iterations),
extraMessage = extraMessage
)

sqlRendered <- SqlRender::render(
sql = sql,
cdm_database_schema = cdmDatabaseSchema,
Expand Down Expand Up @@ -363,41 +360,38 @@ getConceptRecordCount <- function(conceptIds = NULL,
cohort_definition_id = cohortDefinitionId,
use_age_group = (rowData$useAgeGroup == "Y")
)

# Regular expression to find a comma followed by any whitespace (including line breaks) and a semicolon
regexPattern <- ",[\\s\\n\\r]*;"

# Replace the pattern with just a semicolon
sqlRendered <- gsub(regexPattern, ";", sqlRendered, perl = TRUE)

sqlTranslated <-
SqlRender::translate(
sql = sqlRendered,
targetDialect = connection@dbms,
tempEmulationSchema = tempEmulationSchema
)

DatabaseConnector::executeSql(
connection = connection,
sql = sqlTranslated,
progressBar = FALSE,
reportOverallTime = FALSE,
profile = FALSE
)

output <- DatabaseConnector::renderTranslateQuerySql(
connection = connection,
sql = "SELECT * FROM #concept_count_table;",
snakeCaseToCamelCase = TRUE
)

existingOutput <- dplyr::bind_rows(
existingOutput,
output
) |>

existingOutput <- dplyr::bind_rows(existingOutput, output) |>
dplyr::tibble()
}

DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
profile = FALSE,
Expand All @@ -406,7 +400,7 @@ getConceptRecordCount <- function(conceptIds = NULL,
sql = "DROP TABLE IF EXISTS #concept_count_table;
DROP TABLE IF EXISTS #concept_id_unv_2;"
)

existingOutput <- existingOutput |>
dplyr::inner_join(
domainInformation$long |>
Expand All @@ -419,11 +413,11 @@ getConceptRecordCount <- function(conceptIds = NULL,
by = c("domainTableShort", "domainFieldShort")
) |>
dplyr::select(-"domainFieldShort", -"domainTableShort")

if (!is.null(minCellCount)) {
existingOutput <-
existingOutput |> dplyr::filter(.data$subjectCount > minCellCount)
}

return(existingOutput)
}
99 changes: 61 additions & 38 deletions R/GetStandardMappingRecommendationsForNonStandard.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ getStandardMappingRecommendationsForNonStandard <- function(connectionDetails =
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
sourceVocabularyId = c("ICD10CM"),
sourceCodes,
removeSpecialCharacters = TRUE) {
removeSpecialCharacters = TRUE,
includeDescendants = FALSE) {
if (is.null(vocabularyDatabaseSchema)) {
stop("vocabularyDatabaseSchema cannot be NULL.")
}
Expand Down Expand Up @@ -85,7 +86,7 @@ getStandardMappingRecommendationsForNonStandard <- function(connectionDetails =
tempEmulationSchema = tempEmulationSchema
) |>
dplyr::tibble()


#do the fuzzy match - but we expect to 100%match for this work.
#message will be shown if there are approximate match
Expand Down Expand Up @@ -136,42 +137,66 @@ getStandardMappingRecommendationsForNonStandard <- function(connectionDetails =
dplyr::left_join(numberOfMappedStandardConceptsMappedToGivenSourceDf, by = "givenConceptId")


# get descendants of all standard concepts
descendantsOfStandardConcept <- ConceptSetDiagnostics::getConceptDescendant(
conceptIds = mappedStandard$conceptId,
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema
) |>
dplyr::filter(minLevelsOfSeparation > 0) |>
dplyr::select(ancestorConceptId, descendantConceptId) |>
dplyr::distinct()

#get mapped concept for the standard esp descendants
mappedSource <- ConceptSetDiagnostics::getMappedSourceConcepts(
conceptIds = c(
mappedStandard$conceptId,
descendantsOfStandardConcept$descendantConceptId
) |> unique(),
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema,
tempEmulationSchema = tempEmulationSchema
)

#filter to desired vocabulary
mappedSourceFiltered <- mappedSource |>
dplyr::filter(vocabularyId %in% c(sourceVocabularyId)) |>
dplyr::left_join(
output$codesWithConceptId |>
dplyr::select(conceptId) |>
dplyr::distinct() |>
dplyr::mutate(isInputConceptId = 1)
if (includeDescendants) {
# get descendants of all standard concepts
descendantsOfStandardConcept <- ConceptSetDiagnostics::getConceptDescendant(
conceptIds = mappedStandard$conceptId,
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema
) |>
tidyr::replace_na(list(isInputConceptId = 0))
dplyr::filter(minLevelsOfSeparation > 0) |>
dplyr::select(ancestorConceptId, descendantConceptId) |>
dplyr::distinct()

#get mapped concept for the standard esp descendants
mappedSource <- ConceptSetDiagnostics::getMappedSourceConcepts(
conceptIds = c(
mappedStandard$conceptId,
descendantsOfStandardConcept$descendantConceptId
) |> unique(),
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema,
tempEmulationSchema = tempEmulationSchema
)

#filter to desired vocabulary
mappedSourceFiltered <- mappedSource |>
dplyr::filter(vocabularyId %in% c(sourceVocabularyId)) |>
dplyr::left_join(
output$codesWithConceptId |>
dplyr::select(conceptId) |>
dplyr::distinct() |>
dplyr::mutate(isInputConceptId = 1)
) |>
tidyr::replace_na(list(isInputConceptId = 0))

} else {
#get mapped concept for the standard esp descendants
mappedSource <- ConceptSetDiagnostics::getMappedSourceConcepts(
conceptIds = c(mappedStandard$conceptId) |> unique(),
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema,
tempEmulationSchema = tempEmulationSchema
)

#filter to desired vocabulary
mappedSourceFiltered <- mappedSource |>
dplyr::filter(vocabularyId %in% c(sourceVocabularyId)) |>
dplyr::left_join(
output$codesWithConceptId |>
dplyr::select(conceptId) |>
dplyr::distinct() |>
dplyr::mutate(isInputConceptId = 1)
) |>
tidyr::replace_na(list(isInputConceptId = 0))
}

#find all concept id and get their detail
conceptIds <- c(output$codesWithConceptId$conceptId,
mappedStandard$conceptId,
mappedSource$conceptId) |>
conceptIds <- c(
output$codesWithConceptId$conceptId,
mappedStandard$conceptId,
mappedSource$conceptId
) |>
unique()

output$conceptIdDetails <- ConceptSetDiagnostics::getConceptIdDetails(
Expand All @@ -181,9 +206,7 @@ getStandardMappingRecommendationsForNonStandard <- function(connectionDetails =
) |>
dplyr::arrange(conceptId)

browser()

#this is the main output. it has the source and mapped standard
#this is the main output. it has the source and mapped standard
output$sourceMappedToStandard <- mappedStandard |>
dplyr::rename(sourceConceptId = givenConceptId, standardConceptId = conceptId) |>
dplyr::select(
Expand Down
3 changes: 2 additions & 1 deletion man/getStandardMappingRecommendationsForNonStandard.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 302e719

Please sign in to comment.