Skip to content

Commit e8f57c9

Browse files
committed
Merge branch 'darwin_sprint' into issue72
2 parents 2a95d5b + c1bcaac commit e8f57c9

10 files changed

+287
-212
lines changed

R/ConceptSetUtils.R

+1-59
Original file line numberDiff line numberDiff line change
@@ -256,65 +256,7 @@ mergeTempTables <-
256256
}
257257
}
258258

259-
instantiateUniqueConceptSets <- function(uniqueConceptSets,
260-
connection,
261-
vocabularyDatabaseSchema,
262-
tempEmulationSchema,
263-
conceptSetsTable = "#inst_concept_sets") {
264-
ParallelLogger::logInfo("Instantiating concept sets")
265-
266-
if (nrow(uniqueConceptSets) > 0) {
267-
sql <- sapply(
268-
split(uniqueConceptSets, 1:nrow(uniqueConceptSets)),
269-
function(x) {
270-
sub(
271-
"SELECT [0-9]+ as codeset_id",
272-
sprintf("SELECT %s as codeset_id", x$uniqueConceptSetId),
273-
x$conceptSetSql
274-
)
275-
}
276-
)
277-
278-
batchSize <- 100
279-
tempTables <- c()
280-
pb <- utils::txtProgressBar(style = 3)
281-
for (start in seq(1, length(sql), by = batchSize)) {
282-
utils::setTxtProgressBar(pb, start / length(sql))
283-
tempTable <-
284-
paste("#", paste(sample(letters, 20, replace = TRUE), collapse = ""), sep = "")
285-
tempTables <- c(tempTables, tempTable)
286-
end <- min(start + batchSize - 1, length(sql))
287-
sqlSubset <- sql[start:end]
288-
sqlSubset <- paste(sqlSubset, collapse = "\n\n UNION ALL\n\n")
289-
sqlSubset <-
290-
sprintf(
291-
"SELECT *\nINTO %s\nFROM (\n %s\n) tmp;",
292-
tempTable,
293-
sqlSubset
294-
)
295-
sqlSubset <-
296-
SqlRender::render(sqlSubset, vocabulary_database_schema = vocabularyDatabaseSchema)
297-
sqlSubset <- SqlRender::translate(sqlSubset,
298-
targetDialect = connection@dbms,
299-
tempEmulationSchema = tempEmulationSchema
300-
)
301-
DatabaseConnector::executeSql(connection,
302-
sqlSubset,
303-
progressBar = FALSE,
304-
reportOverallTime = FALSE
305-
)
306-
}
307-
utils::setTxtProgressBar(pb, 1)
308-
close(pb)
309-
310-
mergeTempTables(
311-
connection = connection,
312-
tableName = conceptSetsTable,
313-
tempTables = tempTables,
314-
tempEmulationSchema = tempEmulationSchema
315-
)
316-
}
317-
}
259+
318260

319261
getCodeSetId <- function(criterion) {
320262
if (is.list(criterion)) {

R/ResultsDataModel.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ getResultsDataModelSpecifications <- function(tableName = NULL) {
2828
readr::local_edition(1)
2929
pathToCsv <- system.file("settings", "resultsDataModelSpecification.csv", package = "CohortDiagnostics")
3030

31-
resultsDataModelSpecifications <- readr::read_csv(file = pathToCsv, col_types = "cccccccccc")
31+
resultsDataModelSpecifications <- readr::read_csv(file = pathToCsv, col_types = "ccccccccccc")
3232

3333
colnames(resultsDataModelSpecifications) <- SqlRender::snakeCaseToCamelCase(colnames(resultsDataModelSpecifications))
3434

R/runBreakdownIndexEvents.R

-148
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,6 @@ runBreakdownIndexEvents <- function(connection,
5151
vocabularyDatabaseSchema = cdmDatabaseSchema,
5252
databaseId,
5353
cohorts,
54-
runIncludedSourceConcepts,
55-
runOrphanConcepts,
56-
runBreakdownIndexEvents,
5754
exportFolder,
5855
minCellCount,
5956
conceptCountsDatabaseSchema = NULL,
@@ -526,151 +523,6 @@ runBreakdownIndexEvents <- function(connection,
526523
}
527524
}
528525

529-
if (runOrphanConcepts) {
530-
# Orphan concepts ---------------------------------------------------------
531-
ParallelLogger::logInfo("Finding orphan concepts")
532-
if (incremental && (nrow(cohorts) - nrow(subsetOrphans)) > 0) {
533-
ParallelLogger::logInfo(sprintf(
534-
"Skipping %s cohorts in incremental mode.",
535-
nrow(cohorts) - nrow(subsetOrphans)
536-
))
537-
}
538-
if (nrow(subsetOrphans > 0)) {
539-
start <- Sys.time()
540-
541-
if (!useExternalConceptCountsTable) {
542-
ParallelLogger::logTrace("Using internal concept count table.")
543-
} else {
544-
stop("Use of external concept count table is not supported")
545-
}
546-
547-
# [OPTIMIZATION idea] can we modify the sql to do this for all uniqueConceptSetId in one query using group by?
548-
data <- list()
549-
for (i in (1:nrow(uniqueConceptSets))) {
550-
conceptSet <- uniqueConceptSets[i, ]
551-
ParallelLogger::logInfo(
552-
"- Finding orphan concepts for concept set '",
553-
conceptSet$conceptSetName,
554-
"'"
555-
)
556-
557-
timeExecution(
558-
exportFolder,
559-
taskName = "orphanConcepts",
560-
parent = "runConceptSetDiagnostics",
561-
cohortIds = paste("concept_set-", conceptSet$conceptSetName),
562-
expr = {
563-
data[[i]] <- .findOrphanConcepts(
564-
connection = connection,
565-
cdmDatabaseSchema = cdmDatabaseSchema,
566-
tempEmulationSchema = tempEmulationSchema,
567-
useCodesetTable = TRUE,
568-
codesetId = conceptSet$uniqueConceptSetId,
569-
conceptCountsDatabaseSchema = conceptCountsDatabaseSchema,
570-
conceptCountsTable = conceptCountsTable,
571-
conceptCountsTableIsTemp = conceptCountsTableIsTemp,
572-
instantiatedCodeSets = "#inst_concept_sets",
573-
orphanConceptTable = "#orphan_concepts"
574-
)
575-
576-
if (!is.null(conceptIdTable)) {
577-
sql <- "INSERT INTO @concept_id_table (concept_id)
578-
SELECT DISTINCT concept_id
579-
FROM @orphan_concept_table;"
580-
DatabaseConnector::renderTranslateExecuteSql(
581-
connection = connection,
582-
sql = sql,
583-
tempEmulationSchema = tempEmulationSchema,
584-
concept_id_table = conceptIdTable,
585-
orphan_concept_table = "#orphan_concepts",
586-
progressBar = FALSE,
587-
reportOverallTime = FALSE
588-
)
589-
}
590-
}
591-
)
592-
sql <-
593-
"TRUNCATE TABLE @orphan_concept_table;\nDROP TABLE @orphan_concept_table;"
594-
DatabaseConnector::renderTranslateExecuteSql(
595-
connection = connection,
596-
sql = sql,
597-
tempEmulationSchema = tempEmulationSchema,
598-
orphan_concept_table = "#orphan_concepts",
599-
progressBar = FALSE,
600-
reportOverallTime = FALSE
601-
)
602-
}
603-
604-
data <- dplyr::bind_rows(data) %>%
605-
dplyr::distinct() %>%
606-
dplyr::rename("uniqueConceptSetId" = "codesetId") %>%
607-
dplyr::inner_join(
608-
conceptSets %>%
609-
dplyr::select(
610-
"uniqueConceptSetId",
611-
"cohortId",
612-
"conceptSetId"
613-
) %>% dplyr::distinct(),
614-
by = "uniqueConceptSetId",
615-
relationship = "many-to-many"
616-
) %>%
617-
dplyr::select(-"uniqueConceptSetId") %>%
618-
dplyr::select(
619-
"cohortId",
620-
"conceptSetId",
621-
"conceptId",
622-
"conceptCount",
623-
"conceptSubjects"
624-
) %>%
625-
dplyr::group_by(
626-
.data$cohortId,
627-
.data$conceptSetId,
628-
.data$conceptId
629-
) %>%
630-
dplyr::summarise(
631-
conceptCount = max(.data$conceptCount),
632-
conceptSubjects = max(.data$conceptSubjects)
633-
) %>%
634-
dplyr::ungroup()
635-
636-
637-
exportDataToCsv(
638-
data = data,
639-
tableName = "orphan_concept",
640-
fileName = file.path(exportFolder, "orphan_concept.csv"),
641-
minCellCount = minCellCount,
642-
databaseId = databaseId,
643-
incremental = incremental,
644-
cohortId = subsetOrphans$cohortId
645-
)
646-
647-
recordTasksDone(
648-
cohortId = subsetOrphans$cohortId,
649-
task = "runOrphanConcepts",
650-
checksum = subsetOrphans$checksum,
651-
recordKeepingFile = recordKeepingFile,
652-
incremental = incremental
653-
)
654-
655-
delta <- Sys.time() - start
656-
657-
timeExecution(
658-
exportFolder,
659-
taskName = "allOrphanConcepts",
660-
parent = "runConceptSetDiagnostics",
661-
start = start,
662-
execTime = delta
663-
)
664-
665-
ParallelLogger::logInfo(
666-
"Finding orphan concepts took ",
667-
signif(delta, 3),
668-
" ",
669-
attr(delta, "units")
670-
)
671-
}
672-
}
673-
674526
# put all instantiated concepts into #concept_ids table
675527
# this is extracted with vocabulary tables
676528
# this will have more codes than included source concepts

R/runIncludedSourceConcepts.R

+8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11

2+
3+
getIncludedSourceConcepts <- function() {
4+
5+
}
6+
7+
8+
9+
210
#' Title
311
#'
412
#' @param connection

0 commit comments

Comments
 (0)