Skip to content

Commit

Permalink
Fix incremental export for cohort relationships (#1137)
Browse files Browse the repository at this point in the history
* Modified checksum calculation in cohort relationships

* Fixed checksum test

* Correct use of keys to overwrite old data

* Fixed tests

* Removed code changes that are out of scope
  • Loading branch information
azimov authored Sep 27, 2024
1 parent b32c966 commit 4185b7f
Showing 1 changed file with 16 additions and 11 deletions.
27 changes: 16 additions & 11 deletions R/CohortRelationship.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ runCohortRelationshipDiagnostics <-
lower = 0,
any.missing = FALSE,
min.len = 1,
unique = TRUE,
add = errorMessage
unique = TRUE
)
checkmate::reportAssertions(collection = errorMessage)

Expand Down Expand Up @@ -215,18 +214,20 @@ executeCohortRelationshipDiagnostics <- function(connection,
targetChecksum = "checksum"
) %>%
dplyr::distinct()
combinationsOfPossibleCohortRelationships <- allCohortIds %>%

posibleCombinations <- allCohortIds %>%
tidyr::crossing(allCohortIds %>%
dplyr::rename(
comparatorCohortId = "targetCohortId",
comparatorChecksum = "targetChecksum"
)) %>%
dplyr::filter(.data$targetCohortId != .data$comparatorCohortId) %>%
dplyr::arrange(.data$targetCohortId, .data$comparatorCohortId) %>%
dplyr::mutate(checksum = paste0(.data$targetChecksum, .data$comparatorChecksum))
dplyr::arrange(.data$targetCohortId, .data$comparatorCohortId)

posibleCombinations$checksum <- paste0(posibleCombinations$targetChecksum, posibleCombinations$comparatorChecksum)

subset <- subsetToRequiredCombis(
combis = combinationsOfPossibleCohortRelationships,
combis = posibleCombinations,
task = "runCohortRelationship",
incremental = incremental,
recordKeepingFile = recordKeepingFile
Expand All @@ -244,17 +245,17 @@ executeCohortRelationshipDiagnostics <- function(connection,
}

if (incremental &&
(nrow(combinationsOfPossibleCohortRelationships) - (
(nrow(posibleCombinations) - (
nrow(
combinationsOfPossibleCohortRelationships %>%
posibleCombinations %>%
dplyr::filter(.data$targetCohortId %in% c(subset$targetCohortId))
)
)) > 0) {
ParallelLogger::logInfo(
sprintf(
" - Skipping %s combinations in incremental mode because these were previously computed.",
nrow(combinationsOfPossibleCohortRelationships) - nrow(
combinationsOfPossibleCohortRelationships %>%
nrow(posibleCombinations) - nrow(
posibleCombinations %>%
dplyr::filter(.data$targetCohortId %in% c(subset$targetCohortId))
)
)
Expand Down Expand Up @@ -366,7 +367,11 @@ executeCohortRelationshipDiagnostics <- function(connection,
writeToCsv(
data = data,
fileName = outputFile,
incremental = TRUE
incremental = TRUE,
cohortId = data$cohortId,
comparatorCohortId = data$comparatorCohortId,
startDay = data$startDay,
endDay = data$endDay
)

recordTasksDone(
Expand Down

0 comments on commit 4185b7f

Please sign in to comment.