Skip to content

Commit

Permalink
Merge pull request #21 from oxford-pharmacoepi/sql_server
Browse files Browse the repository at this point in the history
update for sql server and other packages releases updates
  • Loading branch information
tiozab authored Jul 9, 2024
2 parents c54332d + 67e471b commit 1fff917
Show file tree
Hide file tree
Showing 29 changed files with 751 additions and 258 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: What the package does (one paragraph).
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
DT,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export(getValueWeightDist)
export(mockPregnancy)
export(summariseGestationalAge)
export(writeResultToDisk)
importFrom(here,here)
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(rlang,.env)
42 changes: 33 additions & 9 deletions R/MockDB.R
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ mockPregnancy <- function(mothertable = NULL,
db <- DBI::dbConnect(duckdb::duckdb(), ":memory:")

# add other tables required for snapshot
cdmSource <- dplyr::tibble(
cdm_source <- dplyr::tibble(
cdm_source_name = "test_database",
cdm_source_abbreviation = NA,
cdm_holder = NA,
Expand All @@ -294,18 +294,42 @@ mockPregnancy <- function(mothertable = NULL,
vocabulary_version = NA
)

DBI::dbWriteTable(db, "cdm_source",
cdmSource,
overwrite = TRUE
)
person <- dplyr::tibble(
person_id = 1,
gender_concept_id = 1,
year_of_birth = 1,
race_concept_id = 1,
ethnicity_concept_id = 1
)

observation_period <- dplyr::tibble(
person_id = 1,
observation_period_id = 1,
observation_period_start_date = as.Date(2002-01-01),
observation_period_end_date = as.Date(2002-01-01),
period_type_concept_id = 1
)


cdm <- CDMConnector::cdm_from_con(db,
write_schema = "main",
)
DBI::dbWriteTable(db, "cdm_source",
cdm_source,
overwrite = TRUE
)

write_schema = "main"
DBI::dbWriteTable(db, "person",
person,
overwrite = TRUE)

DBI::dbWriteTable(db, "observation_period",
observation_period,
overwrite = TRUE)


cdm <- CDMConnector::cdm_from_con(db,
cdm_schema = "main",
write_schema = "main",
)
write_schema = "main"

DBI::dbWriteTable(db, CDMConnector::inSchema(write_schema, "mothertable"),
mothertable,
Expand Down
4 changes: 2 additions & 2 deletions R/checkFetusId.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ records_prop <- records_n %>%

missing_noOfFetus = round(.data$missing_noOfFetus /nrow(tibble::as_tibble(mothertable)),3)*100,

) %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
percentage = value)


records_n <- records_n %>% tidyr::pivot_longer(cols = everything()) %>%
records_n <- records_n %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)

Expand Down
4 changes: 2 additions & 2 deletions R/checkFetusesLiveborn.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ checkFetusesLiveborn <- function(
missing_relativeNumber = round(.data$missing_relativeNumber / nrow(tibble::as_tibble(worktable)),3)*100


) %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
percentage = value)


records_n <- records_n %>% tidyr::pivot_longer(cols = everything()) %>%
records_n <- records_n %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)

Expand Down
4 changes: 2 additions & 2 deletions R/checkOutcomeMode.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ checkOutcomeMode <- function(
match = round(.data$match / nrow(tibble::as_tibble(worktable)),3)*100,

missingUnknown_information = round(.data$missingUnknown_information /nrow(tibble::as_tibble(worktable)),3)*100) %>%
tidyr::pivot_longer(cols = everything()) %>%
tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
percentage = value)


records_n <- records_n %>% tidyr::pivot_longer(cols = everything()) %>%
records_n <- records_n %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)

Expand Down
4 changes: 2 additions & 2 deletions R/executeChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ executeChecks <- function(#cdm,
checkLogical(verbose, messageStore = errorMessage)
checkmate::reportAssertions(collection = errorMessage)


PETOverviewMother <- NULL
PETOverviewBaby <- NULL
if ("overview" %in% checks) {
Expand Down Expand Up @@ -198,7 +197,7 @@ executeChecks <- function(#cdm,

bitSetOverviewAll <- getBitSet(mothertable,babytable) %>% dplyr::collect()
}
if (!is.null(mothertable)) {
if (!is.null(mothertable) && "pregnancy_number_fetuses" %in% colnames(mothertable)) {

bitSetOverviewMother <- getBitSet(mothertable, babytable = NULL) %>% dplyr::collect()
}
Expand Down Expand Up @@ -279,6 +278,7 @@ executeChecks <- function(#cdm,
#' @param resultList named list with results
#' @param databaseId database identifier
#' @param outputFolder folder to write to
#' @importFrom here here
#'
#' @export
#'
Expand Down
2 changes: 1 addition & 1 deletion R/getAnnualOverview.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ getAnnualOverview <- function(

records <- records %>%
dplyr::mutate(
year = format(.data$pregnancy_end_date, "%Y")
year = lubridate::year(.data$pregnancy_end_date)
) %>% dplyr::group_by(.data$year) %>%
dplyr::summarise(
count = dplyr::n_distinct(.data$pregnancy_id)
Expand Down
4 changes: 2 additions & 2 deletions R/getMissings.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ getMissings <- function(
}
# all the required variables do not have missings, they have "0", whereas non-required variables have NAs

n_missing_long <- n_missing %>% tidyr::pivot_longer(cols = everything()) %>%
n_missing_long <- n_missing %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)
prop_missing_long <- prop_missing %>% tidyr::pivot_longer(cols = everything()) %>%
prop_missing_long <- prop_missing %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
percentage = value)

Expand Down
4 changes: 2 additions & 2 deletions R/getOverview.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ getOverview <- function(
dplyr::summarise(
pregnancies = dplyr::n_distinct(.data$pregnancy_id),
fetuses = dplyr::n_distinct(.data$fetus_id)
) %>% dplyr::collect() %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% dplyr::collect() %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)

Expand All @@ -49,7 +49,7 @@ getOverview <- function(
dplyr::summarise(
women = dplyr::n_distinct(.data$person_id),
pregnancies = dplyr::n_distinct(.data$pregnancy_id)
) %>% dplyr::collect() %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% dplyr::collect() %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)

Expand Down
4 changes: 2 additions & 2 deletions R/getUnknown.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ getUnknown <- function(
}


n_unknown_long <- n_unknown %>% tidyr::pivot_longer(cols = everything()) %>%
n_unknown_long <- n_unknown %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
count = value)
prop_unknown_long <- prop_unknown %>% tidyr::pivot_longer(cols = everything()) %>%
prop_unknown_long <- prop_unknown %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
percentage = value)

Expand Down
4 changes: 2 additions & 2 deletions R/getValueDatesAgeDist.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ getValueDatesAgeDist <- function(
max_start = max(.data$pregnancy_start_date, na.rm = T),
min_end = min(.data$pregnancy_end_date, na.rm=T),
max_end = max(.data$pregnancy_end_date, na.rm = T)
) %>% dplyr::collect() %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% dplyr::collect() %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
value = value)

Expand Down Expand Up @@ -114,7 +114,7 @@ getValueDatesAgeDist <- function(
0.99, na.rm = T
),
max_gestationalAge_inDays = max(.data$gestational_length_in_day, na.rm = T)
) %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
value = value)

Expand Down
2 changes: 1 addition & 1 deletion R/getValueWeightDist.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ getValueWeightDist <- function(
0.95, na.rm = T
),
max_birth_weight_in_gram = max(.data$birth_weight, na.rm = T)
) %>% tidyr::pivot_longer(cols = everything()) %>%
) %>% tidyr::pivot_longer(cols = tidyr::everything()) %>%
dplyr::rename(variable = name,
value = value)

Expand Down
4 changes: 2 additions & 2 deletions R/obscureCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ obscureCounts <- function(table,
if (!is.null(checkColNames)) {
table <- table %>%
dplyr::rowwise() %>%
dplyr::mutate(result_obscured = any(dplyr::across(all_of(checkColNames), ~ (. < minCellCount & . > 0)))) %>%
dplyr::mutate_at(dplyr::vars(all_of(toBeSubstituted)), ~ ifelse(result_obscured, substitute, .))
dplyr::mutate(result_obscured = any(dplyr::across(tidyr::all_of(checkColNames), ~ (. < minCellCount & . > 0)))) %>%
dplyr::mutate_at(dplyr::vars(tidyr::all_of(toBeSubstituted)), ~ ifelse(result_obscured, substitute, .))
}
}

Expand Down
113 changes: 64 additions & 49 deletions R/summariseGestationalAge.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,56 +27,71 @@ summariseGestationalAge <- function(
dplyr::select(
"gestational_length_in_day",
"pregnancy_start_date",
"pregnancy_end_date")


records <- records %>% dplyr::mutate(
n = dplyr::if_else((!is.na(.data$gestational_length_in_day) &
!is.na(.data$pregnancy_start_date) &
!is.na(.data$pregnancy_end_date)), dplyr::if_else(dplyr::between(!!CDMConnector::datediff("pregnancy_start_date", "pregnancy_end_date", interval = "day"),
.data$gestational_length_in_day -7, .data$gestational_length_in_day +7),0, 1),
NA),
endBeforeStart = dplyr::if_else(((!!CDMConnector::dateadd("pregnancy_start_date", minGestAge_Days, interval = "day"))>=.data$pregnancy_end_date),1,0),
endAfterStart = dplyr::if_else(((!!CDMConnector::dateadd("pregnancy_start_date", minGestAge_Days, interval = "day"))<.data$pregnancy_end_date),1,0)) %>%
dplyr::collect()


records_n <- records %>%
"pregnancy_end_date") %>%
dplyr::mutate(pregnancy_start_date = as.Date(.data$pregnancy_start_date),
pregnancy_end_date = as.Date(.data$pregnancy_end_date))


records <- records %>%
dplyr::mutate(
n = dplyr::case_when(
!is.na(.data$gestational_length_in_day) &
!is.na(.data$pregnancy_start_date) &
!is.na(.data$pregnancy_end_date) ~
dplyr::if_else(
!!CDMConnector::datediff("pregnancy_start_date", "pregnancy_end_date", interval = "day") >= .data$gestational_length_in_day - 7 &
!!CDMConnector::datediff("pregnancy_start_date", "pregnancy_end_date", interval = "day") <= .data$gestational_length_in_day + 7,
1,
0
),
TRUE ~ NA_real_
),
endBeforeStart = dplyr::if_else(
!!CDMConnector::dateadd("pregnancy_start_date", minGestAge_Days, interval = "day") >= .data$pregnancy_end_date,
1,
0
),
endAfterStart = dplyr::if_else(
!!CDMConnector::dateadd("pregnancy_start_date", minGestAge_Days, interval = "day") < .data$pregnancy_end_date,
1,
0
)
)

records_n <- records %>%
dplyr::summarise(
different_gestationalAge = sum(.data$n, na.rm = T),

match_gestationalAge = sum(.data$n == 0, na.rm = T),

missing_information = sum(is.na(.data$n)),

endBeforeMinGestAge = sum(.data$endBeforeStart, na.rm =T),

endAfterMinGestAge = sum(.data$endAfterStart, na.rm =T))


records_prop <- records_n %>%
dplyr::summarise(
different_gestationalAge = round(.data$different_gestationalAge / nrow(tibble::as_tibble(worktable)),3)*100,

match_gestationalAge = round(.data$match_gestationalAge / nrow(tibble::as_tibble(worktable)),3)*100,

missing_information = round(.data$missing_information / nrow(tibble::as_tibble(worktable)),3)*100,

endBeforeMinGestAge = round(.data$endBeforeMinGestAge / nrow(tibble::as_tibble(worktable)),3)*100,

endAfterMinGestAge = round(.data$endAfterMinGestAge / nrow(tibble::as_tibble(worktable)),3)*100) %>%
tidyr::pivot_longer(cols = everything()) %>%
dplyr::rename(variable = name,
percentage = value)



records_n <- records_n %>%
tidyr::pivot_longer(cols = everything()) %>%
dplyr::rename(variable = name,
count = value)

records_long <- records_n %>% dplyr::left_join(records_prop, by = "variable") %>% dplyr::mutate(Total = nrow(tibble::as_tibble(worktable)))
different_gestationalAge = sum(.data$n, na.rm = TRUE),
match_gestationalAge = sum(ifelse(.data$n == 0, 1, 0), na.rm = TRUE),
missing_information = sum(ifelse(is.na(.data$n), 1, 0), na.rm = TRUE),
endBeforeMinGestAge = sum(.data$endBeforeStart, na.rm = TRUE),
endAfterMinGestAge = sum(.data$endAfterStart, na.rm = TRUE)
)


total_records <- nrow(tibble::as_tibble(records))

records_prop <- records_n %>%
dplyr::mutate(
different_gestationalAge = round(.data$different_gestationalAge / total_records, 3) * 100,
match_gestationalAge = round(.data$match_gestationalAge / total_records, 3) * 100,
missing_information = round(.data$missing_information / total_records, 3) * 100,
endBeforeMinGestAge = round(.data$endBeforeMinGestAge / total_records, 3) * 100,
endAfterMinGestAge = round(.data$endAfterMinGestAge / total_records, 3) * 100
) %>%
tidyr::pivot_longer(
cols = tidyr::everything(),
names_to = "variable",
values_to = "percentage"
)

records_n <- records_n %>%
tidyr::pivot_longer(
cols = tidyr::everything(),
names_to = "variable",
values_to = "count"
)

records_long <- records_n %>% dplyr::left_join(records_prop, by = "variable") %>% dplyr::mutate(total = total_records)


records <- NULL
Expand Down
Loading

0 comments on commit 1fff917

Please sign in to comment.