From 9499bb2becc59a43131d8ef5de6bfe2a5da872ff Mon Sep 17 00:00:00 2001 From: tiozab Date: Wed, 21 Dec 2022 12:37:38 +0000 Subject: [PATCH 1/2] additional tests implemented no errors, no warnings --- DESCRIPTION | 2 + NAMESPACE | 5 + R/checkOutcomeMode.R | 9 +- R/executeChecks.R | 103 ++++++++-- R/getAnnualOverview.R | 37 ++++ R/getBitSet.R | 102 ++++++++++ R/getUnknown.R | 48 +++++ R/getValueDatesAgeDist.R | 75 ++++++++ R/getValueWeightDist.R | 56 ++++++ R/summariseGestationalAge.R | 5 +- man/executeChecks.Rd | 4 +- man/getAnnualOverview.Rd | 17 ++ man/getBitSet.Rd | 19 ++ man/getUnknown.Rd | 17 ++ man/getValueDatesAgeDist.Rd | 17 ++ man/getValueWeightDist.Rd | 17 ++ tests/testthat/test-bitSetOverview.R | 74 ++++++++ tests/testthat/test-checkFetusId.R | 2 +- tests/testthat/test-checkFetusesLiveborn.R | 2 +- tests/testthat/test-checkOutcomeMode.R | 2 +- tests/testthat/test-executeChecks.R | 12 +- tests/testthat/test-getAnnualOverview.R | 64 +++++++ tests/testthat/test-getBitSet.R | 178 ++++++++++++++++++ tests/testthat/test-getMissings.R | 4 +- tests/testthat/test-getOverview.R | 2 +- tests/testthat/test-getUnknown.R | 66 +++++++ tests/testthat/test-getValueDatesAgeDist.R | 67 +++++++ tests/testthat/test-getValueWeightDist.R | 39 ++++ tests/testthat/test-summariseGestationalAge.R | 2 +- 29 files changed, 1016 insertions(+), 31 deletions(-) create mode 100644 R/getAnnualOverview.R create mode 100644 R/getBitSet.R create mode 100644 R/getUnknown.R create mode 100644 R/getValueDatesAgeDist.R create mode 100644 R/getValueWeightDist.R create mode 100644 man/getAnnualOverview.Rd create mode 100644 man/getBitSet.Rd create mode 100644 man/getUnknown.Rd create mode 100644 man/getValueDatesAgeDist.Rd create mode 100644 man/getValueWeightDist.Rd create mode 100644 tests/testthat/test-bitSetOverview.R create mode 100644 tests/testthat/test-getAnnualOverview.R create mode 100644 tests/testthat/test-getBitSet.R create mode 100644 tests/testthat/test-getUnknown.R create mode 
100644 tests/testthat/test-getValueDatesAgeDist.R create mode 100644 tests/testthat/test-getValueWeightDist.R diff --git a/DESCRIPTION b/DESCRIPTION index 8f68ca9..e8be80b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,8 +24,10 @@ Imports: glue, lubridate, magrittr, + misty, reshape2, rlang, + stats, tibble, zip VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index e07c835..c54780c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,8 +5,13 @@ export(checkFetusId) export(checkFetusesLiveborn) export(checkOutcomeMode) export(executeChecks) +export(getAnnualOverview) +export(getBitSet) export(getMissings) export(getOverview) +export(getUnknown) +export(getValueDatesAgeDist) +export(getValueWeightDist) export(mockPregnancy) export(summariseGestationalAge) export(writeResultToDisk) diff --git a/R/checkOutcomeMode.R b/R/checkOutcomeMode.R index 92b5b9e..0ed1a06 100644 --- a/R/checkOutcomeMode.R +++ b/R/checkOutcomeMode.R @@ -23,8 +23,9 @@ checkOutcomeMode <- function( #check if miscarriage or TOP has vaginal or c-section delivery records <- records %>% dplyr::mutate( - n = dplyr::if_else(((.data$pregnancy_outcome == 4067106 | .data$pregnancy_outcome == 4081422) - & (.data$pregnancy_mode_delivery == 4125611 | .data$pregnancy_mode_delivery ==4015701)),1,0,missing = NULL)) %>% + n = dplyr::if_else(.data$pregnancy_outcome !=0 , dplyr::if_else( + (.data$pregnancy_outcome == 4067106 | .data$pregnancy_outcome == 4081422) + & (.data$pregnancy_mode_delivery == 4125611 | .data$pregnancy_mode_delivery ==4015701),1,0,missing = NULL),NA,missing = NULL)) %>% dplyr::collect() records_n <- records %>% @@ -33,7 +34,7 @@ checkOutcomeMode <- function( match = sum(.data$n==0, na.rm = T), - missing_information = sum(is.na(.data$n)), + missingUnknown_information = sum(is.na(.data$n)), ) records_prop <- records_n %>% @@ -43,7 +44,7 @@ checkOutcomeMode <- function( match = round(.data$match / nrow(tibble::as_tibble(workTable)),3)*100, - missing_information = 
round(.data$missing_information /nrow(tibble::as_tibble(workTable)),3)*100) + missingUnknown_information = round(.data$missingUnknown_information /nrow(tibble::as_tibble(workTable)),3)*100) records_n <- tibble::as_tibble(reshape2::melt(records_n,variable.names="variable",value.name = "count")) diff --git a/R/executeChecks.R b/R/executeChecks.R index 9650064..34e9113 100644 --- a/R/executeChecks.R +++ b/R/executeChecks.R @@ -13,8 +13,8 @@ executeChecks <- function(#cdm, motherTable = NULL, babyTable = NULL, - checks = c("overview", "missing", "gestationalAge", "outcomeMode", "fetusesLiveborn", - "fetusid"), + checks = c("overview","annualOverview","missing", "unknown","gestationalAge","datesAgeDist","outcomeMode", "fetusesLiveborn", + "fetusid","weightDist","bitSet"), minCellCount = 5, verbose = FALSE) { @@ -48,6 +48,17 @@ executeChecks <- function(#cdm, } + if ("annualOverview" %in% checks) { + if (verbose == TRUE) { + start <- printDurationAndMessage("Progress: total number of women, pregnancies (and fetuses) per year", start) + } + if (!is.null(motherTable)) { + AnnualPETOverviewMother <- NULL + AnnualPETOverviewMother <- getAnnualOverview(motherTable) %>% dplyr::collect() + } + + } + if ("missing" %in% checks) { if (verbose == TRUE) { @@ -64,50 +75,108 @@ executeChecks <- function(#cdm, } - gestationalAgeMatch <- NULL + if ("unknown" %in% checks) { + if (verbose == TRUE) { + start <- printDurationAndMessage("Progress: check unknowns of required variables", start) + } + if (!is.null(motherTable)) { + unknownSummaryMother <- NULL + unknownSummaryMother <- getUnknown(motherTable) %>% dplyr::collect() + } + + } + + if ("gestationalAge" %in% checks) { if (verbose == TRUE) { start <- printDurationAndMessage("Progress: check Gestational Age", start) } if (!is.null(motherTable)) { + gestationalAgeMatch <- NULL gestationalAgeMatch <- summariseGestationalAge(motherTable) %>% dplyr::collect() } } - outcomeModeMatch <- NULL + + + if ("datesAgeDist" %in% checks) { + if 
(verbose == TRUE) { + start <- printDurationAndMessage("Progress: check values of dates and Gestational Age", start) + } + if (!is.null(motherTable)) { + valueDatesAgeDist <- NULL + valueDatesAgeDist <- getValueDatesAgeDist(motherTable) %>% dplyr::collect() + } + } + + + if ("outcomeMode" %in% checks) { if (verbose == TRUE) { start <- printDurationAndMessage("Progress: check Outcome and Mode of Delivery", start) } if (!is.null(motherTable)) { + outcomeModeMatch <- NULL outcomeModeMatch <- checkOutcomeMode(motherTable) %>% dplyr::collect() } } - fetusesLivebornNumber <- NULL + if ("fetusesLiveborn" %in% checks) { if (verbose == TRUE) { start <- printDurationAndMessage("Progress: check number of fetuses versus liveborn", start) } # pregnancy_single is a required variable if ("pregnancy_number_fetuses" %in% colnames(motherTable) && "pregnancy_number_liveborn" %in% colnames(motherTable)) { + fetusesLivebornNumber <- NULL fetusesLivebornNumber <- tibble::as_tibble(checkFetusesLiveborn(motherTable)) %>% dplyr::collect() } } - fetusIdMatch <- NULL + if ("fetusid" %in% checks) { if (verbose == TRUE) { start <- printDurationAndMessage("Progress: check number of fetuses versus liveborn", start) } - if (!is.null(motherTable) && "fetus_id" %in% colnames(babyTable)) { - + if (!is.null(motherTable) && !is.null(babyTable)) { + fetusIdMatch <- NULL fetusIdMatch <- checkFetusId(motherTable,babyTable) %>% dplyr::collect() } } + if ("weightDist" %in% checks) { + if (verbose == TRUE) { + start <- printDurationAndMessage("Progress: check values of birthweight", start) + } + if (!is.null(babyTable)) { + valueWeightDist <- NULL + valueWeightDist <- getValueWeightDist(babyTable) %>% dplyr::collect() + } + + } + + + + + if ("bitSet" %in% checks) { + if (verbose == TRUE) { + start <- printDurationAndMessage("Progress: check missing/unknown data pattern", start) + } + if (!is.null(motherTable) && !is.null(babyTable)) { + bitSetOverviewAll <- NULL + bitSetOverviewAll <- 
getBitSet(motherTable,babyTable) %>% dplyr::collect() + } else if (!is.null(motherTable)) { + bitSetOverviewMother <- NULL + bitSetOverviewMother <- getBitSet(motherTable, babyTable = NULL) %>% dplyr::collect() + } else if (!is.null(babyTable)) { + bitSetOverviewBaby <- NULL + bitSetOverviewBaby <- getBitSet(motherTable = NULL, babyTable) %>% dplyr::collect() + } + } + + if (verbose == TRUE) { start <- printDurationAndMessage("Finished", start) @@ -118,26 +187,38 @@ executeChecks <- function(#cdm, if (!is.null(motherTable) && !is.null(babyTable)) { result <- list("PETOverviewMother" = PETOverviewMother, + "AnnualPETOverviewMother" = AnnualPETOverviewMother, "PETOverviewBaby" = PETOverviewBaby, "missingSummaryMother" = missingSummaryMother, "missingSummaryBaby" = missingSummaryBaby, + "unknownSummaryMother" = unknownSummaryMother, "gestationalAgeMatch" = gestationalAgeMatch, + "valueDatesAgeDist" = valueDatesAgeDist, "outcomeModeMatch" = outcomeModeMatch, "fetusesLivebornNumber" = fetusesLivebornNumber, - "fetusIdMatch" = fetusIdMatch) + "fetusIdMatch" = fetusIdMatch, + "valueWeightDist" = valueWeightDist, + "bitSetOverviewAll" = bitSetOverviewAll + ) } else if (!is.null(motherTable)) { result <- list("PETOverviewMother" = PETOverviewMother, + "AnnualPETOverviewMother" = AnnualPETOverviewMother, "missingSummaryMother" = missingSummaryMother, + "unknownSummaryMother" = unknownSummaryMother, "gestationalAgeMatch" = gestationalAgeMatch, + "valueDatesAgeDist" = valueDatesAgeDist, "outcomeModeMatch" = outcomeModeMatch, - "fetusesLivebornNumber" = fetusesLivebornNumber) + "fetusesLivebornNumber" = fetusesLivebornNumber, + "bitSetOverviewMother" = bitSetOverviewMother) } else if (!is.null(babyTable)) { result <- list("PETOverviewBaby" = PETOverviewBaby, - "missingSummaryBaby" = missingSummaryBaby) + "missingSummaryBaby" = missingSummaryBaby, + "valueWeightDist" = valueWeightDist, + "bitSetOverviewBaby" = bitSetOverviewBaby) } diff --git a/R/getAnnualOverview.R 
b/R/getAnnualOverview.R new file mode 100644 index 0000000..4e8fd13 --- /dev/null +++ b/R/getAnnualOverview.R @@ -0,0 +1,37 @@ +#' Title +#' +#' @param motherTable is the motherTable +#' +#' @return returns a table with the number of annual pregnancies +#' @export +#' +#' @examples +getAnnualOverview <- function( + motherTable +) +{ + + # checks + errorMessage <- checkmate::makeAssertCollection() + #checkDbType(cdm = cdm, messageStore = errorMessage) + checkmate::assertTRUE(inherits(motherTable, 'tbl_dbi'), add = errorMessage) + checkmate::reportAssertions(collection = errorMessage) + + + records <- motherTable %>% + dplyr::select( + "pregnancy_id", + "pregnancy_end_date" + ) %>% dplyr::collect() + + records <- records %>% + dplyr::mutate( + year = format(.data$pregnancy_end_date, "%Y") + ) %>% dplyr::group_by(.data$year) %>% + dplyr::summarise( + pregnancies = dplyr::n_distinct(.data$pregnancy_id) + ) + + return(records) + +} diff --git a/R/getBitSet.R b/R/getBitSet.R new file mode 100644 index 0000000..6add062 --- /dev/null +++ b/R/getBitSet.R @@ -0,0 +1,102 @@ +#' Title +#' +#' @param motherTable is the motherTable +#' @param babyTable is the babyTable +#' +#' @return returns a table with the pattern of missing data +#' @export +#' +#' @examples +getBitSet <- function( + motherTable = NULL, + babyTable = NULL +) { + + # checks + errorMessage <- checkmate::makeAssertCollection() + #checkDbType(cdm = cdm, messageStore = errorMessage) + checkmate::assertTRUE(is.null(motherTable) || inherits(motherTable, 'tbl_dbi'), add = errorMessage) + checkmate::assertTRUE(is.null(babyTable) || inherits(babyTable, 'tbl_dbi'), add = errorMessage) + + checkmate::reportAssertions(collection = errorMessage) + + + + if (!is.null(motherTable) && !is.null(babyTable)) { + + recordshelp <- motherTable %>% + dplyr::select( + "pregnancy_number_fetuses", + "pregnancy_single", + "pregnancy_id", + "pregnancy_outcome", + "pregnancy_number_liveborn", + "pregnancy_mode_delivery" + ) %>% + 
dplyr::left_join((dplyr::select(babyTable,"fetus_id", + "pregnancy_id", + "birth_outcome", + "birth_weight", + "birth_con_malformation", + "birth_SGA", + "birth_FGR", + "birth_APGAR")),by = "pregnancy_id") %>% + dplyr::collect() + + #set required variables with 0 to missing because missing data pattern function expect a missing + #a zero is equal to a missing, because we do not know the information + recordshelp[recordshelp==0]=NA + + records <- misty::na.pattern(recordshelp,order = TRUE, digits = 1, as.na = NULL, write = NULL, + check = TRUE, output = FALSE) + + recordshelp <- NULL + + } else { + if (!is.null(motherTable)){ + + recordshelp <- motherTable %>% + dplyr::select( + "pregnancy_number_fetuses", + "pregnancy_single", + "pregnancy_id", + "pregnancy_outcome", + "pregnancy_number_liveborn", + "pregnancy_mode_delivery" + ) %>% dplyr::collect() + + #set required variables with 0 to missing because missing data pattern function expect a missing + #a zero is equal to a missing, because we do not know the information + recordshelp[recordshelp==0]=NA + + records <- misty::na.pattern(recordshelp,order = TRUE, digits = 1, as.na = NULL, write = NULL, + check = TRUE, output = FALSE) + + recordshelp <- NULL + + } + if (!is.null(babyTable)){ + + recordshelp <- babyTable %>% + dplyr::select( + "fetus_id", + "pregnancy_id", + "birth_outcome", + "birth_weight", + "birth_con_malformation", + "birth_SGA", + "birth_FGR", + "birth_APGAR" + ) %>% + dplyr::collect() + + records <- misty::na.pattern(recordshelp,order = TRUE, digits = 1, as.na = NULL, write = NULL, + check = TRUE, output = FALSE) + + recordshelp <- NULL + + } + } + + return(tibble::as_tibble(records$result)) +} diff --git a/R/getUnknown.R b/R/getUnknown.R new file mode 100644 index 0000000..f0298eb --- /dev/null +++ b/R/getUnknown.R @@ -0,0 +1,48 @@ +#' Title +#' +#' @param motherTable is the motherTable +#' +#' @return returns a table with all the zero counts of required variables in the motherTable +#' @export 
+#' +#' @examples +getUnknown <- function( + motherTable +) { + + # checks + errorMessage <- checkmate::makeAssertCollection() + #checkDbType(cdm = cdm, messageStore = errorMessage) + checkmate::assertTRUE(inherits(motherTable, 'tbl_dbi'), add = errorMessage) + checkmate::reportAssertions(collection = errorMessage) + + motherTable <- motherTable %>% dplyr::collect() + + n_unknown <- tibble::tibble(.rows=1) + prop_unknown <- tibble::tibble(.rows=1) + + checkCol <- c("pregnancy_start_date","pregnancy_end_date","pregnancy_outcome","pregnancy_mode_delivery","pregnancy_single") + + #loop through the required columns and count the zeros + for (i in 1:length(checkCol)){ + + n_unknown[[checkCol[i]]]<-sum(as.integer(motherTable[[checkCol[i]]]==0), na.rm = TRUE) + prop_unknown[[checkCol[i]]] <- round(n_unknown[[checkCol[i]]]/nrow(motherTable),3)*100 + + } + + n_unknown_long <- tibble::as_tibble(reshape2::melt(n_unknown, variable.names="variable",value.name = "count")) + prop_unknown_long <- tibble::as_tibble(reshape2::melt(prop_unknown, variable.names="variable",value.name = "proportionInPercentage")) + + + summUnknowns <- n_unknown_long %>% dplyr::left_join(prop_unknown_long, by = "variable") %>% dplyr::mutate(Total = nrow(motherTable)) + + + n_unknown_long <- NULL + prop_unknown_long <- NULL + n_unknown <- NULL + prop_unknown <- NULL + + + return(summUnknowns) +} diff --git a/R/getValueDatesAgeDist.R b/R/getValueDatesAgeDist.R new file mode 100644 index 0000000..406e139 --- /dev/null +++ b/R/getValueDatesAgeDist.R @@ -0,0 +1,75 @@ +#' Title +#' +#' @param motherTable is the motherTable +#' +#' @return returns a table with the distribution of pregnancy start date, end date, and gestational Age +#' @export +#' +#' @examples +getValueDatesAgeDist <- function( + motherTable +) +{ + + # checks + errorMessage <- checkmate::makeAssertCollection() + #checkDbType(cdm = cdm, messageStore = errorMessage) + checkmate::assertTRUE(inherits(motherTable, 'tbl_dbi'), add = errorMessage) + 
checkmate::reportAssertions(collection = errorMessage) + + + records <- motherTable %>% + dplyr::select( + "pregnancy_start_date", + "pregnancy_end_date" + ) %>% dplyr::summarise( + min_start = min(.data$pregnancy_start_date, na.rm=T), + max_start = max(.data$pregnancy_start_date, na.rm = T), + min_end = min(.data$pregnancy_end_date, na.rm=T), + max_end = max(.data$pregnancy_end_date, na.rm = T) + ) %>% dplyr::collect() + + records <- tibble::as_tibble(reshape2::melt(records,variable.names="variable",value.name = "value")) + records$value <- as.character(records$value) + + records2 <- motherTable %>% + dplyr::select( + "gestational_length_in_day" + ) %>% dplyr::collect() %>% dplyr::summarise( + min_gestationalAge_inDays = min(.data$gestational_length_in_day, na.rm=T), + q05_gestationalAge_inDays = stats::quantile(.data$gestational_length_in_day,0.05, na.rm = T), + q10_gestationalAge_inDays = stats::quantile( + .data$gestational_length_in_day, + 0.10, na.rm = T + ), + q25_gestationalAge_inDays = stats::quantile( + .data$gestational_length_in_day, + 0.25, na.rm = T + ), + median_gestationalAge_inDays = stats::median(.data$gestational_length_in_day, na.rm = T), + q75_gestationalAge_inDays = stats::quantile( + .data$gestational_length_in_day, + 0.75, na.rm = T + ), + q90_gestationalAge_inDays = stats::quantile( + .data$gestational_length_in_day, + 0.90, na.rm = T + ), + q95_gestationalAge_inDays = stats::quantile( + .data$gestational_length_in_day, + 0.95, na.rm = T + ), + max_gestationalAge_inDays = max(.data$gestational_length_in_day, na.rm = T) + ) + + + records2 <- tibble::as_tibble(reshape2::melt(records2,variable.names="variable",value.name = "value")) + records2$value <- as.character(records2$value) + + + records_bound <- rbind(records,records2) + + + return(records_bound) + +} diff --git a/R/getValueWeightDist.R b/R/getValueWeightDist.R new file mode 100644 index 0000000..241fc6d --- /dev/null +++ b/R/getValueWeightDist.R @@ -0,0 +1,56 @@ +#' Title +#' +#' 
@param babyTable is the babyTable +#' +#' @return returns values of birthweight +#' @export +#' +#' @examples +getValueWeightDist <- function( + babyTable +) { + + # checks + errorMessage <- checkmate::makeAssertCollection() + #checkDbType(cdm = cdm, messageStore = errorMessage) + checkmate::assertTRUE(inherits(babyTable, 'tbl_dbi'), add = errorMessage) + checkmate::reportAssertions(collection = errorMessage) + + + records <- babyTable %>% dplyr::collect() %>% dplyr::select( + "birth_weight" + ) %>% dplyr::summarise( + min_birth_weight_in_gram = min(.data$birth_weight, na.rm=T), + q05_birth_weight_in_gram = stats::quantile( + .data$birth_weight, + 0.05, na.rm = T + ), + q10_birth_weight_in_gram = stats::quantile( + .data$birth_weight, + 0.10, na.rm = T + ), + q25_birth_weight_in_gram = stats::quantile( + .data$birth_weight, + 0.25, na.rm = T + ), + median_birth_weight_in_gram = stats::median(.data$birth_weight, na.rm = T), + q75_birth_weight_in_gram = stats::quantile( + .data$birth_weight, + 0.75, na.rm = T + ), + q90_birth_weight_in_gram = stats::quantile( + .data$birth_weight, + 0.90, na.rm = T + ), + q95_birth_weight_in_gram = stats::quantile( + .data$birth_weight, + 0.95, na.rm = T + ), + max_birth_weight_in_gram = max(.data$birth_weight, na.rm = T) + ) + + + records <- tibble::as_tibble(reshape2::melt(records,variable.names="variable",value.name = "value")) + + return(records) +} diff --git a/R/summariseGestationalAge.R b/R/summariseGestationalAge.R index 674ef0f..f6a0d68 100644 --- a/R/summariseGestationalAge.R +++ b/R/summariseGestationalAge.R @@ -25,8 +25,11 @@ summariseGestationalAge <- function( records <- records %>% dplyr::mutate( - n = dplyr::if_else(.data$gestational_length_in_day == + n = dplyr::if_else((!is.na(.data$gestational_length_in_day) & + !is.na(.data$pregnancy_start_date) & + !is.na(.data$pregnancy_end_date)), dplyr::if_else(.data$gestational_length_in_day == !!CDMConnector::datediff("pregnancy_start_date", "pregnancy_end_date", interval = 
"day"), 0, 1,missing = NULL), + NA,missing=NULL), endBeforeStart = dplyr::if_else((.data$pregnancy_start_date>=.data$pregnancy_end_date),1,0,missing = NULL), endAfterStart = dplyr::if_else((.data$pregnancy_start_date<.data$pregnancy_end_date),1,0,missing = NULL)) %>% dplyr::collect() diff --git a/man/executeChecks.Rd b/man/executeChecks.Rd index 8d4626c..be09fea 100644 --- a/man/executeChecks.Rd +++ b/man/executeChecks.Rd @@ -7,8 +7,8 @@ executeChecks( motherTable = NULL, babyTable = NULL, - checks = c("overview", "missing", "gestationalAge", "outcomeMode", "fetusesLiveborn", - "fetusid"), + checks = c("overview", "annualOverview", "missing", "unknown", "gestationalAge", + "datesAgeDist", "outcomeMode", "fetusesLiveborn", "fetusid", "weightDist", "bitSet"), minCellCount = 5, verbose = FALSE ) diff --git a/man/getAnnualOverview.Rd b/man/getAnnualOverview.Rd new file mode 100644 index 0000000..d2b7881 --- /dev/null +++ b/man/getAnnualOverview.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getAnnualOverview.R +\name{getAnnualOverview} +\alias{getAnnualOverview} +\title{Title} +\usage{ +getAnnualOverview(motherTable) +} +\arguments{ +\item{motherTable}{is the motherTable} +} +\value{ +returns a table with the number of annual pregnancies +} +\description{ +Title +} diff --git a/man/getBitSet.Rd b/man/getBitSet.Rd new file mode 100644 index 0000000..fb375d4 --- /dev/null +++ b/man/getBitSet.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getBitSet.R +\name{getBitSet} +\alias{getBitSet} +\title{Title} +\usage{ +getBitSet(motherTable = NULL, babyTable = NULL) +} +\arguments{ +\item{motherTable}{is the motherTable} + +\item{babyTable}{is the babyTable} +} +\value{ +returns a table with the pattern of missing data +} +\description{ +Title +} diff --git a/man/getUnknown.Rd b/man/getUnknown.Rd new file mode 100644 index 0000000..0c93424 --- /dev/null +++ 
b/man/getUnknown.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getUnknown.R +\name{getUnknown} +\alias{getUnknown} +\title{Title} +\usage{ +getUnknown(motherTable) +} +\arguments{ +\item{motherTable}{is the motherTable} +} +\value{ +returns a table with all the zero counts of required variables in the motherTable +} +\description{ +Title +} diff --git a/man/getValueDatesAgeDist.Rd b/man/getValueDatesAgeDist.Rd new file mode 100644 index 0000000..052169f --- /dev/null +++ b/man/getValueDatesAgeDist.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getValueDatesAgeDist.R +\name{getValueDatesAgeDist} +\alias{getValueDatesAgeDist} +\title{Title} +\usage{ +getValueDatesAgeDist(motherTable) +} +\arguments{ +\item{motherTable}{is the motherTable} +} +\value{ +returns a table with the distribution of pregnancy start date, end date, and gestational Age +} +\description{ +Title +} diff --git a/man/getValueWeightDist.Rd b/man/getValueWeightDist.Rd new file mode 100644 index 0000000..77f6906 --- /dev/null +++ b/man/getValueWeightDist.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getValueWeightDist.R +\name{getValueWeightDist} +\alias{getValueWeightDist} +\title{Title} +\usage{ +getValueWeightDist(babyTable) +} +\arguments{ +\item{babyTable}{is the babyTable} +} +\value{ +returns values of birthweight +} +\description{ +Title +} diff --git a/tests/testthat/test-bitSetOverview.R b/tests/testthat/test-bitSetOverview.R new file mode 100644 index 0000000..fc96056 --- /dev/null +++ b/tests/testthat/test-bitSetOverview.R @@ -0,0 +1,74 @@ + + +test_that("check working example of bit set creation", { + MT<- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + person_id = c("1","2","2","3"), + pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), + pregnancy_end_date = 
c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), + gestational_length_in_day = c(300, 200,201,49), + pregnancy_outcome = c(4092289,443213,0,4081422), + pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), + pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_marital_status = c(4338692,4338692,4338692,4053842), + pregnancy_number_fetuses = c(NA,2,2,NA), + pregnancy_number_liveborn = c(3,1,1,0), + prev_pregnancy_parity = c(4012561,4102166,4012561,4012561), + prev_pregnancy_gravidity = c(9,9,10,2), + prev_livebirth_number = c(8,8,9,1), + prev_stillbirth_number = c(3,3,3,0), + prev_miscar_number = c(1,1,1,0), + prev_TOP_number = c(0,0,0,1), + prev_TOP12_number = c(9,9,9,0), + pregnancy_BMI = c(51,48,48,30), + pregnancy_folic = c(4188539,4188539,4188539,4188540), + pregnancy_TOPFA = c(4188539,4188539,4188539,4188540), + pregnancy_ART = c(4188539,4188539,4188539,4188540), + pregnancy_SMOK = c(4188539,4188539,4188539,4188540), + pregnacy_ALC = c(4188539,4188539,4188539,4188540), + pregnancy_SUBS = c(4188539,4188539,4188539,4188540), + pregnancy_outcome_source_value = c(69617,34789,20934,23948), + pregnancy_mode_delivery_source_value = c(69617,23423,23423,13204), + ) + + + BT <- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + fetus_id = c("4","5","6","7"), + birth_outcome = c(4092289,443213,4092289,4081422), + birth_weight = c(6917,NA,2094, NA), + birth_con_malformation = c(4188540,4188540,NA,NA), + birth_SGA = c(NA,NA,4188540,NA), + birth_FGR = c(NA,4188540,NA,NA), + birth_APGAR = c(4188539,NA,NA,NA) + ) + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "MT", + MT, + overwrite = TRUE) + }) + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "BT", + BT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "MT", + "BT" + )) + + seeBitSet <- getBitSet(cdm$MT,cdm$BT) + +## do not 
know what to test + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) diff --git a/tests/testthat/test-checkFetusId.R b/tests/testthat/test-checkFetusId.R index e8556a8..c2e1e66 100644 --- a/tests/testthat/test-checkFetusId.R +++ b/tests/testthat/test-checkFetusId.R @@ -5,7 +5,7 @@ test_that("check working example 1) each count 2) counts add up to total", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), diff --git a/tests/testthat/test-checkFetusesLiveborn.R b/tests/testthat/test-checkFetusesLiveborn.R index 1b67e2e..fec0375 100644 --- a/tests/testthat/test-checkFetusesLiveborn.R +++ b/tests/testthat/test-checkFetusesLiveborn.R @@ -5,7 +5,7 @@ test_that("check working example 1) each count 2) adds up to total", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), diff --git a/tests/testthat/test-checkOutcomeMode.R b/tests/testthat/test-checkOutcomeMode.R index a2536bc..c42c4d9 100644 --- a/tests/testthat/test-checkOutcomeMode.R +++ 
b/tests/testthat/test-checkOutcomeMode.R @@ -5,7 +5,7 @@ test_that("check working example 1) each count 2) adds up to total", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), diff --git a/tests/testthat/test-executeChecks.R b/tests/testthat/test-executeChecks.R index 6d33c47..128d5ab 100644 --- a/tests/testthat/test-executeChecks.R +++ b/tests/testthat/test-executeChecks.R @@ -5,7 +5,7 @@ test_that("check working example if only mother Table is provided", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), @@ -51,8 +51,8 @@ test_that("check working example if only mother Table is provided", { seeMotherResults <- executeChecks ( motherTable = testData, babyTable = NULL, - checks = c("overview", "missing", "gestationalAge", "outcomeMode", "fetusesLiveborn", - "fetusid"), + checks = c("overview","annualOverview","missing", "unknown","gestationalAge","datesAgeDist","outcomeMode", "fetusesLiveborn", + "fetusid","weightDist","bitSet"), minCellCount = 5, verbose = FALSE) @@ -61,7 
+61,7 @@ test_that("check working example if only mother Table is provided", { expect_true(seeMotherResults[[1]][1,3]==TRUE) - + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) }) test_that("check working example if only baby Table is provided", { @@ -96,8 +96,8 @@ test_that("check working example if only baby Table is provided", { seeBabyResults <- executeChecks ( motherTable = NULL, babyTable = testData, - checks = c("overview", "missing", "gestationalAge", "outcomeMode", "fetusesLiveborn", - "fetusid"), + checks = c("overview","annualOverview","missing", "unknown","gestationalAge","datesAgeDist","outcomeMode", "fetusesLiveborn", + "fetusid","weightDist","bitSet"), minCellCount = 5, verbose = FALSE) diff --git a/tests/testthat/test-getAnnualOverview.R b/tests/testthat/test-getAnnualOverview.R new file mode 100644 index 0000000..4ae77a6 --- /dev/null +++ b/tests/testthat/test-getAnnualOverview.R @@ -0,0 +1,64 @@ + + +test_that("check working example annual number of pregnancies", { + MT<- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + person_id = c("1","2","2","3"), + pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), + pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), + gestational_length_in_day = c(300, 200,201,49), + pregnancy_outcome = c(4092289,443213,0,4081422), + pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), + pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_marital_status = c(4338692,4338692,4338692,4053842), + pregnancy_number_fetuses = c(NA,2,2,NA), + pregnancy_number_liveborn = c(3,1,1,0), + prev_pregnancy_parity = c(4012561,4102166,4012561,4012561), + prev_pregnancy_gravidity = c(9,9,10,2), + prev_livebirth_number = c(8,8,9,1), + prev_stillbirth_number = c(3,3,3,0), + prev_miscar_number = c(1,1,1,0), + prev_TOP_number = c(0,0,0,1), + prev_TOP12_number = c(9,9,9,0), + pregnancy_BMI = 
c(51,48,48,30), + pregnancy_folic = c(4188539,4188539,4188539,4188540), + pregnancy_TOPFA = c(4188539,4188539,4188539,4188540), + pregnancy_ART = c(4188539,4188539,4188539,4188540), + pregnancy_SMOK = c(4188539,4188539,4188539,4188540), + pregnacy_ALC = c(4188539,4188539,4188539,4188540), + pregnancy_SUBS = c(4188539,4188539,4188539,4188540), + pregnancy_outcome_source_value = c(69617,34789,20934,23948), + pregnancy_mode_delivery_source_value = c(69617,23423,23423,13204), + ) + + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "MT", + MT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "MT" + )) + + testData <- cdm$MT + + seeOverview <- getAnnualOverview(testData) + + #see Overview + expect_true(seeOverview[1,2]==1) + expect_true(seeOverview[2,2]==1) + expect_true(seeOverview[3,2]==1) + expect_true(seeOverview[4,2]==1) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + diff --git a/tests/testthat/test-getBitSet.R b/tests/testthat/test-getBitSet.R new file mode 100644 index 0000000..1cfd8eb --- /dev/null +++ b/tests/testthat/test-getBitSet.R @@ -0,0 +1,178 @@ + + +test_that("check working example of bit set creation with both tables", { + MT<- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + person_id = c("1","2","2","3"), + pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), + pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), + gestational_length_in_day = c(300, 200,201,49), + pregnancy_outcome = c(4092289,443213,0,4081422), + pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), + pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_marital_status = c(4338692,4338692,4338692,4053842), + pregnancy_number_fetuses = c(NA,2,2,NA), + pregnancy_number_liveborn = c(3,1,1,0), + 
prev_pregnancy_parity = c(4012561,4102166,4012561,4012561), + prev_pregnancy_gravidity = c(9,9,10,2), + prev_livebirth_number = c(8,8,9,1), + prev_stillbirth_number = c(3,3,3,0), + prev_miscar_number = c(1,1,1,0), + prev_TOP_number = c(0,0,0,1), + prev_TOP12_number = c(9,9,9,0), + pregnancy_BMI = c(51,48,48,30), + pregnancy_folic = c(4188539,4188539,4188539,4188540), + pregnancy_TOPFA = c(4188539,4188539,4188539,4188540), + pregnancy_ART = c(4188539,4188539,4188539,4188540), + pregnancy_SMOK = c(4188539,4188539,4188539,4188540), + pregnacy_ALC = c(4188539,4188539,4188539,4188540), + pregnancy_SUBS = c(4188539,4188539,4188539,4188540), + pregnancy_outcome_source_value = c(69617,34789,20934,23948), + pregnancy_mode_delivery_source_value = c(69617,23423,23423,13204), + ) + + + BT <- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + fetus_id = c("4","5","6","7"), + birth_outcome = c(4092289,443213,4092289,4081422), + birth_weight = c(6917,NA,2094, NA), + birth_con_malformation = c(4188540,4188540,NA,NA), + birth_SGA = c(NA,NA,4188540,NA), + birth_FGR = c(NA,4188540,NA,NA), + birth_APGAR = c(4188539,NA,NA,NA) + ) + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "MT", + MT, + overwrite = TRUE) + }) + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "BT", + BT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "MT", + "BT" + )) + + seeBitSet <- getBitSet(cdm$MT,cdm$BT) + +## nothing fails + expect_true(class(seeBitSet[[1]])=="numeric") + expect_true(class(seeBitSet[[18]])=="numeric") + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + + + +test_that("check working example of bit set creation with mother table only", { + MT<- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + person_id = c("1","2","2","3"), + pregnancy_start_date = 
c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), + pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), + gestational_length_in_day = c(300, 200,201,49), + pregnancy_outcome = c(4092289,443213,0,4081422), + pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), + pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_marital_status = c(4338692,4338692,4338692,4053842), + pregnancy_number_fetuses = c(NA,2,2,NA), + pregnancy_number_liveborn = c(3,1,1,0), + prev_pregnancy_parity = c(4012561,4102166,4012561,4012561), + prev_pregnancy_gravidity = c(9,9,10,2), + prev_livebirth_number = c(8,8,9,1), + prev_stillbirth_number = c(3,3,3,0), + prev_miscar_number = c(1,1,1,0), + prev_TOP_number = c(0,0,0,1), + prev_TOP12_number = c(9,9,9,0), + pregnancy_BMI = c(51,48,48,30), + pregnancy_folic = c(4188539,4188539,4188539,4188540), + pregnancy_TOPFA = c(4188539,4188539,4188539,4188540), + pregnancy_ART = c(4188539,4188539,4188539,4188540), + pregnancy_SMOK = c(4188539,4188539,4188539,4188540), + pregnacy_ALC = c(4188539,4188539,4188539,4188540), + pregnancy_SUBS = c(4188539,4188539,4188539,4188540), + pregnancy_outcome_source_value = c(69617,34789,20934,23948), + pregnancy_mode_delivery_source_value = c(69617,23423,23423,13204), + ) + + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "MT", + MT, + overwrite = TRUE) + }) + + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "MT" + )) + + seeBitSet <- getBitSet(cdm$MT , babyTable = NULL) + + ## nothing fails + expect_true(class(seeBitSet[[1]])=="numeric") + expect_true(class(seeBitSet[[11]])=="numeric") + + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + + + + +test_that("check working example of bit set creation with baby table only", { + + BT <- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + 
fetus_id = c("4","5","6","7"), + birth_outcome = c(4092289,443213,4092289,4081422), + birth_weight = c(6917,NA,2094, NA), + birth_con_malformation = c(4188540,4188540,NA,NA), + birth_SGA = c(NA,NA,4188540,NA), + birth_FGR = c(NA,4188540,NA,NA), + birth_APGAR = c(4188539,NA,NA,NA) + ) + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "BT", + BT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "BT" + )) + + seeBitSet <- getBitSet(motherTable = NULL,cdm$BT) + + ## nothing fails + expect_true(class(seeBitSet[[1]])=="numeric") + expect_true(class(seeBitSet[[13]])=="numeric") + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) diff --git a/tests/testthat/test-getMissings.R b/tests/testthat/test-getMissings.R index 39ed004..854daed 100644 --- a/tests/testthat/test-getMissings.R +++ b/tests/testthat/test-getMissings.R @@ -7,7 +7,7 @@ test_that("check working example 1) missing 2) Total equals fetus size", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), @@ -53,7 +53,7 @@ test_that("check working example 1) missing 2) Total equals fetus size", { seeMissings <- getMissings(testData) #see missings - expect_true(seeMissings[6,2]==1) + expect_true(seeMissings[6,2]==0) expect_true(seeMissings[8,2]==1) expect_true(seeMissings[10,2]==2) diff --git a/tests/testthat/test-getOverview.R b/tests/testthat/test-getOverview.R index 4c293e9..3be3497 100644 
--- a/tests/testthat/test-getOverview.R +++ b/tests/testthat/test-getOverview.R @@ -7,7 +7,7 @@ test_that("check working example number or rows equal number of pregnancies", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), diff --git a/tests/testthat/test-getUnknown.R b/tests/testthat/test-getUnknown.R new file mode 100644 index 0000000..c8a5d73 --- /dev/null +++ b/tests/testthat/test-getUnknown.R @@ -0,0 +1,66 @@ + + +test_that("check working example number of unknowns", { + MT<- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + person_id = c("1","2","2","3"), + pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), + pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), + gestational_length_in_day = c(300, 200,201,49), + pregnancy_outcome = c(4092289,443213,0,4081422), + pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), + pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_marital_status = c(4338692,4338692,4338692,4053842), + pregnancy_number_fetuses = c(NA,2,2,NA), + pregnancy_number_liveborn = c(3,1,1,0), + prev_pregnancy_parity = c(4012561,4102166,4012561,4012561), + prev_pregnancy_gravidity = c(9,9,10,2), + prev_livebirth_number = c(8,8,9,1), + prev_stillbirth_number = c(3,3,3,0), + prev_miscar_number = c(1,1,1,0), + prev_TOP_number = c(0,0,0,1), + prev_TOP12_number = c(9,9,9,0), + pregnancy_BMI = c(51,48,48,30), + 
pregnancy_folic = c(4188539,4188539,4188539,4188540), + pregnancy_TOPFA = c(4188539,4188539,4188539,4188540), + pregnancy_ART = c(4188539,4188539,4188539,4188540), + pregnancy_SMOK = c(4188539,4188539,4188539,4188540), + pregnacy_ALC = c(4188539,4188539,4188539,4188540), + pregnancy_SUBS = c(4188539,4188539,4188539,4188540), + pregnancy_outcome_source_value = c(69617,34789,20934,23948), + pregnancy_mode_delivery_source_value = c(69617,23423,23423,13204), + ) + + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "MT", + MT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "MT" + )) + + testData <- cdm$MT + + seeOverview <- getUnknown(testData) + + #see Overview + expect_true(seeOverview[1,2]==0) + expect_true(seeOverview[2,2]==0) + expect_true(seeOverview[3,2]==1) + expect_true(seeOverview[4,2]==0) + expect_true(seeOverview[5,2]==0) + + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + diff --git a/tests/testthat/test-getValueDatesAgeDist.R b/tests/testthat/test-getValueDatesAgeDist.R new file mode 100644 index 0000000..420e6cb --- /dev/null +++ b/tests/testthat/test-getValueDatesAgeDist.R @@ -0,0 +1,67 @@ + + +test_that("check working example date and Gestage distribution", { + MT<- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + person_id = c("1","2","2","3"), + pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), + pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), + gestational_length_in_day = c(300, 200,201,49), + pregnancy_outcome = c(4092289,443213,0,4081422), + pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), + pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_marital_status = c(4338692,4338692,4338692,4053842), + pregnancy_number_fetuses = c(NA,2,2,NA), + 
pregnancy_number_liveborn = c(3,1,1,0), + prev_pregnancy_parity = c(4012561,4102166,4012561,4012561), + prev_pregnancy_gravidity = c(9,9,10,2), + prev_livebirth_number = c(8,8,9,1), + prev_stillbirth_number = c(3,3,3,0), + prev_miscar_number = c(1,1,1,0), + prev_TOP_number = c(0,0,0,1), + prev_TOP12_number = c(9,9,9,0), + pregnancy_BMI = c(51,48,48,30), + pregnancy_folic = c(4188539,4188539,4188539,4188540), + pregnancy_TOPFA = c(4188539,4188539,4188539,4188540), + pregnancy_ART = c(4188539,4188539,4188539,4188540), + pregnancy_SMOK = c(4188539,4188539,4188539,4188540), + pregnacy_ALC = c(4188539,4188539,4188539,4188540), + pregnancy_SUBS = c(4188539,4188539,4188539,4188540), + pregnancy_outcome_source_value = c(69617,34789,20934,23948), + pregnancy_mode_delivery_source_value = c(69617,23423,23423,13204), + ) + + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "MT", + MT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "MT" + )) + + testData <- cdm$MT + + seeOverview <- getValueDatesAgeDist(testData) + + #see Overview + expect_true(seeOverview[1,2]=="2010-01-12") + expect_true(seeOverview[2,2]=="2015-07-22") + expect_true(seeOverview[3,2]=="2010-03-02") + expect_true(seeOverview[4,2]=="2016-02-07") + expect_true(seeOverview[5,2]=="49") + expect_true(seeOverview[13,2]=="300") + + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + diff --git a/tests/testthat/test-getValueWeightDist.R b/tests/testthat/test-getValueWeightDist.R new file mode 100644 index 0000000..039148b --- /dev/null +++ b/tests/testthat/test-getValueWeightDist.R @@ -0,0 +1,39 @@ +test_that("check working example birth weight distribution", { + BT <- tibble::tibble( + pregnancy_id = c("4","5","6","7"), + fetus_id = c("4","5","6","7"), + birth_outcome = c(4092289,443213,4092289,4081422), + birth_weight = c(6917,NA,2094, NA), + birth_con_malformation = 
c(4188540,4188540,NA,NA), + birth_SGA = c(NA,NA,4188540,NA), + birth_FGR = c(NA,4188540,NA,NA), + birth_APGAR = c(4188539,NA,NA,NA) + ) + + + db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") + + + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "BT", + BT, + overwrite = TRUE) + }) + + cdm <- CDMConnector::cdm_from_con(db, + cdm_tables = c(), + cohort_tables = c( + "BT" + )) + + seeWeightDist <- getValueWeightDist(cdm$BT) + + #check the values + expect_true(seeWeightDist[1,2]==2094) + expect_true(seeWeightDist[9,2]==6917) + + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) +}) + + diff --git a/tests/testthat/test-summariseGestationalAge.R b/tests/testthat/test-summariseGestationalAge.R index f1ddbaf..6f631c5 100644 --- a/tests/testthat/test-summariseGestationalAge.R +++ b/tests/testthat/test-summariseGestationalAge.R @@ -5,7 +5,7 @@ test_that("check working example 1) each count 2) adds up to total", { pregnancy_start_date = c(as.Date("2012-10-15"),as.Date("2013-07-22"),as.Date("2015-07-22"),as.Date("2010-01-12")), pregnancy_end_date = c(as.Date("2013-06-22"),as.Date("2014-02-07"),as.Date("2016-02-07"),as.Date("2010-03-02")), gestational_length_in_day = c(300, 200,201,49), - pregnancy_outcome = c(4092289,443213,NA,4081422), + pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), pregnancy_single = c(NA,4188540,4188540,4188539), pregnancy_marital_status = c(4338692,4338692,4338692,4053842), From 33e2d60d926cbedfc305757709d689bb7e9bf5f7 Mon Sep 17 00:00:00 2001 From: tiozab Date: Fri, 23 Dec 2022 14:01:14 +0000 Subject: [PATCH 2/2] more checks added --- .gitignore | 1 + R/checkFetusId.R | 15 +++-- R/executeChecks.R | 1 + R/getAnnualOverview.R | 3 +- R/mockDB.R | 4 +- R/summariseGestationalAge.R | 2 +- README.Rmd | 7 ++- README.md | 65 +++++++++++-------- man/checkFetusId.Rd | 2 +- man/getAnnualOverview.Rd | 2 +- tests/testthat/test-checkFetusesLiveborn.R | 6 +- vignettes/Introduction.Rmd | 22 
+++---- vignettes/Summary.Rmd | 73 ++++++++++++++++++---- 13 files changed, 134 insertions(+), 69 deletions(-) diff --git a/.gitignore b/.gitignore index e67f93b..06140b5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .Rproj.user inst/doc +.Rhistory diff --git a/R/checkFetusId.R b/R/checkFetusId.R index 0ccb119..ce90857 100644 --- a/R/checkFetusId.R +++ b/R/checkFetusId.R @@ -3,7 +3,7 @@ #' @param motherTable is the motherTable #' @param babyTable is the babyTable #' -#' @return returns a tale with the fetuses checks +#' @return returns a table with the fetuses checks #' @export #' #' @examples @@ -35,11 +35,14 @@ records<- recordshelp %>% dplyr::left_join((recordshelp %>% dplyr::group_by(.dat #add fetus count to pregnancy recordshelp <- NULL + records<- records %>% dplyr::mutate( - single_not_align_with_noOfFetusId = dplyr::if_else((.data$n > 1 & .data$pregnancy_single == 4188539) | (.data$n == 1 & .data$pregnancy_single == 4188540),1,0,missing = NULL), + single_not_align_with_noOfFetusId = ifelse(.data$pregnancy_single !=0 ,dplyr::if_else( + (.data$n > 1 & .data$pregnancy_single == 4188539) | (.data$n == 1 & .data$pregnancy_single == 4188540),1,0,missing = NULL),NA), - single_align_with_noOfFetusId = dplyr::if_else((.data$n > 1 & .data$pregnancy_single == 4188540) | (.data$n == 1 & .data$pregnancy_single == 4188539) ,1,0,missing = NULL), + single_align_with_noOfFetusId = ifelse(.data$pregnancy_single !=0 , dplyr::if_else( + (.data$n > 1 & .data$pregnancy_single == 4188540) | (.data$n == 1 & .data$pregnancy_single == 4188539),1,0,missing = NULL),NA), noOfFetus_not_align_with_noOfFetusId = dplyr::if_else((.data$pregnancy_number_fetuses != .data$n ),1,0,missing = NULL), @@ -56,13 +59,13 @@ records_n <- records %>% single_align_with_noOfFetusId = sum(.data$single_align_with_noOfFetusId, na.rm = T), - missing_single = sum(is.na(.data$pregnancy_single)), #n cannot be missing + missingUnknown_single = sum(is.na(.data$pregnancy_single)), 
noOfFetus_not_align_with_noOfFetusId = sum(.data$noOfFetus_not_align_with_noOfFetusId, na.rm = T), noOfFetus_align_with_noOfFetusId = sum(.data$noOfFetus_align_with_noOfFetusId, na.rm = T), - missing_noOfFetus = sum(is.na(.data$pregnancy_number_fetuses)) #n cannot be missing + missing_noOfFetus = sum(is.na(.data$pregnancy_number_fetuses)) ) @@ -74,7 +77,7 @@ records_prop <- records_n %>% single_align_with_noOfFetusId = round(.data$single_align_with_noOfFetusId /nrow(tibble::as_tibble(motherTable)),3)*100, - missing_single = round(.data$missing_single /nrow(tibble::as_tibble(motherTable)),3)*100, + missingUnknown_single = round(.data$missingUnknown_single /nrow(tibble::as_tibble(motherTable)),3)*100, noOfFetus_not_align_with_noOfFetusId = round(.data$noOfFetus_not_align_with_noOfFetusId / nrow(tibble::as_tibble(motherTable)),3)*100, diff --git a/R/executeChecks.R b/R/executeChecks.R index 34e9113..15decc7 100644 --- a/R/executeChecks.R +++ b/R/executeChecks.R @@ -146,6 +146,7 @@ executeChecks <- function(#cdm, } + if ("weightDist" %in% checks) { if (verbose == TRUE) { start <- printDurationAndMessage("Progress: check values of birthweight", start) diff --git a/R/getAnnualOverview.R b/R/getAnnualOverview.R index 4e8fd13..3c71898 100644 --- a/R/getAnnualOverview.R +++ b/R/getAnnualOverview.R @@ -1,8 +1,9 @@ + #' Title #' #' @param motherTable is the motherTable #' -#' @return returns a table with the number of annual pregnancies +#' @return a table which shows the number of pregnancies per year #' @export #' #' @examples diff --git a/R/mockDB.R b/R/mockDB.R index c626fed..63a9842 100644 --- a/R/mockDB.R +++ b/R/mockDB.R @@ -62,11 +62,12 @@ mockPregnancy <- function(motherTable = NULL, 443213, 4067106, 4081422, + 4095714, 0 ), pregnancy_size, replace = TRUE, - prob = c(0.7,0.1,0.1,0.05,0.05) + prob = c(0.65,0.1,0.1,0.05,0.05,0.05) ) #assign pregnancy outcome to each pregnancy pregnancy_mode_delivery <- sample( @@ -89,6 +90,7 @@ mockPregnancy <- function(motherTable = 
NULL, 4053842, 4338692, 4242253, + 4095714, NA ), pregnancy_size, diff --git a/R/summariseGestationalAge.R b/R/summariseGestationalAge.R index f6a0d68..9472cb2 100644 --- a/R/summariseGestationalAge.R +++ b/R/summariseGestationalAge.R @@ -39,7 +39,7 @@ summariseGestationalAge <- function( different_gestationalAge = sum(.data$n, na.rm = T), match_gestationalAge = sum(.data$n == 0, na.rm = T), -# there should not be NAs +# there should not be NAs, there is no space for unknowns "zeros" missing_information = sum(is.na(.data$n)), endBeforeStart = sum(.data$endBeforeStart, na.rm =T), diff --git a/README.Rmd b/README.Rmd index dae1db5..08ee3d8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -56,15 +56,16 @@ head(cdm$babyTable) ## Execute the diagnostic checks of your table(s) ### if both tables are available, all checks are possible -### if only the motherTable is available, the "fetusid" check is not possible, put babyTable = NULL -### if only the babyTable is available, only the "overview" and "missing" check is possible, put motherTable = NULL +### if only the motherTable is available, the "fetusid" and "weightDist" check is not possible, put babyTable = NULL +### if only the babyTable is available, only the "overview", "missing", "weightDist", and "bitSet" check is possible, put motherTable = NULL ```{r} resultList <- executeChecks ( motherTable = cdm$motherTable, babyTable = cdm$babyTable, - checks = c("overview", "missing", "gestationalAge", "outcomeMode","fetusesLiveborn","fetusid"), + checks = c("overview","annualOverview","missing", "unknown","gestationalAge","datesAgeDist","outcomeMode", + "fetusesLiveborn","fetusid","weightDist","bitSet"), minCellCount = 5, verbose = FALSE) ``` diff --git a/README.md b/README.md index 16d0133..373c817 100644 --- a/README.md +++ b/README.md @@ -56,50 +56,52 @@ cdm<-mockPregnancy(motherTable = NULL, # this is what the table(s) look like # use the motherTable and/or the babyTable depending on your data head(cdm$motherTable) -#> # 
Source: SQL [6 x 16] +#> # Source: SQL [6 x 27] #> # Database: DuckDB 0.5.1 [tburkard@Windows 10 x64:R 4.2.1/:memory:] #> pregna…¹ perso…² pregnanc…³ pregnanc…⁴ gesta…⁵ pregn…⁶ pregn…⁷ pregn…⁸ pregn…⁹ #> -#> 1 1 68 2010-12-05 2011-02-01 58 4092289 4015701 4188539 NA -#> 2 2 39 2004-09-01 2005-02-21 173 4081422 0 4188540 4242253 -#> 3 3 1 2019-10-03 2020-01-19 108 4092289 0 4188540 4053842 -#> 4 4 34 2001-05-15 2002-01-08 238 443213 0 4188539 4338692 -#> 5 5 87 2010-05-15 2011-01-14 244 4092289 4125611 4188540 4242253 -#> 6 6 43 2018-03-21 2018-08-13 145 443213 0 4188539 4242253 -#> # … with 7 more variables: pregnancy_number_fetuses , -#> # pregnancy_number_liveborn , prev_pregnancy_parity , -#> # pregnancy_BMI , pregnancy_outcome_source_value , -#> # pregnancy_mode_delivery_source_value , pregnancy_folic , and -#> # abbreviated variable names ¹​pregnancy_id, ²​person_id, -#> # ³​pregnancy_start_date, ⁴​pregnancy_end_date, ⁵​gestational_length_in_day, -#> # ⁶​pregnancy_outcome, ⁷​pregnancy_mode_delivery, ⁸​pregnancy_single, … +#> 1 1 68 2010-12-05 2011-02-01 58 4092289 4015701 4188539 4095714 +#> 2 2 39 2004-09-01 2005-02-21 173 4081422 0 4188540 4053842 +#> 3 3 1 2019-10-03 2020-01-19 108 4092289 0 4188540 4242253 +#> 4 4 34 2001-05-15 2002-01-08 238 443213 0 4188539 4095714 +#> 5 5 87 2010-05-15 2011-01-14 244 4092289 4125611 4188540 4095714 +#> 6 6 43 2018-03-21 2018-08-13 145 0 0 4188539 4338692 +#> # … with 18 more variables: pregnancy_number_fetuses , +#> # pregnancy_number_liveborn , prev_pregnancy_gravidity , +#> # prev_livebirth_number , prev_stillbirth_number , +#> # prev_miscar_number , prev_TOP_number , prev_TOP12_number , +#> # prev_pregnancy_parity , pregnancy_BMI , +#> # pregnancy_outcome_source_value , +#> # pregnancy_mode_delivery_source_value , pregnancy_folic , … head(cdm$babyTable) -#> # Source: SQL [6 x 6] +#> # Source: SQL [6 x 8] #> # Database: DuckDB 0.5.1 [tburkard@Windows 10 x64:R 4.2.1/:memory:] -#> pregnancy_id fetus_id birth_outcome 
birth_weight birth_con_malformat…¹ birth…² -#> -#> 1 1 1 4092289 2447 4188540 0 -#> 2 2 2 4092289 1827 4188540 9 -#> 3 3 3 4092289 1056 4188539 0 -#> 4 4 4 NA 786 4188539 2 -#> 5 5 5 4092289 3716 4188540 9 -#> 6 6 6 443213 4201 4188540 6 -#> # … with abbreviated variable names ¹​birth_con_malformation, ²​birth_APGAR +#> pregnancy_id fetus_id birth_outcome birth_we…¹ birth…² birth…³ birth…⁴ birth…⁵ +#> +#> 1 1 1 4092289 2447 4188540 4188539 4188540 6 +#> 2 2 2 4092289 1827 4188540 4188540 4188539 10 +#> 3 3 3 4092289 1056 4188539 4188539 4188539 0 +#> 4 4 4 NA 786 4188539 4188539 4188539 10 +#> 5 5 5 4092289 3716 4188540 4188539 4188539 8 +#> 6 6 6 443213 4201 4188540 4188540 4188539 7 +#> # … with abbreviated variable names ¹​birth_weight, ²​birth_con_malformation, +#> # ³​birth_SGA, ⁴​birth_FGR, ⁵​birth_APGAR ``` ## Execute the diagnostic checks of your table(s) ### if both tables are available, all checks are possible -### if only the motherTable is available, the “fetusid” check is not possible, put babyTable = NULL +### if only the motherTable is available, the “fetusid” and “weightDist” check is not possible, put babyTable = NULL -### if only the babyTable is available, only the “overview” and “missing” check is possible, put motherTable = NULL +### if only the babyTable is available, only the “overview”, “missing”, “weightDist”, and “bitSet” check is possible, put motherTable = NULL ``` r resultList <- executeChecks ( motherTable = cdm$motherTable, babyTable = cdm$babyTable, - checks = c("overview", "missing", "gestationalAge", "outcomeMode","fetusesLiveborn","fetusid"), + checks = c("overview","annualOverview","missing", "unknown","gestationalAge","datesAgeDist","outcomeMode", + "fetusesLiveborn","fetusid","weightDist","bitSet"), minCellCount = 5, verbose = FALSE) #> No id variables; using all as measure variables @@ -114,8 +116,17 @@ resultList <- executeChecks ( #> No id variables; using all as measure variables #> No id variables; using all as measure 
variables #> No id variables; using all as measure variables +#> Warning: attributes are not identical across measure variables; they will be +#> dropped #> No id variables; using all as measure variables #> No id variables; using all as measure variables +#> No id variables; using all as measure variables +#> No id variables; using all as measure variables +#> No id variables; using all as measure variables +#> No id variables; using all as measure variables +#> No id variables; using all as measure variables +#> Warning: attributes are not identical across measure variables; they will be +#> dropped ``` ## Exporting results diff --git a/man/checkFetusId.Rd b/man/checkFetusId.Rd index 7d9ab3d..2a41f1a 100644 --- a/man/checkFetusId.Rd +++ b/man/checkFetusId.Rd @@ -12,7 +12,7 @@ checkFetusId(motherTable, babyTable) \item{babyTable}{is the babyTable} } \value{ -returns a tale with the fetuses checks +returns a table with the fetuses checks } \description{ Title diff --git a/man/getAnnualOverview.Rd b/man/getAnnualOverview.Rd index d2b7881..3fc0ada 100644 --- a/man/getAnnualOverview.Rd +++ b/man/getAnnualOverview.Rd @@ -10,7 +10,7 @@ getAnnualOverview(motherTable) \item{motherTable}{is the motherTable} } \value{ -returns a table with the number of annual pregnancies +a table which shows the number of pregnancies per year } \description{ Title diff --git a/tests/testthat/test-checkFetusesLiveborn.R b/tests/testthat/test-checkFetusesLiveborn.R index fec0375..d79d43b 100644 --- a/tests/testthat/test-checkFetusesLiveborn.R +++ b/tests/testthat/test-checkFetusesLiveborn.R @@ -7,7 +7,7 @@ test_that("check working example 1) each count 2) adds up to total", { gestational_length_in_day = c(300, 200,201,49), pregnancy_outcome = c(4092289,443213,0,4081422), pregnancy_mode_delivery = c(4015701,4125611,4125611,4125611), - pregnancy_single = c(NA,4188540,4188540,4188539), + pregnancy_single = c(NA,4188540,4188539,4188539), pregnancy_marital_status = 
c(4338692,4338692,4338692,4053842), pregnancy_number_fetuses = c(NA,2,2,NA), pregnancy_number_liveborn = c(3,1,1,0), @@ -53,8 +53,8 @@ test_that("check working example 1) each count 2) adds up to total", { expect_true(seeFetLive[1,2]==0) expect_true(seeFetLive[2,2]==2) expect_true(seeFetLive[3,2]==2) - expect_true(seeFetLive[4,2]==0) - expect_true(seeFetLive[5,2]==2) + expect_true(seeFetLive[4,2]==1) + expect_true(seeFetLive[5,2]==1) expect_true(seeFetLive[6,2]==2) #check that all counts add up to the Total diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd index fe1c909..6440fb6 100644 --- a/vignettes/Introduction.Rmd +++ b/vignettes/Introduction.Rmd @@ -21,8 +21,7 @@ library(dplyr) library(DT) ``` -First, connect to the database. -#question on whether to use the mock of not (wait Ed) +First, connect to the database. #question on whether to use the mock or not (wait Ed) ```{r} cdm <- PETDiagnostics:::mockPregnancy(motherTable = NULL, @@ -34,22 +33,18 @@ cdm <- PETDiagnostics:::mockPregnancy(motherTable = NULL, We can run all available checks at the same time using the ´executeChecks()´ function. This will return a list which contains the results of each check. - ```{r eval=FALSE} executeChecks ( motherTable = cdm$motherTable, babyTable = cdm$babyTable, - checks = c("overview", "missing", "gestationalAge", "outcomeMode","fetusesLiveborn","fetusid"), + checks = c("overview","annualOverview","missing", "unknown","gestationalAge","datesAgeDist","outcomeMode", + "fetusesLiveborn","fetusid","weightDist","bitSet"), minCellCount = 5, verbose = FALSE) ``` -The`cdm` is the database reference of the OMOP CDM using the `CDMConnector` package. -The `motherTable` is the `motherTable` in the CDM. -The `babyTable` is the `babyTable` in the CDM. -`checks` allows to select the checks to be executed, by default, all the checks will be run. -The `minCellCount` is minimum number of events to report, numbers lower than this will be obscured.
The number zero will not be obscured - +The `cdm` is the database reference of the OMOP CDM using the `CDMConnector` package.\ +The `motherTable` is the `motherTable` in the CDM. The `babyTable` is the `babyTable` in the CDM. `checks` selects the checks to be executed; by default, all the checks will be run. The `minCellCount` is the minimum number of events to report; numbers lower than this will be obscured. The number zero will not be obscured. ```{r executeChecks} resultList<-executeChecks(motherTable = cdm$motherTable, @@ -57,28 +52,27 @@ resultList<-executeChecks(motherTable = cdm$motherTable, ``` We can then check what results available from ´executeChecks()´ by + ```{r} names(resultList) ``` Let's take a look at the results. the missingSummaryMother contains all the variables, their count of missings, and the proportion in relation to all pregnancies recorded in the motherTable. - -```{r, message=FALSE } +```{r, message=FALSE } DT::datatable(resultList$missingSummaryMother, rownames = FALSE ) ``` +After running the checks, we can write the CSV files into a zip file to disk using the `writeResultToDisk()` function. -After running the checks, we can write the CSV files into a zip file to disk using the `writeResultToDisk()` function. ```{r eval=FALSE} writeResultToDisk(resultList=resultList, databaseId = "your_database_id", outputFolder = tempdir()) ``` - ```{r, echo=FALSE} DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) ``` diff --git a/vignettes/Summary.Rmd b/vignettes/Summary.Rmd index 6b93b43..e82ed4f 100644 --- a/vignettes/Summary.Rmd +++ b/vignettes/Summary.Rmd @@ -31,21 +31,29 @@ cdm <- PETDiagnostics::mockPregnancy(motherTable = NULL, ## How many women and pregnancies are there? ```{r} -PETOverviewMother <- getOverview(cdm$motherTable) +PETOverviewMother <- PETDiagnostics::getOverview(cdm$motherTable) DT::datatable(PETOverviewMother) ``` + ## How many pregnancies and fetuses are there?
```{r} -PETOverviewBaby <- getOverview(cdm$babyTable) +PETOverviewBaby <- PETDiagnostics::getOverview(cdm$babyTable) DT::datatable(PETOverviewBaby) ``` + +## How many pregnancies are there per year? +```{r} +AnnualPETOverviewMother <- PETDiagnostics::getAnnualOverview(cdm$motherTable) +DT::datatable(AnnualPETOverviewMother) +``` + ## How many missing values are there in the motherTable? The first 8 variables (until pregnancy_single) should not have any missings because they are required variables, i.e. any unknown value is mapped to zero, any observations with missing values are not considered ```{r} -missingSummaryMother <- getMissings(cdm$motherTable) +missingSummaryMother <- PETDiagnostics::getMissings(cdm$motherTable) DT::datatable(missingSummaryMother) ``` @@ -54,10 +62,18 @@ DT::datatable(missingSummaryMother) The first 2 variables (until fetus_id) should not have any missings because they are required variables, i.e. any unknown value is mapped to zero, any observations with missing values are not considered ```{r} -missingSummaryBaby <- getMissings(cdm$babyTable) +missingSummaryBaby <- PETDiagnostics::getMissings(cdm$babyTable) DT::datatable(missingSummaryBaby) ``` +## How many unknown values are there in the motherTable? + +```{r} +unknownSummaryMother <- PETDiagnostics::getUnknown(cdm$motherTable) +DT::datatable(unknownSummaryMother) +``` + + ## How do dates and gestational age add up in the motherTable? @@ -68,20 +84,26 @@ DT::datatable(missingSummaryBaby) ### "end after start" counts the occasions in which the pregnancy end date happened after the start date ```{r} -gestationalAgeMatch <- summariseGestationalAge(cdm$motherTable) +gestationalAgeMatch <- PETDiagnostics::summariseGestationalAge(cdm$motherTable) DT::datatable(gestationalAgeMatch) ``` +## What is the minimum and maximum of pregnancy start and end dates as well as the distribution of gestational age ? 
+ +```{r} +valueDatesAgeDist <- PETDiagnostics::getValueDatesAgeDist(cdm$motherTable) +DT::datatable(valueDatesAgeDist) +``` ## How do outcome and mode of delivery add up in the motherTable? ### "no match" means that a elective termination or miscarriage has a vaginal or c-section delivery -### a "match" means that a livebirth or stillbirth has a vaginal or c-section delivery -### "missing information" counts the missing information in either pregnancy outcome or mode of delivery (there should not be any missings) +### a "match" means that a livebirth, stillbirth, or mixed outcome has a vaginal or c-section delivery +### "missing/unknown information" counts the missing/unknown information in either pregnancy outcome or mode of delivery (there should not be any missings) ```{r} -outcomeModeMatch <- checkOutcomeMode(cdm$motherTable) +outcomeModeMatch <- PETDiagnostics::checkOutcomeMode(cdm$motherTable) DT::datatable(outcomeModeMatch) ``` ## How do number of fetuses and liveborns add up in the motherTable? @@ -94,24 +116,53 @@ DT::datatable(outcomeModeMatch) ### "missing multiple" counts the missing information in either pregnancy single or pregnancy number fetus ```{r} -fetusesLivebornNumber <- checkFetusesLiveborn(cdm$motherTable) +fetusesLivebornNumber <- PETDiagnostics::checkFetusesLiveborn(cdm$motherTable) DT::datatable(fetusesLivebornNumber) ``` ## How do number of fetuses and liveborns add up in the motherTable? 
### "single_not_align_with_noOfFetusId" means the variable pregnancy single does not align with the number of fetuses in the babyTable ### "single_align_with_noOfFetusId" means the variable pregnancy single is YES and the number of fetuses in the babyTable is 1 -### "missing_single" counts the missing information in pregnancy single or pregnancy number fetus +### "missingUnknown_single" counts the missing/unknown information in pregnancy single or pregnancy number fetus ### "noOfFetus_not_align_with_noOfFetusId" means the variable pregnancy number fetus does not align with the number of fetuses in the babyTable ### "noOfFetus_align_with_noOfFetusId" means the variable pregnancy number fetus align with the number of fetuses in the babyTable ### "missing_noOfFetus" counts the missing information in pregnancy number fetus ```{r} -fetusIdMatch <- checkFetusId(cdm$motherTable,cdm$babyTable) +fetusIdMatch <- PETDiagnostics::checkFetusId(cdm$motherTable,cdm$babyTable) DT::datatable(fetusIdMatch) ``` +## What is the distribution of birth weight in the babyTable? + +```{r} +valueWeightDist <- PETDiagnostics::getValueWeightDist(cdm$babyTable) +DT::datatable(valueWeightDist) +``` + +## What does the missing/unknown data pattern of both tables, the motherTable and the babyTable, look like? + +```{r} +bitSetOverviewAll <- PETDiagnostics::getBitSet(cdm$motherTable,cdm$babyTable) +DT::datatable(bitSetOverviewAll) +``` + + +## What does the missing/unknown data pattern of the motherTable look like? + +```{r} +bitSetOverviewMother <- PETDiagnostics::getBitSet(cdm$motherTable,babyTable=NULL) +DT::datatable(bitSetOverviewMother) +``` + + +## What does the missing/unknown data pattern of the babyTable look like? + +```{r} +bitSetOverviewBaby <- PETDiagnostics::getBitSet(motherTable=NULL,cdm$babyTable) +DT::datatable(bitSetOverviewBaby) +``` ```{r, echo=FALSE}