From 36ca245e3d304f838c8ee8550122640dba75300b Mon Sep 17 00:00:00 2001 From: Keith Goldfeld Date: Wed, 29 Dec 2021 17:13:42 -0500 Subject: [PATCH 1/9] Adding defRepeat functions --- NAMESPACE | 2 + R/define_data.R | 137 +++++++++++++++++++++++++++++++++++++++++++- man/defRepeat.Rd | 55 ++++++++++++++++++ man/defRepeatAdd.Rd | 57 ++++++++++++++++++ 4 files changed, 250 insertions(+), 1 deletion(-) create mode 100644 man/defRepeat.Rd create mode 100644 man/defRepeatAdd.Rd diff --git a/NAMESPACE b/NAMESPACE index 7bebdb05..2bbc271b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,8 @@ export(defMiss) export(defRead) export(defReadAdd) export(defReadCond) +export(defRepeat) +export(defRepeatAdd) export(defSurv) export(delColumns) export(gammaGetShapeRate) diff --git a/R/define_data.R b/R/define_data.R index c4e1471e..0ff90acd 100644 --- a/R/define_data.R +++ b/R/define_data.R @@ -216,6 +216,141 @@ defDataAdd <- function(dtDefs = NULL, return(defNew[]) } +#' Add multiple (similar) rows to definitions table +#' +#' @param dtDefs Definition data.table to be modified +#' @param nvars Number of new variables to define +#' @param prefix Prefix (character) for new variables +#' @param formula An R expression for mean (string) +#' @param variance Number or formula +#' @param dist Distribution. For possibilities, see details +#' @param link The link function for the mean, see details +#' @param id A string indicating the field name for the unique record identifier +#' @return A data.table named dtName that is an updated data definitions table +#' @seealso [distributions] +#' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. 
+#' +#' @examples +#' def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") +#' def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") +#' def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") +#' def <- defData(def, "y", formula = "0.10", dist = "binary") +#' +#' def +#' @export +#' @concept define_data +defRepeat <- function(dtDefs = NULL, + nvars, + prefix, + formula, + variance = 0, + dist = "normal", + link = "identity", + id = "id") { + + #### Check that arguments have been passed + + if (missing(nvars)) stop("argument 'nvars' is missing", call. = FALSE) + if (missing(prefix)) stop("argument 'prefix' is missing", call. = FALSE) + if (missing(formula)) stop("argument 'formula' is missing", call. = FALSE) + + #### No missing arguments + + varnames <- paste0(prefix, 1 : nvars) + + if (is.null(dtDefs)) { + + defNew <- defData(varname = varnames[1], formula = formula, + variance = variance, dist=dist, link = link, id = id) + + for (i in (2:nvars) ) { + defNew <- defData(defNew, varname = varnames[i], + formula = formula, variance = variance, + dist=dist, link = link, id = id) + } + + } else { + + defNew <- data.table::copy(dtDefs) + + for (i in 1:nvars) { + defNew <- defData(defNew, varname = varnames[i], + formula = formula, variance = variance, + dist=dist, link = link, id = id) + } + } + + return(defNew[]) +} + +#' Add multiple (similar) rows to definitions table that will be used to add data to an +#' existing data.table +#' +#' @param dtDefs Definition data.table to be modified +#' @param nvars Number of new variables to define +#' @param prefix Prefix (character) for new variables +#' @param formula An R expression for mean (string) +#' @param variance Number or formula +#' @param dist Distribution. 
For possibilities, see details +#' @param link The link function for the mean, see details +#' @param id A string indicating the field name for the unique record identifier +#' @return A data.table named dtName that is an updated data definitions table +#' @seealso [distributions] +#' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. +#' +#' @examples +#' def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") +#' def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") +#' def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") +#' def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") +#' +#' def +#' @export +#' @concept define_data +defRepeatAdd <- function(dtDefs = NULL, + nvars, + prefix, + formula, + variance = 0, + dist = "normal", + link = "identity", + id = "id") { + + #### Check that arguments have been passed + + if (missing(nvars)) stop("argument 'nvars' is missing", call. = FALSE) + if (missing(prefix)) stop("argument 'prefix' is missing", call. = FALSE) + if (missing(formula)) stop("argument 'formula' is missing", call. = FALSE) + + #### No missing arguments + + varnames <- paste0(prefix, 1 : nvars) + + if (is.null(dtDefs)) { + + defNew <- defDataAdd(varname = varnames[1], formula = formula, + variance = variance, dist=dist, link = link) + + for (i in (2:nvars) ) { + defNew <- defDataAdd(defNew, varname = varnames[i], + formula = formula, variance = variance, + dist=dist, link = link) + } + + } else { + + defNew <- data.table::copy(dtDefs) + + for (i in 1:nvars) { + defNew <- defDataAdd(defNew, varname = varnames[i], + formula = formula, variance = variance, + dist=dist, link = link) + } + } + + return(defNew[]) +} + #' Read external csv data set definitions #' #' @param filen String file name, including full path. Must be a csv file. 
@@ -651,7 +786,7 @@ defSurv <- function(dtDefs = NULL, #' Check uniform formula #' -#' @description Unifom formulas must be of the form "min;max" +#' @description Uniform formulas must be of the form "min;max" #' @param formula Formula as string. #' @return Invisible, error if formula not valid. #' @seealso distributions diff --git a/man/defRepeat.Rd b/man/defRepeat.Rd new file mode 100644 index 00000000..a88648b3 --- /dev/null +++ b/man/defRepeat.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/define_data.R +\name{defRepeat} +\alias{defRepeat} +\title{Add multiple (similar) rows to definitions table} +\usage{ +defRepeat( + dtDefs = NULL, + nvars, + prefix, + formula, + variance = 0, + dist = "normal", + link = "identity", + id = "id" +) +} +\arguments{ +\item{dtDefs}{Definition data.table to be modified} + +\item{nvars}{Number of new variables to define} + +\item{prefix}{Prefix (character) for new variables} + +\item{formula}{An R expression for mean (string)} + +\item{variance}{Number or formula} + +\item{dist}{Distribution. For possibilities, see details} + +\item{link}{The link function for the mean, see details} + +\item{id}{A string indicating the field name for the unique record identifier} +} +\value{ +A data.table named dtName that is an updated data definitions table +} +\description{ +Add multiple (similar) rows to definitions table +} +\details{ +The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. 
+} +\examples{ +def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") +def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") +def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") +def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") + +def +} +\seealso{ +[distributions] +} +\concept{define_data} diff --git a/man/defRepeatAdd.Rd b/man/defRepeatAdd.Rd new file mode 100644 index 00000000..3d6eca58 --- /dev/null +++ b/man/defRepeatAdd.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/define_data.R +\name{defRepeatAdd} +\alias{defRepeatAdd} +\title{Add multiple (similar) rows to definitions table that will be used to add data to an +existing data.table} +\usage{ +defRepeatAdd( + dtDefs = NULL, + nvars, + prefix, + formula, + variance = 0, + dist = "normal", + link = "identity", + id = "id" +) +} +\arguments{ +\item{dtDefs}{Definition data.table to be modified} + +\item{nvars}{Number of new variables to define} + +\item{prefix}{Prefix (character) for new variables} + +\item{formula}{An R expression for mean (string)} + +\item{variance}{Number or formula} + +\item{dist}{Distribution. For possibilities, see details} + +\item{link}{The link function for the mean, see details} + +\item{id}{A string indicating the field name for the unique record identifier} +} +\value{ +A data.table named dtName that is an updated data definitions table +} +\description{ +Add multiple (similar) rows to definitions table that will be used to add data to an +existing data.table +} +\details{ +The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. 
+} +\examples{ +def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") +def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") +def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") +def <- defData(def, "y", formula = "0.10", dist = "binary") + +def +} +\seealso{ +[distributions] +} +\concept{define_data} From 24600702a02891c1a1b12b7fee7ca4f84264443b Mon Sep 17 00:00:00 2001 From: Keith Goldfeld Date: Thu, 13 Jan 2022 14:49:28 -0500 Subject: [PATCH 2/9] Adding tests --- tests/testthat/test-add_data.R | 22 +++++++++++++++++++++- tests/testthat/test-define_data.R | 20 +++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-add_data.R b/tests/testthat/test-add_data.R index 6fc3734f..2b5220c9 100644 --- a/tests/testthat/test-add_data.R +++ b/tests/testthat/test-add_data.R @@ -31,4 +31,24 @@ test_that("addColumns works.", { def2 <- defDataAdd(varname = "y", formula = "2.3 * (1/x)", dist = "normal") expect_silent(addColumns(def2, dt)) -}) \ No newline at end of file +}) + +test_that("defRepeatAdd works", { + expect_silent( + defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") + ) + + def <- defDataAdd(varname = "a", formula = "1;1", dist = "trtAssign") + expect_silent( + defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") + ) + + expect_silent(defRepeatAdd(nvars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) + +}) + +test_that("defRepeatAdd throws errors correctly.", { + expect_error(defRepeatAdd(prefix = "b", formula = 5, variance = 3, dist = "normal")) + expect_error(defRepeatAdd(nvars = 8, formula = 5, variance = 3, dist = "normal")) + expect_error(defRepeatAdd(vars = 8, prefix = "b", variance = 3, dist = "normal")) +}) diff --git a/tests/testthat/test-define_data.R b/tests/testthat/test-define_data.R index d76db243..63b3e13e 100644 --- 
a/tests/testthat/test-define_data.R +++ b/tests/testthat/test-define_data.R @@ -27,7 +27,7 @@ test_that("checks combine in .evalDef correctly", { forall(gen_evalDef_call, function(args) expect_silent(do.call(.evalDef, args))) }) -test_that(".evalDef throws erros correctly.", { +test_that(".evalDef throws errors correctly.", { expect_error(.evalDef(newvar = 1, "1 + 2", "normal", 0, "identiy", ""), class = "simstudy::wrongType") expect_error(.evalDef(newvar = c("a", "b"), "1 + 2", "normal", 0, "identiy", ""), class = "simstudy::lengthMismatch") expect_error(.evalDef(newvar = "varname", "1 + 2", "not valid", 0, "identiy", ""), class = "simstudy::optionInvalid") @@ -151,4 +151,22 @@ test_that("utility functions work", { expect_equal(.splitFormula(";split"), c("", "split")) }) +test_that("defRepeat works.", { + expect_silent( + defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") + ) + + def <- defData(varname = "a", formula = "1;1", dist = "trtAssign") + expect_silent( + defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") + ) +}) + +test_that("defRepeat throws errors correctly.", { + expect_error(defRepeat(prefix = "b", formula = 5, variance = 3, dist = "normal")) + expect_error(defRepeat(nvars = 8, formula = 5, variance = 3, dist = "normal")) + expect_error(defRepeat(vars = 8, prefix = "b", variance = 3, dist = "normal")) + expect_error(defRepeat(nvars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) +}) + rm(list = setdiff(names(.GlobalEnv), freeze_eval), pos = .GlobalEnv) From a16d1a68205665ae68f06e0bdd1d22913129f2cb Mon Sep 17 00:00:00 2001 From: Keith Goldfeld Date: Thu, 13 Jan 2022 15:43:11 -0500 Subject: [PATCH 3/9] Updating tests --- R/define_data.R | 6 ++++-- man/defRepeat.Rd | 9 +++++---- man/defRepeatAdd.Rd | 9 +++++---- tests/testthat/test-add_data.R | 2 +- tests/testthat/test-define_data.R | 2 +- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git 
a/R/define_data.R b/R/define_data.R index 0ff90acd..d0429bb1 100644 --- a/R/define_data.R +++ b/R/define_data.R @@ -231,7 +231,8 @@ defDataAdd <- function(dtDefs = NULL, #' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. #' #' @examples -#' def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") +#' def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", +#' variance = 0, dist = "categorical") #' def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") #' def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") #' def <- defData(def, "y", formula = "0.10", dist = "binary") @@ -299,7 +300,8 @@ defRepeat <- function(dtDefs = NULL, #' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. #' #' @examples -#' def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") +#' def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", +#' variance = 0, dist = "categorical") #' def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") #' def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") #' def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") diff --git a/man/defRepeat.Rd b/man/defRepeat.Rd index a88648b3..1ec09135 100644 --- a/man/defRepeat.Rd +++ b/man/defRepeat.Rd @@ -42,10 +42,11 @@ Add multiple (similar) rows to definitions table The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. 
} \examples{ -def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") -def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") -def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") -def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") +def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", + variance = 0, dist = "categorical") +def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") +def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") +def <- defData(def, "y", formula = "0.10", dist = "binary") def } diff --git a/man/defRepeatAdd.Rd b/man/defRepeatAdd.Rd index 3d6eca58..c2c621e2 100644 --- a/man/defRepeatAdd.Rd +++ b/man/defRepeatAdd.Rd @@ -44,10 +44,11 @@ existing data.table The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. } \examples{ -def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") -def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") -def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") -def <- defData(def, "y", formula = "0.10", dist = "binary") +def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", + variance = 0, dist = "categorical") +def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") +def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") +def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") def } diff --git a/tests/testthat/test-add_data.R b/tests/testthat/test-add_data.R index 2b5220c9..3b08e29f 100644 --- a/tests/testthat/test-add_data.R +++ b/tests/testthat/test-add_data.R @@ -50,5 +50,5 @@ test_that("defRepeatAdd works", { test_that("defRepeatAdd throws errors correctly.", { expect_error(defRepeatAdd(prefix = "b", formula = 5, variance = 3, dist = "normal")) 
expect_error(defRepeatAdd(nvars = 8, formula = 5, variance = 3, dist = "normal")) - expect_error(defRepeatAdd(vars = 8, prefix = "b", variance = 3, dist = "normal")) + expect_error(defRepeatAdd(nvars = 8, prefix = "b", variance = 3, dist = "normal")) }) diff --git a/tests/testthat/test-define_data.R b/tests/testthat/test-define_data.R index 63b3e13e..718726d9 100644 --- a/tests/testthat/test-define_data.R +++ b/tests/testthat/test-define_data.R @@ -165,7 +165,7 @@ test_that("defRepeat works.", { test_that("defRepeat throws errors correctly.", { expect_error(defRepeat(prefix = "b", formula = 5, variance = 3, dist = "normal")) expect_error(defRepeat(nvars = 8, formula = 5, variance = 3, dist = "normal")) - expect_error(defRepeat(vars = 8, prefix = "b", variance = 3, dist = "normal")) + expect_error(defRepeat(nvars = 8, prefix = "b", variance = 3, dist = "normal")) expect_error(defRepeat(nvars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) }) From 7b05200faaa1a60ac19c70923f4f6ea13b376bd1 Mon Sep 17 00:00:00 2001 From: Keith Goldfeld Date: Thu, 13 Jan 2022 16:13:01 -0500 Subject: [PATCH 4/9] Updating tests --- R/define_data.R | 20 ++++++-------------- tests/testthat/test-add_data.R | 9 ++++++--- tests/testthat/test-define_data.R | 9 ++++++--- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/R/define_data.R b/R/define_data.R index d0429bb1..ce648d98 100644 --- a/R/define_data.R +++ b/R/define_data.R @@ -249,13 +249,9 @@ defRepeat <- function(dtDefs = NULL, link = "identity", id = "id") { - #### Check that arguments have been passed - - if (missing(nvars)) stop("argument 'nvars' is missing", call. = FALSE) - if (missing(prefix)) stop("argument 'prefix' is missing", call. = FALSE) - if (missing(formula)) stop("argument 'formula' is missing", call. 
= FALSE) - - #### No missing arguments + assertNotMissing(nvars = missing(nvars), + prefix = missing(prefix), + formula = missing(formula)) varnames <- paste0(prefix, 1 : nvars) @@ -318,13 +314,9 @@ defRepeatAdd <- function(dtDefs = NULL, link = "identity", id = "id") { - #### Check that arguments have been passed - - if (missing(nvars)) stop("argument 'nvars' is missing", call. = FALSE) - if (missing(prefix)) stop("argument 'prefix' is missing", call. = FALSE) - if (missing(formula)) stop("argument 'formula' is missing", call. = FALSE) - - #### No missing arguments + assertNotMissing(nvars = missing(nvars), + prefix = missing(prefix), + formula = missing(formula)) varnames <- paste0(prefix, 1 : nvars) diff --git a/tests/testthat/test-add_data.R b/tests/testthat/test-add_data.R index 3b08e29f..50769b99 100644 --- a/tests/testthat/test-add_data.R +++ b/tests/testthat/test-add_data.R @@ -48,7 +48,10 @@ test_that("defRepeatAdd works", { }) test_that("defRepeatAdd throws errors correctly.", { - expect_error(defRepeatAdd(prefix = "b", formula = 5, variance = 3, dist = "normal")) - expect_error(defRepeatAdd(nvars = 8, formula = 5, variance = 3, dist = "normal")) - expect_error(defRepeatAdd(nvars = 8, prefix = "b", variance = 3, dist = "normal")) + expect_error(defRepeatAdd(prefix = "b", formula = 5, variance = 3, dist = "normal"), + class = "simstudy::missingArgument") + expect_error(defRepeatAdd(nvars = 8, formula = 5, variance = 3, dist = "normal"), + class = "simstudy::missingArgument") + expect_error(defRepeatAdd(nvars = 8, prefix = "b", variance = 3, dist = "normal"), + class = "simstudy::missingArgument") }) diff --git a/tests/testthat/test-define_data.R b/tests/testthat/test-define_data.R index 718726d9..512d9963 100644 --- a/tests/testthat/test-define_data.R +++ b/tests/testthat/test-define_data.R @@ -163,9 +163,12 @@ test_that("defRepeat works.", { }) test_that("defRepeat throws errors correctly.", { - expect_error(defRepeat(prefix = "b", formula = 5, variance = 
3, dist = "normal")) - expect_error(defRepeat(nvars = 8, formula = 5, variance = 3, dist = "normal")) - expect_error(defRepeat(nvars = 8, prefix = "b", variance = 3, dist = "normal")) + expect_error(defRepeat(prefix = "b", formula = 5, variance = 3, dist = "normal"), + class = "simstudy::missingArgument") + expect_error(defRepeat(nvars = 8, formula = 5, variance = 3, dist = "normal"), + class = "simstudy::missingArgument") + expect_error(defRepeat(nvars = 8, prefix = "b", variance = 3, dist = "normal"), + class = "simstudy::missingArgument") expect_error(defRepeat(nvars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) }) From 6d6994c6888fb0f9964fcede77270de3446b2d11 Mon Sep 17 00:00:00 2001 From: Keith Goldfeld Date: Thu, 13 Jan 2022 20:52:27 -0500 Subject: [PATCH 5/9] Updating vignette for defRepeat --- vignettes/simstudy.Rmd | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/vignettes/simstudy.Rmd b/vignettes/simstudy.Rmd index 9b95ae4c..bc185e4b 100644 --- a/vignettes/simstudy.Rmd +++ b/vignettes/simstudy.Rmd @@ -263,19 +263,35 @@ A *uniform* distribution is a continuous data distribution that takes on values A *uniform integer* distribution is a discrete data distribution that takes on values from $a$ to $b$, where $b$ > $a$, and they both lie anywhere on the integer number line. The `formula` is a string with the format "a;b", where *a* and *b* are scalars or functions of previously defined variables. The `variance` and `link` arguments do not apply to the *uniform integer* distribution. +## Generating multiple variables with a single definition + +`defRepeat` allows us to specify multiple versions of a variable based on a single set of distribution assumptions. The function will add `nvar` variables to the *data definition* table, each of which will be specified with a single set of distribution assumptions. 
The names of the variables will be based on the `prefix` argument and the distribution assumptions are specified as they are in the `defData` function. Calls to `defRepeat` can be integrated with calls to `defData`. + +```{r} +def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", + variance = 0, dist = "categorical") +def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") +def <- defRepeat(def, 3, "b", formula = "5 + a", variance = 3, dist = "normal") +def <- defData(def, "y", formula = "0.10", dist = "binary") + +def +``` + ## Adding data to an existing data table Until this point, we have been generating new data sets, building them up from scratch. However, it is often necessary to generate the data in multiple stages so that we would need to add data as we go along. For example, we may have multi-level data with clusters that contain collections of individual observations. The data generation might begin with defining and generating cluster-level variables, followed by the definition and generation of the individual-level data; the individual-level data set would be adding to the cluster-level data set. -### defDataAdd/readDataAdd and addColumns +### defDataAdd/defRepeatAdd/readDataAdd and addColumns -There are several important functions that facilitate the augmentation of data sets. `defDataAdd` and `readDataAdd` are similar to their counterparts `defData` and `readData`; they create data definition tables that will be used by the function `addColumns`. The formulas in these "*add*-ing" functions are permitted to refer to fields that exist in the data set to be augmented, so all variables need not be defined in the current definition able. +There are several important functions that facilitate the augmentation of data sets. 
`defDataAdd`, `defRepeatAdd`, and `readDataAdd` are similar to their counterparts `defData`, `defRepeat`, and `readData`, respectively; they create data definition tables that will be used by the function `addColumns`. The formulas in these "*add*-ing" functions are permitted to refer to fields that exist in the data set to be augmented, so all variables need not be defined in the current definition table. ```{r} d1 <- defData(varname = "x1", formula = 0, variance = 1, dist = "normal") d1 <- defData(d1, varname = "x2", formula = 0.5, dist = "binary") -d2 <- defDataAdd(varname = "y", formula = "-2 + 0.5*x1 + 0.5*x2 + 1*rx", +d2 <- defRepeatAdd(nvars = 2, prefix = "q", formula = "5 + 3*rx", + variance = 4, dist = "normal") +d2 <- defDataAdd(d2, varname = "y", formula = "-2 + 0.5*x1 + 0.5*x2 + 1*rx", dist = "binary", link = "logit") dd <- genData(5, d1) @@ -295,12 +311,12 @@ In this example, the slope of a regression line of $y$ on $x$ varies depending o ```{r} d <- defData(varname = "x", formula = 0, variance = 9, dist = "normal") -dc <- defCondition(condition = "x <= -2", formula = "4 + 3*x", variance = 2, - dist = "normal") -dc <- defCondition(dc, condition = "x > -2 & x <= 2", formula = "0 + 1*x", variance = 4, - dist = "normal") -dc <- defCondition(dc, condition = "x > 2", formula = "-5 + 4*x", variance = 3, - dist = "normal") +dc <- defCondition(condition = "x <= -2", formula = "4 + 3*x", + variance = 2, dist = "normal") +dc <- defCondition(dc, condition = "x > -2 & x <= 2", formula = "0 + 1*x", + variance = 4, dist = "normal") +dc <- defCondition(dc, condition = "x > 2", formula = "-5 + 4*x", + variance = 3, dist = "normal") dd <- genData(1000, d) dd <- addCondition(dc, dd, newvar = "y") From 10ac0fb49045723bdfad98c46a2dee3f392e9466 Mon Sep 17 00:00:00 2001 From: assignUser Date: Mon, 17 Jan 2022 09:33:26 +0100 Subject: [PATCH 6/9] camelCase nvars --- R/define_data.R | 170 +++++++++++++++++++++++------------------------- 1 file changed, 83 insertions(+), 87
deletions(-) diff --git a/R/define_data.R b/R/define_data.R index ce648d98..a1467e33 100644 --- a/R/define_data.R +++ b/R/define_data.R @@ -219,7 +219,7 @@ defDataAdd <- function(dtDefs = NULL, #' Add multiple (similar) rows to definitions table #' #' @param dtDefs Definition data.table to be modified -#' @param nvars Number of new variables to define +#' @param nVars Number of new variables to define #' @param prefix Prefix (character) for new variables #' @param formula An R expression for mean (string) #' @param variance Number or formula @@ -231,52 +231,58 @@ defDataAdd <- function(dtDefs = NULL, #' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. #' #' @examples -#' def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", -#' variance = 0, dist = "categorical") +#' def <- defRepeat( +#' nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", +#' variance = 0, dist = "categorical" +#' ) #' def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") #' def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") #' def <- defData(def, "y", formula = "0.10", dist = "binary") -#' +#' #' def #' @export #' @concept define_data defRepeat <- function(dtDefs = NULL, - nvars, - prefix, - formula, - variance = 0, - dist = "normal", - link = "identity", - id = "id") { - - assertNotMissing(nvars = missing(nvars), - prefix = missing(prefix), - formula = missing(formula)) - - varnames <- paste0(prefix, 1 : nvars) - + nVars, + prefix, + formula, + variance = 0, + dist = "normal", + link = "identity", + id = "id") { + assertNotMissing( + nVars = missing(nVars), + prefix = missing(prefix), + formula = missing(formula) + ) + + varnames <- paste0(prefix, 1:nVars) + if (is.null(dtDefs)) { - - defNew <- defData(varname = varnames[1], formula = formula, - variance = variance, dist=dist, link = link, id = id) - - for (i in (2:nvars) ) { - defNew <- defData(defNew, varname = varnames[i], - formula = formula, 
variance = variance, - dist=dist, link = link, id = id) + defNew <- defData( + varname = varnames[1], formula = formula, + variance = variance, dist = dist, link = link, id = id + ) + + for (i in (2:nVars)) { + defNew <- defData(defNew, + varname = varnames[i], + formula = formula, variance = variance, + dist = dist, link = link, id = id + ) } - } else { - defNew <- data.table::copy(dtDefs) - - for (i in 1:nvars) { - defNew <- defData(defNew, varname = varnames[i], - formula = formula, variance = variance, - dist=dist, link = link, id = id) + + for (i in 1:nVars) { + defNew <- defData(defNew, + varname = varnames[i], + formula = formula, variance = variance, + dist = dist, link = link, id = id + ) } } - + return(defNew[]) } @@ -284,7 +290,7 @@ defRepeat <- function(dtDefs = NULL, #' existing data.table #' #' @param dtDefs Definition data.table to be modified -#' @param nvars Number of new variables to define +#' @param nVars Number of new variables to define #' @param prefix Prefix (character) for new variables #' @param formula An R expression for mean (string) #' @param variance Number or formula @@ -296,52 +302,58 @@ defRepeat <- function(dtDefs = NULL, #' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. 
#' #' @examples -#' def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", -#' variance = 0, dist = "categorical") +#' def <- defRepeatAdd( +#' nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", +#' variance = 0, dist = "categorical" +#' ) #' def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") #' def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") #' def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") -#' +#' #' def #' @export #' @concept define_data defRepeatAdd <- function(dtDefs = NULL, - nvars, - prefix, - formula, - variance = 0, - dist = "normal", - link = "identity", - id = "id") { - - assertNotMissing(nvars = missing(nvars), - prefix = missing(prefix), - formula = missing(formula)) - - varnames <- paste0(prefix, 1 : nvars) - + nVars, + prefix, + formula, + variance = 0, + dist = "normal", + link = "identity", + id = "id") { + assertNotMissing( + nVars = missing(nVars), + prefix = missing(prefix), + formula = missing(formula) + ) + + varnames <- paste0(prefix, 1:nVars) + if (is.null(dtDefs)) { - - defNew <- defDataAdd(varname = varnames[1], formula = formula, - variance = variance, dist=dist, link = link) - - for (i in (2:nvars) ) { - defNew <- defDataAdd(defNew, varname = varnames[i], - formula = formula, variance = variance, - dist=dist, link = link) + defNew <- defDataAdd( + varname = varnames[1], formula = formula, + variance = variance, dist = dist, link = link + ) + + for (i in (2:nVars)) { + defNew <- defDataAdd(defNew, + varname = varnames[i], + formula = formula, variance = variance, + dist = dist, link = link + ) } - } else { - defNew <- data.table::copy(dtDefs) - - for (i in 1:nvars) { - defNew <- defDataAdd(defNew, varname = varnames[i], - formula = formula, variance = variance, - dist=dist, link = link) + + for (i in 1:nVars) { + defNew <- defDataAdd(defNew, + varname = varnames[i], + formula = formula, variance = variance, + dist = dist, link = link + ) } } - + 
return(defNew[]) } @@ -579,11 +591,10 @@ defSurv <- function(dtDefs = NULL, formula = 0, scale, shape = 1) { - if (is.null(dtDefs)) { dtDefs <- data.table::data.table() } - + dt.new <- data.table::data.table( varname, formula, @@ -649,57 +660,42 @@ defSurv <- function(dtDefs = NULL, newvar <- ensureValidName(newvar, call = sys.call(-1)) assertNotInDataTable(vars = newvar, dt = defVars) - switch( - newdist, - + switch(newdist, binary = { .isValidArithmeticFormula(newform, defVars) .isIdLogit(link) }, - beta = , binomial = { .isValidArithmeticFormula(newform, defVars) .isValidArithmeticFormula(variance, defVars) .isIdLogit(link) }, - noZeroPoisson = , - poisson = , - exponential = { .isValidArithmeticFormula(newform, defVars) .isIdLog(link) }, - gamma = , - negBinomial = { .isValidArithmeticFormula(newform, defVars) .isValidArithmeticFormula(variance, defVars) .isIdLog(link) }, - nonrandom = .isValidArithmeticFormula(newform, defVars), - normal = { .isValidArithmeticFormula(newform, defVars) .isValidArithmeticFormula(variance, defVars) }, - categorical = .checkCategorical(newform), - mixture = { .isValidArithmeticFormula(newform, defVars) .checkMixture(newform) }, - uniform = , - uniformInt = .checkUniform(newform), - trtAssign = .checkCategorical(newform), - + trtAssign = .checkCategorical(newform), stop("Unknown distribution.") ) From b4280ed8a30da81479338fa8fb0301bb91382e4e Mon Sep 17 00:00:00 2001 From: assignUser Date: Mon, 17 Jan 2022 09:38:59 +0100 Subject: [PATCH 7/9] fix tests --- tests/testthat/test-add_data.R | 8 ++++---- tests/testthat/test-define_data.R | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-add_data.R b/tests/testthat/test-add_data.R index 50769b99..04bc9896 100644 --- a/tests/testthat/test-add_data.R +++ b/tests/testthat/test-add_data.R @@ -35,7 +35,7 @@ test_that("addColumns works.", { test_that("defRepeatAdd works", { expect_silent( - defRepeatAdd(nvars = 4, prefix = "g", formula = 
"1/3;1/3;1/3", variance = 0, dist = "categorical") + defRepeatAdd(nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") ) def <- defDataAdd(varname = "a", formula = "1;1", dist = "trtAssign") @@ -43,15 +43,15 @@ test_that("defRepeatAdd works", { defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") ) - expect_silent(defRepeatAdd(nvars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) + expect_silent(defRepeatAdd(nVars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) }) test_that("defRepeatAdd throws errors correctly.", { expect_error(defRepeatAdd(prefix = "b", formula = 5, variance = 3, dist = "normal"), class = "simstudy::missingArgument") - expect_error(defRepeatAdd(nvars = 8, formula = 5, variance = 3, dist = "normal"), + expect_error(defRepeatAdd(nVars = 8, formula = 5, variance = 3, dist = "normal"), class = "simstudy::missingArgument") - expect_error(defRepeatAdd(nvars = 8, prefix = "b", variance = 3, dist = "normal"), + expect_error(defRepeatAdd(nVars = 8, prefix = "b", variance = 3, dist = "normal"), class = "simstudy::missingArgument") }) diff --git a/tests/testthat/test-define_data.R b/tests/testthat/test-define_data.R index 512d9963..1209b3ea 100644 --- a/tests/testthat/test-define_data.R +++ b/tests/testthat/test-define_data.R @@ -153,7 +153,7 @@ test_that("utility functions work", { test_that("defRepeat works.", { expect_silent( - defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") + defRepeat(nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") ) def <- defData(varname = "a", formula = "1;1", dist = "trtAssign") @@ -165,11 +165,11 @@ test_that("defRepeat works.", { test_that("defRepeat throws errors correctly.", { expect_error(defRepeat(prefix = "b", formula = 5, variance = 3, dist = "normal"), class = "simstudy::missingArgument") - expect_error(defRepeat(nvars = 8, formula = 5, 
variance = 3, dist = "normal"), + expect_error(defRepeat(nVars = 8, formula = 5, variance = 3, dist = "normal"), class = "simstudy::missingArgument") - expect_error(defRepeat(nvars = 8, prefix = "b", variance = 3, dist = "normal"), + expect_error(defRepeat(nVars = 8, prefix = "b", variance = 3, dist = "normal"), class = "simstudy::missingArgument") - expect_error(defRepeat(nvars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) + expect_error(defRepeat(nVars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal")) }) rm(list = setdiff(names(.GlobalEnv), freeze_eval), pos = .GlobalEnv) From 1b9a46d3126a7029b614096ffcf832dbdef19f00 Mon Sep 17 00:00:00 2001 From: assignUser Date: Mon, 17 Jan 2022 09:50:41 +0100 Subject: [PATCH 8/9] fix vignette --- vignettes/simstudy.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/simstudy.Rmd b/vignettes/simstudy.Rmd index bc185e4b..68e8ec3b 100644 --- a/vignettes/simstudy.Rmd +++ b/vignettes/simstudy.Rmd @@ -268,7 +268,7 @@ A *uniform integer* distribution is a discrete data distribution that takes on v `defRepeat` allows us to specify multiple versions of a variable based on a single set of distribution assumptions. The function will add `nvar` variables to the *data definition* table, each of which will be specified with a single set of distribution assumptions. The names of the variables will be based on the `prefix` argument and the distribution assumptions are specified as they are in the `defData` function. Calls to `defRepeat` can be integrated with calls to `defData`. 
```{r} -def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", +def <- defRepeat(nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical") def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") def <- defRepeat(def, 3, "b", formula = "5 + a", variance = 3, dist = "normal") @@ -289,7 +289,7 @@ There are several important functions that facilitate the augmentation of data s d1 <- defData(varname = "x1", formula = 0, variance = 1, dist = "normal") d1 <- defData(d1, varname = "x2", formula = 0.5, dist = "binary") -d2 <- defRepeatAdd(nvars = 2, prefix = "q", formula = "5 + 3*rx", +d2 <- defRepeatAdd(nVars = 2, prefix = "q", formula = "5 + 3*rx", variance = 4, dist = "normal") d2 <- defDataAdd(d2, varname = "y", formula = "-2 + 0.5*x1 + 0.5*x2 + 1*rx", dist = "binary", link = "logit") From 814703c1b1e142081dbe5735d52860bdfcecc155 Mon Sep 17 00:00:00 2001 From: assignUser Date: Mon, 17 Jan 2022 09:59:22 +0100 Subject: [PATCH 9/9] DOcument --- man/defRepeat.Rd | 10 ++++++---- man/defRepeatAdd.Rd | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/man/defRepeat.Rd b/man/defRepeat.Rd index 1ec09135..c68786dd 100644 --- a/man/defRepeat.Rd +++ b/man/defRepeat.Rd @@ -6,7 +6,7 @@ \usage{ defRepeat( dtDefs = NULL, - nvars, + nVars, prefix, formula, variance = 0, @@ -18,7 +18,7 @@ defRepeat( \arguments{ \item{dtDefs}{Definition data.table to be modified} -\item{nvars}{Number of new variables to define} +\item{nVars}{Number of new variables to define} \item{prefix}{Prefix (character) for new variables} @@ -42,8 +42,10 @@ Add multiple (similar) rows to definitions table The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. 
} \examples{ -def <- defRepeat(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", - variance = 0, dist = "categorical") +def <- defRepeat( + nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", + variance = 0, dist = "categorical" +) def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") def <- defData(def, "y", formula = "0.10", dist = "binary") diff --git a/man/defRepeatAdd.Rd b/man/defRepeatAdd.Rd index c2c621e2..27bdbe47 100644 --- a/man/defRepeatAdd.Rd +++ b/man/defRepeatAdd.Rd @@ -7,7 +7,7 @@ existing data.table} \usage{ defRepeatAdd( dtDefs = NULL, - nvars, + nVars, prefix, formula, variance = 0, @@ -19,7 +19,7 @@ defRepeatAdd( \arguments{ \item{dtDefs}{Definition data.table to be modified} -\item{nvars}{Number of new variables to define} +\item{nVars}{Number of new variables to define} \item{prefix}{Prefix (character) for new variables} @@ -44,8 +44,10 @@ existing data.table The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. } \examples{ -def <- defRepeatAdd(nvars = 4, prefix = "g", formula = "1/3;1/3;1/3", - variance = 0, dist = "categorical") +def <- defRepeatAdd( + nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", + variance = 0, dist = "categorical" +) def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") def <- defDataAdd(def, "y", formula = "0.10", dist = "binary")