Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Def data enhancements #134

Merged
merged 9 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export(defMiss)
export(defRead)
export(defReadAdd)
export(defReadCond)
export(defRepeat)
export(defRepeatAdd)
export(defSurv)
export(delColumns)
export(gammaGetShapeRate)
Expand Down
165 changes: 145 additions & 20 deletions R/define_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,147 @@ defDataAdd <- function(dtDefs = NULL,
return(defNew[])
}

#' Add multiple (similar) rows to definitions table
#'
#' @param dtDefs Definition data.table to be modified
#' @param nVars Number of new variables to define
#' @param prefix Prefix (character) for new variables
#' @param formula An R expression for mean (string)
#' @param variance Number or formula
#' @param dist Distribution. For possibilities, see details
#' @param link The link function for the mean, see details
#' @param id A string indicating the field name for the unique record identifier
#' @return A data.table named dtName that is an updated data definitions table
#' @seealso [distributions]
#' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`.
#'
#' @examples
#' def <- defRepeat(
#' nVars = 4, prefix = "g", formula = "1/3;1/3;1/3",
#' variance = 0, dist = "categorical"
#' )
#' def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign")
#' def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal")
#' def <- defData(def, "y", formula = "0.10", dist = "binary")
#'
#' def
#' @export
#' @concept define_data
defRepeat <- function(dtDefs = NULL,
nVars,
prefix,
formula,
variance = 0,
dist = "normal",
link = "identity",
id = "id") {
assertNotMissing(
nVars = missing(nVars),
prefix = missing(prefix),
formula = missing(formula)
)

varnames <- paste0(prefix, 1:nVars)

if (is.null(dtDefs)) {
defNew <- defData(
varname = varnames[1], formula = formula,
variance = variance, dist = dist, link = link, id = id
)

for (i in (2:nVars)) {
defNew <- defData(defNew,
varname = varnames[i],
formula = formula, variance = variance,
dist = dist, link = link, id = id
)
}
} else {
defNew <- data.table::copy(dtDefs)

for (i in 1:nVars) {
defNew <- defData(defNew,
varname = varnames[i],
formula = formula, variance = variance,
dist = dist, link = link, id = id
)
}
}

return(defNew[])
}

#' Add multiple (similar) rows to definitions table that will be used to add data to an
#' existing data.table
#'
#' @param dtDefs Definition data.table to be modified
#' @param nVars Number of new variables to define
#' @param prefix Prefix (character) for new variables
#' @param formula An R expression for mean (string)
#' @param variance Number or formula
#' @param dist Distribution. For possibilities, see details
#' @param link The link function for the mean, see details
#' @param id A string indicating the field name for the unique record identifier
#' @return A data.table named dtName that is an updated data definitions table
#' @seealso [distributions]
#' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`.
#'
#' @examples
#' def <- defRepeatAdd(
#' nVars = 4, prefix = "g", formula = "1/3;1/3;1/3",
#' variance = 0, dist = "categorical"
#' )
#' def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign")
#' def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal")
#' def <- defDataAdd(def, "y", formula = "0.10", dist = "binary")
#'
#' def
#' @export
#' @concept define_data
defRepeatAdd <- function(dtDefs = NULL,
nVars,
prefix,
formula,
variance = 0,
dist = "normal",
link = "identity",
id = "id") {
assertNotMissing(
nVars = missing(nVars),
prefix = missing(prefix),
formula = missing(formula)
)

varnames <- paste0(prefix, 1:nVars)

if (is.null(dtDefs)) {
defNew <- defDataAdd(
varname = varnames[1], formula = formula,
variance = variance, dist = dist, link = link
)

for (i in (2:nVars)) {
defNew <- defDataAdd(defNew,
varname = varnames[i],
formula = formula, variance = variance,
dist = dist, link = link
)
}
} else {
defNew <- data.table::copy(dtDefs)

for (i in 1:nVars) {
defNew <- defDataAdd(defNew,
varname = varnames[i],
formula = formula, variance = variance,
dist = dist, link = link
)
}
}

return(defNew[])
}

#' Read external csv data set definitions
#'
#' @param filen String file name, including full path. Must be a csv file.
Expand Down Expand Up @@ -450,11 +591,10 @@ defSurv <- function(dtDefs = NULL,
formula = 0,
scale,
shape = 1) {

if (is.null(dtDefs)) {
dtDefs <- data.table::data.table()
}

dt.new <- data.table::data.table(
varname,
formula,
Expand Down Expand Up @@ -520,57 +660,42 @@ defSurv <- function(dtDefs = NULL,
newvar <- ensureValidName(newvar, call = sys.call(-1))
assertNotInDataTable(vars = newvar, dt = defVars)

switch(
newdist,

switch(newdist,
binary = {
.isValidArithmeticFormula(newform, defVars)
.isIdLogit(link)
},

beta = ,
binomial = {
.isValidArithmeticFormula(newform, defVars)
.isValidArithmeticFormula(variance, defVars)
.isIdLogit(link)
},

noZeroPoisson = ,

poisson = ,

exponential = {
.isValidArithmeticFormula(newform, defVars)
.isIdLog(link)
},

gamma = ,

negBinomial = {
.isValidArithmeticFormula(newform, defVars)
.isValidArithmeticFormula(variance, defVars)
.isIdLog(link)
},

nonrandom = .isValidArithmeticFormula(newform, defVars),

normal = {
.isValidArithmeticFormula(newform, defVars)
.isValidArithmeticFormula(variance, defVars)
},

categorical = .checkCategorical(newform),

mixture = {
.isValidArithmeticFormula(newform, defVars)
.checkMixture(newform)
},

uniform = ,

uniformInt = .checkUniform(newform),
trtAssign = .checkCategorical(newform),

trtAssign = .checkCategorical(newform),
stop("Unknown distribution.")
)

Expand Down Expand Up @@ -651,7 +776,7 @@ defSurv <- function(dtDefs = NULL,

#' Check uniform formula
#'
#' @description Unifom formulas must be of the form "min;max"
#' @description Uniform formulas must be of the form "min;max"
#' @param formula Formula as string.
#' @return Invisible, error if formula not valid.
#' @seealso distributions
Expand Down
58 changes: 58 additions & 0 deletions man/defRepeat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions man/defRepeatAdd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 24 additions & 1 deletion tests/testthat/test-add_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,27 @@ test_that("addColumns works.", {
def2 <- defDataAdd(varname = "y", formula = "2.3 * (1/x)", dist = "normal")

expect_silent(addColumns(def2, dt))
})
})

test_that("defRepeatAdd works", {
expect_silent(
defRepeatAdd(nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", variance = 0, dist = "categorical")
)

def <- defDataAdd(varname = "a", formula = "1;1", dist = "trtAssign")
expect_silent(
defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal")
)

expect_silent(defRepeatAdd(nVars = 4, prefix = "b", formula = "5 + a", variance = 3, dist = "normal"))

})

test_that("defRepeatAdd throws errors correctly.", {
expect_error(defRepeatAdd(prefix = "b", formula = 5, variance = 3, dist = "normal"),
class = "simstudy::missingArgument")
expect_error(defRepeatAdd(nVars = 8, formula = 5, variance = 3, dist = "normal"),
class = "simstudy::missingArgument")
expect_error(defRepeatAdd(nVars = 8, prefix = "b", variance = 3, dist = "normal"),
class = "simstudy::missingArgument")
})
Loading