Skip to content

Commit 43b1821

Browse files
authored
Fix #154 (#183)
* Fix #154 * Check params * Add test for check * Add tests * Update NEWS [no ci]
1 parent 2a703c9 commit 43b1821

File tree

6 files changed

+102
-48
lines changed

6 files changed

+102
-48
lines changed

NEWS.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# readODS 2.2.0
22

33
* Fix #151 - Now `read_ods()` and `list_ods_sheets()` can also be used to process flat ods files. `read_fods()` and `list_fods_sheets()` are still available, but not as the so-called "common interface."
4+
* Fix #154 - `read_ods()` and `read_fods()` gain two new arguments, `trim_ws` and `n_max`, which work the same way as the arguments of the same names in `readxl::read_excel()`.
45

56
# readODS 2.1.1
67

R/read_ods.R

+70-44
Original file line numberDiff line numberDiff line change
@@ -32,26 +32,30 @@
3232
return(g)
3333
}
3434

35-
36-
3735
## Based on readxl, although the implementation is different.
3836
## If max_row is -1, read through the last row of the sheet.
3937
## Row and column-numbers are 1-based
40-
.standardise_limits <- function(range, skip) {
38+
.standardise_limits <- function(range, skip, n_max) {
4139
if(is.null(range)) {
4240
skip <- check_nonnegative_integer(x = skip, argument = "skip")
41+
n_max <- check_nonnegative_integer(x = n_max, argument = "n_max")
42+
if (n_max == Inf) {
43+
max_row <- -1
44+
} else {
45+
max_row <- n_max + 1
46+
}
4347
limits <- c(
4448
min_row = skip + 1,
45-
max_row = -1,
49+
max_row = max_row,
4650
min_col = 1,
4751
max_col = -1
4852
)
4953
} else {
50-
if(skip != 0) {
51-
warning("Range and non-zero value for skip given. Defaulting to range.", call. = FALSE)
54+
if(skip != 0 || n_max != Inf) {
55+
warning("Range and non-default value for skip or n_max given. Defaulting to range.", call. = FALSE)
5256
}
5357
tryCatch({
54-
limits <- cellranger::as.cell_limits(range)
58+
limits <- cellranger::as.cell_limits(range)
5559
}, error = function(e) {
5660
stop("Invalid `range`")
5761
})
@@ -82,7 +86,9 @@
8286
row_names = FALSE,
8387
strings_as_factors = FALSE,
8488
verbose = FALSE,
85-
as_tibble = TRUE) {
89+
as_tibble = TRUE,
90+
trim_ws = TRUE,
91+
n_max = Inf) {
8692
if (!file.exists(path)) {
8793
stop("file does not exist", call. = FALSE)
8894
}
@@ -104,6 +110,9 @@
104110
if (!is.logical(as_tibble)) {
105111
stop("as_tibble must be of type `boolean", call. = FALSE)
106112
}
113+
if (!is.logical(trim_ws)) {
114+
stop("trim_ws must be of type `boolean", call. = FALSE)
115+
}
107116
if (row_names && as_tibble) {
108117
stop("Tibbles do not support row names. To use row names, set as_tibble to false", call. = FALSE)
109118
}
@@ -115,6 +124,9 @@
115124
stop("Unknown col_types. Can either be a class col_spec, list, character, NULL or NA.",
116125
call. = FALSE)
117126
}
127+
if (!is.numeric(n_max)) {
128+
stop("n_max must be numeric.", call. = FALSE)
129+
}
118130
}
119131

120132
.return_empty <- function(as_tibble = FALSE) {
@@ -127,22 +139,22 @@
127139
return(data.frame())
128140
}
129141

130-
.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA")) {
142+
.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA"), trim_ws = TRUE) {
131143
if (verbose) {
132144
res <- readr::type_convert(df = df, col_types, na = na)
133145
} else {
134146
suppressMessages({
135-
res <- readr::type_convert(df = df, col_types, na = na)
147+
res <- readr::type_convert(df = df, col_types, na = na, trim_ws = trim_ws)
136148
})
137149
}
138150
return(res)
139151
}
140152

141-
.handle_col_types <- function(res, col_types, verbose, na) {
153+
.handle_col_types <- function(res, col_types, verbose, na, trim_ws) {
142154
if (isTRUE(is.na(col_types)) || nrow(res) == 0) {
143155
return(res)
144156
}
145-
.type_convert(df = res, col_types = col_types, verbose = verbose, na = na)
157+
.type_convert(df = res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws)
146158
}
147159

148160
## standardise `sheet` parameter as a number, i.e. sheet_index
@@ -173,19 +185,21 @@
173185
}
174186

175187
.read_ods <- function(path,
176-
sheet = 1,
177-
col_names = TRUE,
178-
col_types = NULL,
179-
na = "",
180-
skip = 0,
181-
formula_as_formula = FALSE,
182-
range = NULL,
183-
row_names = FALSE,
184-
strings_as_factors = FALSE,
185-
verbose = FALSE,
186-
as_tibble = TRUE,
187-
.name_repair = "unique",
188-
flat = FALSE) {
188+
sheet = 1,
189+
col_names = TRUE,
190+
col_types = NULL,
191+
na = "",
192+
skip = 0,
193+
formula_as_formula = FALSE,
194+
range = NULL,
195+
row_names = FALSE,
196+
strings_as_factors = FALSE,
197+
verbose = FALSE,
198+
as_tibble = TRUE,
199+
.name_repair = "unique",
200+
flat = FALSE,
201+
trim_ws = TRUE,
202+
n_max = Inf) {
189203
.check_read_args(path,
190204
sheet,
191205
col_names,
@@ -197,7 +211,9 @@
197211
row_names,
198212
strings_as_factors,
199213
verbose,
200-
as_tibble)
214+
as_tibble,
215+
trim_ws,
216+
n_max)
201217
path <- normalizePath(path)
202218
if (flat) {
203219
.get_sheet_names_func <- get_flat_sheet_names_
@@ -207,7 +223,7 @@
207223
.read_ods_func <- read_ods_
208224
}
209225
## Get cell range info
210-
limits <- .standardise_limits(range, skip)
226+
limits <- .standardise_limits(range, skip, n_max)
211227
sheet_index <- .standardise_sheet(sheet = sheet, sheet_names = .get_sheet_names_func(file = path, include_external_data = TRUE),
212228
range = range)
213229
strings <- .read_ods_func(file = path,
@@ -233,7 +249,7 @@
233249
byrow = TRUE),
234250
stringsAsFactors = FALSE)
235251
res <- .change_df_with_col_row_header(x = res, col_header = col_names, row_header = row_names, .name_repair = .name_repair)
236-
res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na)
252+
res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws)
237253
if (strings_as_factors) {
238254
res <- .convert_strings_to_factors(df = res)
239255
}
@@ -300,9 +316,11 @@
300316
#' Default is `"unique"`.
301317
#'
302318
#' @param ods_format character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension, unless `guess` is `FALSE`.
303-
#' @param guess logical. If the file extension is absent or not recognized, this
319+
#' @param guess logical. If the file extension is absent or not recognized, this
304320
#' controls whether we attempt to guess format based on the file signature or
305321
#' "magic number".
322+
#' @param trim_ws logical, should leading and trailing whitespace be trimmed?
323+
#' @param n_max numeric, maximum number of data rows to read. Ignored if `range` is given.
306324
#' @return A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file.
307325
#' @author Peter Brohan <peter.brohan+cran@@gmail.com>, Chung-hong Chan <chainsawtiney@@gmail.com>, Gerrit-Jan Schutten <phonixor@@gmail.com>
308326
#' @examples
@@ -343,7 +361,9 @@ read_ods <- function(path,
343361
as_tibble = TRUE,
344362
.name_repair = "unique",
345363
ods_format = c("auto", "ods", "fods"),
346-
guess = FALSE) {
364+
guess = FALSE,
365+
trim_ws = TRUE,
366+
n_max = Inf) {
347367
ods_format <- .determine_ods_format(path = path, guess = guess, ods_format = match.arg(ods_format))
348368
## Should use match.call but there's a weird bug if one of the variable names is 'file'
349369
.read_ods(path = path,
@@ -359,24 +379,28 @@ read_ods <- function(path,
359379
verbose = verbose,
360380
as_tibble = as_tibble,
361381
.name_repair = .name_repair,
362-
flat = ods_format == "fods")
382+
flat = ods_format == "fods",
383+
trim_ws = trim_ws,
384+
n_max = n_max)
363385
}
364386

365387
#' @rdname read_ods
366388
#' @export
367389
read_fods <- function(path,
368-
sheet = 1,
369-
col_names = TRUE,
370-
col_types = NULL,
371-
na = "",
372-
skip = 0,
373-
formula_as_formula = FALSE,
374-
range = NULL,
375-
row_names = FALSE,
376-
strings_as_factors = FALSE,
377-
verbose = FALSE,
378-
as_tibble = TRUE,
379-
.name_repair = "unique") {
390+
sheet = 1,
391+
col_names = TRUE,
392+
col_types = NULL,
393+
na = "",
394+
skip = 0,
395+
formula_as_formula = FALSE,
396+
range = NULL,
397+
row_names = FALSE,
398+
strings_as_factors = FALSE,
399+
verbose = FALSE,
400+
as_tibble = TRUE,
401+
.name_repair = "unique",
402+
trim_ws = TRUE,
403+
n_max = Inf) {
380404
## Should use match.call but there's a weird bug if one of the variable names is 'file'
381405
.read_ods(path = normalizePath(path, mustWork = FALSE),
382406
sheet = sheet,
@@ -391,5 +415,7 @@ read_fods <- function(path,
391415
verbose = verbose,
392416
as_tibble = as_tibble,
393417
.name_repair = .name_repair,
394-
flat = TRUE)
418+
flat = TRUE,
419+
trim_ws = trim_ws,
420+
n_max = n_max)
395421
}

man/list_ods_sheets.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/read_ods.Rd

+11-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testdata/leadingspaces.ods

7.6 KB
Binary file not shown.

tests/testthat/test_read_ods.R

+19
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ test_that("Incorrect Argument", {
88
expect_error(read_ods(path = "../testdata/sum.ods", strings_as_factors = "a"), "strings_as_factors must be of type `boolean`")
99
expect_error(read_ods(path = "../testdata/sum.ods", verbose = "a"), "verbose must be of type `boolean`")
1010
expect_error(read_ods(path = "../testdata/sum.ods", row_names = TRUE), "Tibbles do not support")
11+
expect_error(read_ods(path = "../testdata/sum.ods", n_max = "abc"), "n_max must be numeric")
12+
expect_error(read_ods(path = "../testdata/sum.ods", n_max = -1), "n_max must be a positive integer")
13+
expect_error(read_ods(path = "../testdata/sum.ods", trim_ws = "a"), "trim_ws must be")
1114
})
1215

1316
test_that("exceptions in C++ (hard to test)", {
@@ -58,6 +61,8 @@ test_that("Parses range inputs correctly", {
5861
expect_equal(x[[2,2]], 2)
5962
expect_message(x <- read_ods("../testdata/multisheet.ods", range = "Sheet3!D2:E4"))
6063
expect_equal(x[[1,1]], 3)
64+
expect_warning(x <- read_ods("../testdata/multisheet.ods", n_max = 10, range = "Sheet2!B4:D9"), "Range and non-default")
65+
expect_equal(x[[2,2]], 2)
6166
})
6267

6368
test_that("Deals with repeated spaces correctly when fetching only part of sheet",{
@@ -105,6 +110,20 @@ test_that("No Warning of empty sheet", {
105110
expect_silent(read_fods("../testdata/empty.fods"))
106111
})
107112

113+
test_that("n_max", {
114+
expect_silent(x <- read_ods("../testdata/starwars.ods", n_max = Inf))
115+
expect_equal(nrow(x), 10)
116+
expect_silent(x <- read_ods("../testdata/starwars.ods", n_max = 5))
117+
expect_equal(nrow(x), 5)
118+
expect_silent(x <- read_ods("../testdata/starwars.ods", n_max = 100))
119+
expect_equal(nrow(x), 10)
120+
})
121+
122+
test_that("trim_ws", {
123+
expect_equal(read_ods("../testdata/leadingspaces.ods", trim_ws = FALSE)[1,1, drop = TRUE], " abc")
124+
expect_equal(read_ods("../testdata/leadingspaces.ods", trim_ws = TRUE)[1,1, drop = TRUE], "abc")
125+
})
126+
108127
## V2.0.0 behavior: backward compatibility
109128

110129
test_that("Single column ODS v2.0.0", {

0 commit comments

Comments
 (0)