diff --git a/.Rbuildignore b/.Rbuildignore
index 972db163..8a3bce13 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -13,3 +13,5 @@
^_pkgdown\.yml$
^docs$
^clippy$
+^\[MS-XLS\]\.pdf$
+^excelfileformat\.pdf$
diff --git a/.gitignore b/.gitignore
index 59723256..5f690587 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
.RData
~*
clippy
+\[MS-XLS\].pdf
+excelfileformat.pdf
diff --git a/NEWS.md b/NEWS.md
index ab0da5e7..c261642f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,15 @@
# readxl 0.1.1.9000
+*currently much of this applies only to xlsx, but will be extended to xls*
+
+* A user-specified `col_types` of length one will be replicated to have length equal to the number of columns. (#127, #114, #261 @jennybc)
+
+* Column type `"blank"` has been deprecated in favor of the more descriptive `"skip"`, which also supports the goal to become more consistent with readr. (#260, #193, #261 @jennybc)
+
+* User-supplied `col_names` are processed relative to user-supplied `col_types`, if given. Specifically, `col_names` is considered valid if it has the same length as `col_types`, before *or after* removing skipped columns. (#81, #261 @jennybc)
+
+* Leading or embedded empty columns are no longer dropped, regardless of whether there is a column name. (#157, #261 @jennybc)
+
* New argument `guess_max` lets user adjust the number of rows used to guess column types, similar to functions in readr. (#223, #257 @tklebel, @jennybc)
* Improved handling of empty cells for xlsx. (#248 @jennybc)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 14700877..2b1c41e2 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -41,8 +41,8 @@ parse_ref <- function(ref) {
.Call('readxl_parse_ref', PACKAGE = 'readxl', ref)
}
-xlsx_col_types <- function(path, sheet = 0L, na = character(), nskip = 0L, guess_max = 1000L) {
- .Call('readxl_xlsx_col_types', PACKAGE = 'readxl', path, sheet, na, nskip, guess_max)
+xlsx_col_types <- function(path, sheet = 0L, na = character(), nskip = 0L, guess_max = 1000L, sheetHasColumnNames = FALSE) {
+ .Call('readxl_xlsx_col_types', PACKAGE = 'readxl', path, sheet, na, nskip, guess_max, sheetHasColumnNames)
}
xlsx_col_names <- function(path, sheet = 0L, nskip = 0L) {
@@ -53,14 +53,6 @@ read_xlsx_ <- function(path, sheet, col_names, col_types, na, nskip = 0L, guess_
.Call('readxl_read_xlsx_', PACKAGE = 'readxl', path, sheet, col_names, col_types, na, nskip, guess_max)
}
-parseXml <- function(base, internal) {
- invisible(.Call('readxl_parseXml', PACKAGE = 'readxl', base, internal))
-}
-
-countRows <- function(base, sheet) {
- .Call('readxl_countRows', PACKAGE = 'readxl', base, sheet)
-}
-
zip_xml <- function(zip_path, file_path) {
invisible(.Call('readxl_zip_xml', PACKAGE = 'readxl', zip_path, file_path))
}
diff --git a/R/read_excel.R b/R/read_excel.R
index 42a6fe3f..5b9e5c68 100644
--- a/R/read_excel.R
+++ b/R/read_excel.R
@@ -7,14 +7,19 @@ NULL
#' @param path Path to the xls/xlsx file
#' @param sheet Sheet to read. Either a string (the name of a sheet), or an
#' integer (the position of the sheet). Defaults to the first sheet.
-#' @param col_names `TRUE` to use the first row as column names, `FALSE`
-#' to get default names, or a character vector giving a name for each column.
+#' @param col_names `TRUE` to use the first row as column names, `FALSE` to get
+#' default names, or a character vector giving a name for each column. If user
+#' provides `col_types` as a vector, `col_names` can have one entry per
+#' column, i.e. have the same length as `col_types`, or one entry per
+#' unskipped column.
#' @param col_types Either `NULL` to guess from the spreadsheet or a character
-#' vector containing one entry per column from these options: "blank",
-#' "numeric", "date" or "text".
-#' @param na Character vector of strings to use for missing values. By default
+#' vector containing one entry per column from these options: "skip",
+#' "numeric", "date" or "text". The content of a cell in a skipped column is
+#' never read and that column will not appear in the data frame output.
+#' @param na Character vector of strings to use for missing values. By default,
#' readxl treats blank cells as missing data.
-#' @param skip Number of rows to skip before reading any data.
+#' @param skip Number of rows to skip before reading any data. Leading blank
+#' rows are automatically skipped.
#' @param guess_max Maximum number of rows to use for guessing column types.
#' @export
#' @examples
@@ -32,6 +37,7 @@ read_excel <- function(path, sheet = 1L, col_names = TRUE, col_types = NULL,
path <- check_file(path)
guess_max <- check_guess_max(guess_max)
+ col_types <- check_col_types(col_types)
switch(excel_format(path),
xls = read_xls(path, sheet, col_names, col_types, na, skip, guess_max),
@@ -128,6 +134,30 @@ standardise_sheet <- function(sheet, sheet_names) {
}
}
+check_col_types <- function(col_types) {
+ if (is.null(col_types)) {
+ return(col_types)
+ }
+ stopifnot(is.character(col_types), length(col_types) > 0, !anyNA(col_types))
+
+ blank <- col_types == "blank"
+ if (any(blank)) {
+ message("`col_type = \"blank\"` deprecated. Use \"skip\" instead.")
+ col_types[blank] <- "skip"
+ }
+
+ accepted_types <- c("skip", "numeric", "date", "text")
+ ok <- col_types %in% accepted_types
+ if (any(!ok)) {
+ info <- paste(
+ paste0("'", col_types[!ok], "' [", seq_along(col_types)[!ok], "]"),
+ collapse = ", "
+ )
+ stop(paste("Illegal column type:", info), call. = FALSE)
+ }
+ col_types
+}
+
## from readr
check_guess_max <- function(guess_max, max_limit = .Machine$integer.max %/% 100) {
diff --git a/README.Rmd b/README.Rmd
index c1619a93..74acf6dc 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -94,6 +94,8 @@ If you are new to the tidyverse conventions for data import, you may want to con
* Loads datetimes into POSIXct columns. Both Windows (1900) and Mac (1904)
date specifications are processed correctly.
-* Blank columns are automatically dropped (*but this is changing!*). Blank rows that appear before the data are automatically dropped; embedded blank rows are not.
+* Blank rows that appear before the data are automatically dropped; embedded blank rows are not. User can exert more control of this with `skip`.
+
+* Column names and types are determined from the data in the sheet, by default, but user can also supply via `col_names` and `col_types`.
* It returns a tibble, i.e. a data frame with an additional `tbl_df` class. Among other things, this provide nicer printing.
diff --git a/README.md b/README.md
index 02f962ed..0a6bef13 100644
--- a/README.md
+++ b/README.md
@@ -117,6 +117,8 @@ Features
- Loads datetimes into POSIXct columns. Both Windows (1900) and Mac (1904) date specifications are processed correctly.
-- Blank columns are automatically dropped (*but this is changing!*). Blank rows that appear before the data are automatically dropped; embedded blank rows are not.
+- Blank rows that appear before the data are automatically dropped; embedded blank rows are not. User can exert more control of this with `skip`.
+
+- Column names and types are determined from the data in the sheet, by default, but user can also supply via `col_names` and `col_types`.
- It returns a tibble, i.e. a data frame with an additional `tbl_df` class. Among other things, this provide nicer printing.
diff --git a/docs/index.html b/docs/index.html
index 0f550515..2d30db2d 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -142,7 +142,8 @@
Re-encodes non-ASCII characters to UTF-8.
Loads datetimes into POSIXct columns. Both Windows (1900) and Mac (1904) date specifications are processed correctly.
-Blank columns are automatically dropped (but this is changing!). Blank rows that appear before the data are automatically dropped; embedded blank rows are not.
+Blank rows that appear before the data are automatically dropped; embedded blank rows are not. User can exert more control of this with skip
.
+Column names and types are determined from the data in the sheet, by default, but user can also supply via col_names
and col_types
.
It returns a tibble, i.e. a data frame with an additional tbl_df
class. Among other things, this provide nicer printing.
diff --git a/docs/news/index.html b/docs/news/index.html
index 479d7f42..925a0309 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -82,7 +82,13 @@ Change log All releases
readxl 0.1.1.9000
+
currently much of this applies only to xlsx, but will be extended to xls
+A user-specified col_types
of length one will be replicated to have length equal to the number of columns. (#127, #114, #261 @jennybc)
+Column type "blank"
has been deprecated in favor of the more descriptive "skip"
, which also support the goal to become more consistent with readr. (#260, #193, #261 @jennybc)
+User-supplied col_names
are processed relative to user-supplied col_types
, if given. Specifically, col_names
are considered valid if they have length equal to col_types
or equal to col_types
after removing skipped columns. (#81, #261 @jennybc)
+Leading or embedded empty columns are no longer dropped, regardless of whether there is a column name. (#157, #261 @jennybc)
+New argument guess_max
lets user adjust the number of rows used to guess column types, similar to functions in readr. (#223, #257 @tklebel, @jennybc)
-
Improved handling of empty cells for xlsx. (#248 @jennybc)
diff --git a/docs/reference/read_excel.html b/docs/reference/read_excel.html
index 652773d4..fa4cfa76 100644
--- a/docs/reference/read_excel.html
+++ b/docs/reference/read_excel.html
@@ -86,13 +86,13 @@ Read xls and xlsx files.
read_excel(path, sheet = 1L, col_names = TRUE, col_types = NULL,
- na = "", skip = 0)
+ na = "", skip = 0, guess_max = 1000)
read_xls(path, sheet = 1L, col_names = TRUE, col_types = NULL, na = "",
- skip = 0)
+ skip = 0, guess_max = 1000)
read_xlsx(path, sheet = 1L, col_names = TRUE, col_types = NULL, na = "",
- skip = 0)
+ skip = 0, guess_max = 1000)
Arguments
@@ -102,17 +102,24 @@ Ar
- Sheet to read. Either a string (the name of a sheet), or an
integer (the position of the sheet). Defaults to the first sheet.
- col_names
- TRUE
to use the first row as column names, FALSE
-to get default names, or a character vector giving a name for each column.
+ TRUE
to use the first row as column names, FALSE
to get
+default names, or a character vector giving a name for each column. If user
+provides col_types
as a vector, col_names
can have one entry per
+column, i.e. have the same length as col_types
, or one entry per
+unskipped column.
- col_types
- Either
NULL
to guess from the spreadsheet or a character
-vector containing one entry per column from these options: "blank",
-"numeric", "date" or "text".
+vector containing one entry per column from these options: "skip",
+"numeric", "date" or "text". The content of a cell in a skipped column is
+never read and that column will not appear in the data frame output.
- na
- - Character vector of strings to use for missing values. By default
+
- Character vector of strings to use for missing values. By default,
readxl treats blank cells as missing data.
- skip
- - Number of rows to skip before reading any data.
+ - Number of rows to skip before reading any data. Leading blank
+rows are automatically skipped.
+ - guess_max
+ - Maximum number of rows to use for guessing column types.
diff --git a/man/read_excel.Rd b/man/read_excel.Rd
index c5ec8ade..36f29159 100644
--- a/man/read_excel.Rd
+++ b/man/read_excel.Rd
@@ -6,8 +6,8 @@
\alias{read_xlsx}
\title{Read xls and xlsx files.}
\usage{
-read_excel(path, sheet = 1, col_names = TRUE, col_types = NULL, na = "",
- skip = 0, guess_max = 1000)
+read_excel(path, sheet = 1L, col_names = TRUE, col_types = NULL,
+ na = "", skip = 0, guess_max = 1000)
read_xls(path, sheet = 1L, col_names = TRUE, col_types = NULL, na = "",
skip = 0, guess_max = 1000)
@@ -21,17 +21,22 @@ read_xlsx(path, sheet = 1L, col_names = TRUE, col_types = NULL, na = "",
\item{sheet}{Sheet to read. Either a string (the name of a sheet), or an
integer (the position of the sheet). Defaults to the first sheet.}
-\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE}
-to get default names, or a character vector giving a name for each column.}
+\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE} to get
+default names, or a character vector giving a name for each column. If user
+provides \code{col_types} as a vector, \code{col_names} can have one entry per
+column, i.e. have the same length as \code{col_types}, or one entry per
+unskipped column.}
\item{col_types}{Either \code{NULL} to guess from the spreadsheet or a character
-vector containing one entry per column from these options: "blank",
-"numeric", "date" or "text".}
+vector containing one entry per column from these options: "skip",
+"numeric", "date" or "text". The content of a cell in a skipped column is
+never read and that column will not appear in the data frame output.}
-\item{na}{Character vector of strings to use for missing values. By default
+\item{na}{Character vector of strings to use for missing values. By default,
readxl treats blank cells as missing data.}
-\item{skip}{Number of rows to skip before reading any data.}
+\item{skip}{Number of rows to skip before reading any data. Leading blank
+rows are automatically skipped.}
\item{guess_max}{Maximum number of rows to use for guessing column types.}
}
diff --git a/src/CellType.h b/src/CellType.h
index 746d75de..b593d925 100644
--- a/src/CellType.h
+++ b/src/CellType.h
@@ -5,8 +5,12 @@
#include
#include "StringSet.h"
+// CELL_BLANK can arise only from an individual cell during type guessing
+// important that it be the first entry = default type, instead of ...
+// CELL_SKIP which can arise only as a user-specified column type
enum CellType {
CELL_BLANK,
+ CELL_SKIP,
CELL_DATE,
CELL_NUMERIC,
CELL_TEXT
@@ -26,11 +30,12 @@ inline std::vector cellTypes(Rcpp::CharacterVector x) {
types.push_back(CELL_DATE);
} else if (type == "numeric") {
types.push_back(CELL_NUMERIC);
+ } else if (type == "skip") {
+ types.push_back(CELL_SKIP);
} else if (type == "text") {
types.push_back(CELL_TEXT);
} else {
- Rcpp::warning("Unknown type '%s' at position %i. Using text instead.",
- type, i + 1);
+ Rcpp::stop("Unknown type '%s' at position %i", type, i + 1);
}
}
@@ -42,6 +47,7 @@ inline std::string cellTypeDesc(CellType type) {
case CELL_BLANK: return "blank";
case CELL_DATE: return "date";
case CELL_NUMERIC: return "numeric";
+ case CELL_SKIP: return "skip";
case CELL_TEXT: return "text";
}
return "???";
@@ -153,6 +159,9 @@ inline Rcpp::RObject makeCol(CellType type, int n) {
case CELL_NUMERIC:
return Rcpp::NumericVector(n, NA_REAL);
break;
+ case CELL_SKIP:
+ return R_NilValue;
+ break;
case CELL_TEXT:
return Rcpp::CharacterVector(n, NA_STRING);
break;
@@ -161,15 +170,14 @@ inline Rcpp::RObject makeCol(CellType type, int n) {
return R_NilValue;
}
-// Drop blanks from list of columns
-inline Rcpp::List removeBlankColumns(Rcpp::List cols,
- Rcpp::CharacterVector names,
- std::vector types) {
+inline Rcpp::List removeSkippedColumns(Rcpp::List cols,
+ Rcpp::CharacterVector names,
+ std::vector types) {
int p = cols.size();
int p_out = 0;
for (int j = 0; j < p; ++j) {
- if (types[j] != CELL_BLANK)
+ if (types[j] != CELL_SKIP)
p_out++;
}
@@ -177,8 +185,9 @@ inline Rcpp::List removeBlankColumns(Rcpp::List cols,
Rcpp::CharacterVector names_out(p_out);
int j_out = 0;
for (int j = 0; j < p; ++j) {
- if (types[j] == CELL_BLANK)
+ if (types[j] == CELL_SKIP) {
continue;
+ }
out[j_out] = cols[j];
names_out[j_out] = names[j];
diff --git a/src/ColSpec.h b/src/ColSpec.h
new file mode 100644
index 00000000..98facdd5
--- /dev/null
+++ b/src/ColSpec.h
@@ -0,0 +1,49 @@
+#ifndef READXL_COLSPEC_
+#define READXL_COLSPEC_
+
+#include
+#include "CellType.h"
+
+inline std::vector recycleTypes(std::vector types,
+ int ncol) {
+ if (types.size() == 1) {
+ types.resize(ncol);
+ std::fill(types.begin(), types.end(), types[0]);
+ }
+ return types;
+}
+
+inline Rcpp::CharacterVector reconcileNames(Rcpp::CharacterVector names,
+ const std::vector& types,
+ int sheet) {
+ size_t ncol_names = names.size();
+ size_t ncol_types = types.size();
+
+ if (ncol_names == ncol_types) {
+ return names;
+ }
+
+ size_t ncol_noskip = 0;
+ for (size_t i = 0; i < types.size(); i++) {
+ if (types[i] != CELL_SKIP) {
+ ncol_noskip++;
+ }
+ }
+ if (ncol_names != ncol_noskip) {
+ Rcpp::stop("Sheet %d has %d columns (%d unskipped), but `col_names` has length %d.",
+ sheet + 1, ncol_types, ncol_noskip, ncol_names);
+ }
+
+ Rcpp::CharacterVector newNames(ncol_types, "");
+ size_t j_short = 0;
+ for (size_t j_long = 0; j_long < ncol_types; ++j_long) {
+ if (types[j_long] == CELL_SKIP) {
+ continue;
+ }
+ newNames[j_long] = names[j_short];
+ j_short++;
+ }
+ return newNames;
+}
+
+#endif
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 17bcf3aa..614c616f 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -130,8 +130,8 @@ BEGIN_RCPP
END_RCPP
}
// xlsx_col_types
-CharacterVector xlsx_col_types(std::string path, int sheet, CharacterVector na, int nskip, int guess_max);
-RcppExport SEXP readxl_xlsx_col_types(SEXP pathSEXP, SEXP sheetSEXP, SEXP naSEXP, SEXP nskipSEXP, SEXP guess_maxSEXP) {
+CharacterVector xlsx_col_types(std::string path, int sheet, CharacterVector na, int nskip, int guess_max, bool sheetHasColumnNames);
+RcppExport SEXP readxl_xlsx_col_types(SEXP pathSEXP, SEXP sheetSEXP, SEXP naSEXP, SEXP nskipSEXP, SEXP guess_maxSEXP, SEXP sheetHasColumnNamesSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
@@ -140,7 +140,8 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< CharacterVector >::type na(naSEXP);
Rcpp::traits::input_parameter< int >::type nskip(nskipSEXP);
Rcpp::traits::input_parameter< int >::type guess_max(guess_maxSEXP);
- rcpp_result_gen = Rcpp::wrap(xlsx_col_types(path, sheet, na, nskip, guess_max));
+ Rcpp::traits::input_parameter< bool >::type sheetHasColumnNames(sheetHasColumnNamesSEXP);
+ rcpp_result_gen = Rcpp::wrap(xlsx_col_types(path, sheet, na, nskip, guess_max, sheetHasColumnNames));
return rcpp_result_gen;
END_RCPP
}
@@ -174,29 +175,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
-// parseXml
-void parseXml(std::string base, std::string internal);
-RcppExport SEXP readxl_parseXml(SEXP baseSEXP, SEXP internalSEXP) {
-BEGIN_RCPP
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::string >::type base(baseSEXP);
- Rcpp::traits::input_parameter< std::string >::type internal(internalSEXP);
- parseXml(base, internal);
- return R_NilValue;
-END_RCPP
-}
-// countRows
-int countRows(std::string base, int sheet);
-RcppExport SEXP readxl_countRows(SEXP baseSEXP, SEXP sheetSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::string >::type base(baseSEXP);
- Rcpp::traits::input_parameter< int >::type sheet(sheetSEXP);
- rcpp_result_gen = Rcpp::wrap(countRows(base, sheet));
- return rcpp_result_gen;
-END_RCPP
-}
// zip_xml
void zip_xml(const std::string& zip_path, const std::string& file_path);
RcppExport SEXP readxl_zip_xml(SEXP zip_pathSEXP, SEXP file_pathSEXP) {
diff --git a/src/XlsWorkSheet.cpp b/src/XlsWorkSheet.cpp
index 9dab42ff..cd22f280 100644
--- a/src/XlsWorkSheet.cpp
+++ b/src/XlsWorkSheet.cpp
@@ -42,8 +42,10 @@ List xls_cols(std::string path, int i, CharacterVector col_names,
XlsWorkBook wb = XlsWorkBook(path);
XlsWorkSheet sheet = wb.sheet(i);
- if (col_names.size() != col_types.size())
- stop("`col_names` and `col_types` must have the same length");
+ if (col_names.size() != col_types.size()) {
+ Rcpp::stop("Received %d names but %d types.",
+ col_names.size(), col_types.size());
+ }
std::vector types = cellTypes(col_types);
return sheet.readCols(col_names, types, na, nskip);
diff --git a/src/XlsWorkSheet.h b/src/XlsWorkSheet.h
index 4883a2c1..c50211ae 100644
--- a/src/XlsWorkSheet.h
+++ b/src/XlsWorkSheet.h
@@ -18,8 +18,10 @@ class XlsWorkSheet {
XlsWorkSheet(const XlsWorkBook& wb, int i) {
offset_ = dateOffset(wb.workbook()->is1904);
- if (i < 0 || i >= wb.nSheets())
- Rcpp::stop("Invalid sheet index");
+ if (i >= wb.nSheets()) {
+ Rcpp::stop("Can't retrieve sheet in position %d, only %d sheet(s) found.",
+ i + 1, wb.nSheets());
+ }
pWS_ = xls_getWorkSheet(wb.workbook(), i);
if (pWS_ == NULL)
@@ -90,8 +92,10 @@ class XlsWorkSheet {
Rcpp::List readCols(Rcpp::CharacterVector names, std::vector types,
const StringSet &na, int nskip = 0) {
- if ((int) names.size() != ncol_ || (int) types.size() != ncol_)
- Rcpp::stop("Need one name and type for each column");
+ if ((int) names.size() != ncol_ || (int) types.size() != ncol_){
+ Rcpp::stop("Received %d names and %d types, but worksheet contains %d columns.",
+ names.size(), types.size(), ncol_);
+ }
Rcpp::List cols(ncol_);
@@ -115,12 +119,15 @@ class XlsWorkSheet {
switch(types[j]) {
case CELL_BLANK:
break;
+ case CELL_SKIP:
+ break;
case CELL_NUMERIC:
switch(type) {
case CELL_BLANK:
REAL(col)[i] = NA_REAL;
break;
case CELL_NUMERIC:
+ case CELL_SKIP:
case CELL_DATE:
REAL(col)[i] = cell.d;
break;
@@ -132,6 +139,7 @@ class XlsWorkSheet {
break;
case CELL_DATE:
switch(type) {
+ case CELL_SKIP:
case CELL_BLANK:
REAL(col)[i] = NA_REAL;
break;
@@ -163,7 +171,7 @@ class XlsWorkSheet {
}
}
- return removeBlankColumns(cols, names, types);
+ return removeSkippedColumns(cols, names, types);
}
};
diff --git a/src/XlsxCell.h b/src/XlsxCell.h
index 8039c4ea..7985c967 100644
--- a/src/XlsxCell.h
+++ b/src/XlsxCell.h
@@ -50,11 +50,11 @@ class XlsxCell {
return location_.first;
}
- int col() const {
+ int col() const {
return location_.second;
}
- std::string asStdString(const std::vector& stringTable) {
+ std::string asStdString(const std::vector& stringTable) const {
rapidxml::xml_node<>* v = cell_->first_node("v");
if (v == NULL)
return "[NULL]";
@@ -67,7 +67,7 @@ class XlsxCell {
return stringTable.at(id);
}
- double asDouble(const StringSet& na) {
+ double asDouble(const StringSet& na) const {
rapidxml::xml_node<>* v = cell_->first_node("v");
if (v == NULL || na.contains(v->value()))
return NA_REAL;
@@ -75,7 +75,7 @@ class XlsxCell {
return (v == NULL) ? 0 : atof(v->value());
}
- double asDate(const StringSet& na, int offset) {
+ double asDate(const StringSet& na, int offset) const {
rapidxml::xml_node<>* v = cell_->first_node("v");
if (v == NULL || na.contains(v->value()))
return NA_REAL;
@@ -85,7 +85,7 @@ class XlsxCell {
}
Rcpp::RObject asCharSxp(const StringSet& na,
- const std::vector& stringTable) {
+ const std::vector& stringTable) const {
// Is it an inline string? // 18.3.1.53 is (Rich Text Inline) [p1649]
rapidxml::xml_node<>* is = cell_->first_node("is");
@@ -98,7 +98,6 @@ class XlsxCell {
}
}
-
rapidxml::xml_node<>* v = cell_->first_node("v");
if (v == NULL)
return NA_STRING;
@@ -118,7 +117,7 @@ class XlsxCell {
CellType type(const StringSet& na,
const std::vector& stringTable,
- const std::set& dateStyles) {
+ const std::set& dateStyles) const {
rapidxml::xml_attribute<>* t = cell_->first_attribute("t");
if (t == NULL || strncmp(t->value(), "n", 5) == 0) {
@@ -164,7 +163,7 @@ class XlsxCell {
Rcpp::RObject stringFromTable(const char* val, const StringSet& na,
- const std::vector& stringTable) {
+ const std::vector& stringTable) const {
int id = atoi(val);
if (id < 0 || id >= (int) stringTable.size()) {
Rcpp::warning("[%i, %i]: Invalid string id %i", row() + 1, col() + 1, id);
diff --git a/src/XlsxWorkSheet.cpp b/src/XlsxWorkSheet.cpp
index 7848ab04..ba88080f 100644
--- a/src/XlsxWorkSheet.cpp
+++ b/src/XlsxWorkSheet.cpp
@@ -1,5 +1,6 @@
#include
#include "XlsxWorkSheet.h"
+#include "ColSpec.h"
using namespace Rcpp;
// [[Rcpp::export]]
@@ -18,10 +19,11 @@ IntegerVector parse_ref(std::string ref) {
// [[Rcpp::export]]
CharacterVector xlsx_col_types(std::string path, int sheet = 0,
CharacterVector na = CharacterVector(),
- int nskip = 0, int guess_max = 1000) {
+ int nskip = 0, int guess_max = 1000,
+ bool sheetHasColumnNames = false) {
XlsxWorkSheet ws(path, sheet, nskip);
- std::vector types = ws.colTypes(na, nskip, guess_max);
+ std::vector types = ws.colTypes(na, guess_max, sheetHasColumnNames);
CharacterVector out(types.size());
for (size_t i = 0; i < types.size(); ++i) {
@@ -43,11 +45,12 @@ List read_xlsx_(std::string path, int sheet, RObject col_names,
XlsxWorkSheet ws(path, sheet, nskip);
+ // catches empty sheets and sheets where we skip past all data
if (ws.nrow() == 0 && ws.ncol() == 0) {
return Rcpp::List(0);
}
- // Standardise column names --------------------------------------------------
+ // Get column names --------------------------------------------------
CharacterVector colNames;
bool sheetHasColumnNames = false;
switch(TYPEOF(col_names)) {
@@ -64,7 +67,7 @@ List read_xlsx_(std::string path, int sheet, RObject col_names,
Rcpp::stop("`col_names` must be a logical or character vector");
}
- // Standardise column types --------------------------------------------------
+ // Get column types --------------------------------------------------
std::vector colTypes;
switch(TYPEOF(col_types)) {
case NILSXP:
@@ -72,10 +75,27 @@ List read_xlsx_(std::string path, int sheet, RObject col_names,
break;
case STRSXP:
colTypes = cellTypes(as(col_types));
+ colTypes = recycleTypes(colTypes, ws.ncol());
break;
default:
Rcpp::stop("`col_types` must be a character vector or NULL");
}
+ if ((int) colTypes.size() != ws.ncol()) {
+ Rcpp::stop("Sheet %d has %d columns, but `col_types` has length %d.",
+ sheet + 1, ws.ncol(), colTypes.size());
+ }
+
+ // convert blank columns to a default type (numeric today, but logical soon)
+ // can only happen when
+ // * col_types = NULL and we've learned them from data
+ // * all cells in column are empty or match one of the na strings
+ for (size_t i = 0; i < colTypes.size(); i++) {
+ if (colTypes[i] == CELL_BLANK) {
+ colTypes[i] = CELL_NUMERIC;
+ }
+ }
+
+ colNames = reconcileNames(colNames, colTypes, sheet);
return ws.readCols(colNames, colTypes, na, sheetHasColumnNames);
}
diff --git a/src/XlsxWorkSheet.h b/src/XlsxWorkSheet.h
index b5701ecd..192947e7 100644
--- a/src/XlsxWorkSheet.h
+++ b/src/XlsxWorkSheet.h
@@ -31,7 +31,7 @@ class XlsxWorkSheet {
{
rapidxml::xml_node<>* rootNode;
- if (sheet_i > wb.n_sheets()) {
+ if (sheet_i >= wb.n_sheets()) {
Rcpp::stop("Can't retrieve sheet in position %d, only %d sheet(s) found.",
sheet_i + 1, wb.n_sheets());
}
@@ -68,79 +68,49 @@ class XlsxWorkSheet {
return sheetName_;
}
- // JB: this should either take colNames as an argument or have a bit of code
- // moved out of here, so we don't read column names again inside this fxn.
- // More comments near end of fxn.
std::vector colTypes(const StringSet& na,
int guess_max = 1000,
bool has_col_names = false) {
std::vector types;
types.resize(ncol_);
- std::vector::const_iterator it, row_end;
- it = has_col_names ? secondRow_: firstRow_;
+ std::vector::const_iterator xcell;
+ xcell = has_col_names ? secondRow_ : firstRow_;
// no cell data to consult re: types
- if (it == cells_.end()) {
+ if (xcell == cells_.end()) {
for (size_t i = 0; i < types.size(); i++) {
- types[i] = CELL_NUMERIC;
+ types[i] = CELL_BLANK;
}
return types;
}
+ // base is row the data starts on **in the spreadsheet**
int base = firstRow_->row() + has_col_names;
- // we have consulted i rows re: determining col types
- int i;
- // account for any empty rows between column headers and data start
- i = it->row() - base;
-
- while (i < guess_max && it != cells_.end()) {
- // find the end of current row
- row_end = it;
- while(row_end != cells_.end() && row_end->row() == it->row()) {
- row_end++;
- }
-
- while (it != row_end) {
- XlsxCell xcell = *it;
- if (xcell.col() < ncol_) {
- CellType type = xcell.type(na, wb_.stringTable(), wb_.dateStyles());
- if (type > types[xcell.col()]) {
- types[xcell.col()] = type;
- }
+ while (xcell != cells_.end() && xcell->row() - base < guess_max) {
+ if (xcell->col() < ncol_) {
+ CellType type = xcell->type(na, wb_.stringTable(), wb_.dateStyles());
+ if (type > types[xcell->col()]) {
+ types[xcell->col()] = type;
}
- it++;
}
-
- i++;
+ xcell++;
}
- // JB: this usually rederives the column names, which seems unwise
- // I propose to move such reconciliation of column names and types out
- // of here and into the parent function , read_xlsx_()
- if (has_col_names) {
- // blank columns with a name aren't blank
- Rcpp::CharacterVector names = colNames();
- for (size_t i = 0; i < types.size(); i++) {
- if (types[i] == CELL_BLANK && names[i] != NA_STRING && names[i] != "")
- types[i] = CELL_NUMERIC;
- }
- }
return types;
}
Rcpp::CharacterVector colNames() {
Rcpp::CharacterVector out(ncol_);
- std::vector::const_iterator it = firstRow_;
- int base = it->row();
+ std::vector::const_iterator xcell = firstRow_;
+ int base = xcell->row();
- while(it != cells_.end() && it->row() == base) {
- XlsxCell xcell = *it;
- if (xcell.col() >= ncol_) {
+ while(xcell != cells_.end() && xcell->row() == base) {
+ if (xcell->col() >= ncol_) {
break;
}
- out[xcell.col()] = xcell.asCharSxp("", wb_.stringTable());
- it++;
+ out[xcell->col()] = xcell->asCharSxp("", wb_.stringTable());
+ xcell++;
}
return out;
}
@@ -149,111 +119,94 @@ class XlsxWorkSheet {
const std::vector& types,
const StringSet& na,
bool has_col_names = false) {
- // JB: suspect this should move out of here and into a function that does
- // this and the last rationalization re col names and types in colTypes
- if ((int) names.size() != ncol_ || (int) types.size() != ncol_)
- Rcpp::stop("Need one name and type for each column");
- std::vector::const_iterator it, row_end;
- it = has_col_names ? secondRow_: firstRow_;
-
- // no cell data to read
- if (it == cells_.end()) {
- Rcpp::List cols(ncol_);
- for (int j = 0; j < ncol_; ++j) {
- cols[j] = makeCol(types[j], 0);
- }
- return removeBlankColumns(cols, names, types);
- }
+ std::vector::const_iterator xcell;
+ xcell = has_col_names ? secondRow_: firstRow_;
+ // base is row the data starts on **in the spreadsheet**
int base = firstRow_->row() + has_col_names;
- // we have read i rows of data
- int i;
- // account for any empty rows between column headers and data start
- i = it->row() - base;
-
- // Initialise columns, accounting for leading skipped or blank rows
- int n = nrow_ - base;
+ int n = (xcell == cells_.end()) ? 0 : nrow_ - base;
Rcpp::List cols(ncol_);
+ cols.attr("names") = names;
for (int j = 0; j < ncol_; ++j) {
cols[j] = makeCol(types[j], n);
}
- while (it != cells_.end()) {
+ if (n == 0) {
+ return cols;
+ }
+ while (xcell != cells_.end()) {
+
+ int i = xcell->row();
+ int j = xcell->col();
if ((i + 1) % 1000 == 0) {
Rcpp::checkUserInterrupt();
}
-
- // find the end of this row
- row_end = it;
- while(row_end != cells_.end() && row_end->row() == it->row()) {
- row_end++;
+ if (types[j] == CELL_SKIP || j >= ncol_) {
+ xcell++;
+ continue;
}
- while (it != row_end) {
- XlsxCell xcell = *it;
- if (xcell.col() >= ncol_) {
- continue;
- }
- CellType type = xcell.type(na, wb_.stringTable(), wb_.dateStyles());
- Rcpp::RObject col = cols[xcell.col()];
- // row to write into
- int row = xcell.row() - base;
- // Needs to compare to actual cell type to give warnings
- switch(types[xcell.col()]) {
- case CELL_BLANK:
+ CellType type = xcell->type(na, wb_.stringTable(), wb_.dateStyles());
+ Rcpp::RObject col = cols[j];
+ // row to write into
+ int row = i - base;
+ // Needs to compare to actual cell type to give warnings
+ switch(types[j]) {
+ case CELL_SKIP:
+ break;
+ case CELL_BLANK:
+ break;
+ case CELL_NUMERIC:
+ switch(type) {
+ case CELL_SKIP:
break;
case CELL_NUMERIC:
- switch(type) {
- case CELL_NUMERIC:
- case CELL_DATE:
- REAL(col)[row] = xcell.asDouble(na);
- break;
- case CELL_BLANK:
- REAL(col)[row] = NA_REAL;
- break;
- case CELL_TEXT:
- Rcpp::warning("[%i, %i]: expecting numeric: got '%s'",
- xcell.row() + 1, xcell.col() + 1,
- xcell.asStdString(wb_.stringTable()));
- REAL(col)[row] = NA_REAL;
- }
+ case CELL_DATE:
+ REAL(col)[row] = xcell->asDouble(na);
+ break;
+ case CELL_BLANK:
+ REAL(col)[row] = NA_REAL;
+ break;
+ case CELL_TEXT:
+ Rcpp::warning("[%i, %i]: expecting numeric: got '%s'",
+ i + 1, j + 1, xcell->asStdString(wb_.stringTable()));
+ REAL(col)[row] = NA_REAL;
+ }
+ break;
+ case CELL_DATE:
+ switch(type) {
+ case CELL_SKIP:
break;
case CELL_DATE:
- switch(type) {
- case CELL_DATE:
- REAL(col)[row] = xcell.asDate(na, wb_.offset());
- break;
- case CELL_BLANK:
- REAL(col)[row] = NA_REAL;
- break;
- case CELL_NUMERIC:
- case CELL_TEXT:
- Rcpp::warning("[%i, %i]: expecting date: got '%s'",
- xcell.row() + 1, xcell.col() + 1,
- xcell.asStdString(wb_.stringTable()));
- REAL(col)[row] = NA_REAL;
- break;
- }
+ REAL(col)[row] = xcell->asDate(na, wb_.offset());
break;
+ case CELL_BLANK:
+ REAL(col)[row] = NA_REAL;
+ break;
+ case CELL_NUMERIC:
case CELL_TEXT:
- if (type == CELL_BLANK) {
- SET_STRING_ELT(col, row, NA_STRING);
- } else {
- SET_STRING_ELT(col, row, xcell.asCharSxp(na, wb_.stringTable()));
- }
+ Rcpp::warning("[%i, %i]: expecting date: got '%s'",
+ i + 1, j + 1, xcell->asStdString(wb_.stringTable()));
+ REAL(col)[row] = NA_REAL;
break;
}
- it++;
+ break;
+ case CELL_TEXT:
+ if (type == CELL_BLANK) {
+ SET_STRING_ELT(col, row, NA_STRING);
+ } else {
+ SET_STRING_ELT(col, row, xcell->asCharSxp(na, wb_.stringTable()));
+ }
+ break;
}
- ++i;
+ xcell++;
}
- return removeBlankColumns(cols, names, types);
+ return removeSkippedColumns(cols, names, types);
}
-
private:
void loadCells() {
diff --git a/src/benchmarks.cpp b/src/benchmarks.cpp
deleted file mode 100644
index 23b07f8f..00000000
--- a/src/benchmarks.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include
-#include "rapidxml.h"
-#include "XlsxWorkBook.h"
-using namespace Rcpp;
-
-// [[Rcpp::export]]
-void parseXml(std::string base, std::string internal) {
- std::string file = zip_buffer(base, internal);
- Rcout << "File size: " << file.size() << " bytes\n";
-
- rapidxml::xml_document<> xml;
- xml.parse<0>(&file[0]);
-}
-
-// [[Rcpp::export]]
-int countRows(std::string base, int sheet) {
- // FYI: sheet lookup logic has changed in main package
- // this is not generally correct
- std::string sheetPath = tfm::format("xl/worksheets/sheet%i.xml", sheet + 1);
-
- std::string file = zip_buffer(base, sheetPath);
- Rcout << "File size: " << file.size() << " bytes\n";
-
- rapidxml::xml_document<> xml;
- xml.parse<0>(&file[0]);
-
- rapidxml::xml_node<>* rootNode_ = xml.first_node("worksheet");
- if (rootNode_ == NULL)
- return 0;
-
- rapidxml::xml_node<>* sheetData_ = rootNode_->first_node("sheetData");
- if (sheetData_ == NULL)
- return 0;
-
- int i = 0;
- for (rapidxml::xml_node<>* row = sheetData_->first_node("row");
- row; row = row->next_sibling("row")) {
- i++;
- }
-
- return i;
-}
diff --git a/tests/testthat/test-col-names.R b/tests/testthat/test-col-names.R
index 1b104377..efc5521d 100644
--- a/tests/testthat/test-col-names.R
+++ b/tests/testthat/test-col-names.R
@@ -57,3 +57,28 @@ test_that("column names are de-duplicated", {
df <- read_excel(test_sheet("unnamed-duplicated-columns.xls"))
expect_identical(names(df)[4], "var2__1")
})
+
+test_that("wrong length column names are rejected [xlsx]", {
+ expect_error(
+ read_excel(test_sheet("iris-excel.xlsx"),col_names = LETTERS[1:3]),
+ "Sheet 1 has 5 columns (5 unskipped), but `col_names` has length 3.",
+ fixed = TRUE
+ )
+})
+
+test_that("wrong length column names are rejected [xls]", {
+ expect_error(
+ read_excel(test_sheet("iris-excel.xls"), col_names = LETTERS[1:3]),
+ "Received 3 names but 5 types."
+ )
+})
+
+test_that("column_names can anticipate skipping [xlsx]", {
+ expect_silent(
+ df <- read_excel(test_sheet("iris-excel.xlsx"),
+ col_names = c("one", "two", "three"), skip = 1,
+ col_types = c("numeric", "numeric", "skip", "skip", "text"))
+ )
+ expect_identical(dim(df), c(150L, 3L))
+ expect_identical(names(df), c("one", "two", "three"))
+})
diff --git a/tests/testthat/test-col-types.R b/tests/testthat/test-col-types.R
index 3cb00d52..df9a4437 100644
--- a/tests/testthat/test-col-types.R
+++ b/tests/testthat/test-col-types.R
@@ -1,4 +1,55 @@
-context("Col types")
+context("Column types")
+
+test_that("illegal col_types are rejected", {
+ expect_error(
+ read_excel(test_sheet("types.xlsx"),
+ col_types = c("foo", "numeric", "text", "bar")),
+ "Illegal column type"
+ )
+})
+
+test_that("request for 'blank' col type gets deprecation message and fix", {
+ expect_message(
+ read_excel(test_sheet("types.xlsx"),
+ col_types = rep_len(c("blank", "text"), length.out = 5)),
+ "`col_type = \"blank\"` deprecated. Use \"skip\" instead.",
+ fixed = TRUE
+ )
+})
+
+test_that("invalid col_types are rejected", {
+ expect_error(
+ read_excel(test_sheet("types.xlsx"), col_types = character()),
+ "length(col_types) > 0 is not TRUE", fixed = TRUE
+ )
+ expect_error(
+ read_excel(test_sheet("types.xlsx"), col_types = 1:3),
+ "is.character(col_types) is not TRUE", fixed = TRUE
+ )
+ expect_error(
+ read_excel(test_sheet("types.xlsx"), col_types = c(NA, "text", "numeric")),
+ "!anyNA(col_types) is not TRUE", fixed = TRUE
+ )
+})
+
+test_that("col_types can be specified", {
+ df <- read_excel(test_sheet("iris-excel.xlsx"),
+ col_types = c("numeric", "text", "numeric", "numeric", "text"))
+ expect_is(df[[2]], "character")
+})
+
+test_that("col_types are recycled", {
+ df <- read_excel(test_sheet("types.xlsx"), col_types = "text")
+ expect_match(vapply(df, class, character(1)), "character")
+})
+
+test_that("inappropriate col_types generate warning", {
+ expect_warning(
+ read_excel(test_sheet("iris-excel.xlsx"),
+ col_types = c("numeric", "text", "numeric", "numeric", "numeric")),
+ "expecting numeric"
+ )
+})
test_that("types imputed & read correctly [xlsx]", {
types <- read_excel(test_sheet("types.xlsx"))
@@ -12,8 +63,15 @@ test_that("types imputed & read correctly [xlsx]", {
test_that("types imputed & read correctly [xls]", {
expect_output(
+ ## valgrind reports this
+ ## Conditional jump or move depends on uninitialised value(s)
types <- read_excel(test_sheet("types.xls")),
"Unknown type: 517"
+ ## definitely due to these 'Unknown type: 517' msgs
+ ## line 102 in CellType.h
+ ## Rcpp::Rcout << "Unknown type: " << cell.id << "\n";
+ ## if I skip this test, memcheck report is as clean as it ever gets
+ ## https://github.com/tidyverse/readxl/issues/259
)
expect_is(types$number, "numeric")
expect_is(types$string, "character")
@@ -23,7 +81,7 @@ test_that("types imputed & read correctly [xls]", {
skip("revisit these expectations as xls problems are fixed")
})
-test_that("max_guess is honored for col_types [xlsx]", {
+test_that("guess_max is honored for col_types [xlsx]", {
expect_warning(
types <- read_excel(test_sheet("types.xlsx"), guess_max = 2),
"expecting numeric"
@@ -31,6 +89,19 @@ test_that("max_guess is honored for col_types [xlsx]", {
expect_identical(types$string_in_row_3, c(1, 2, NA))
})
-test_that("max_guess is honored for col_types [xls]", {
+test_that("guess_max is honored for col_types [xls]", {
skip("write this test as xls problems are fixed")
})
+
+test_that("wrong length col types generates error", {
+ expect_error(
+ read_excel(test_sheet("iris-excel.xlsx"), col_types = c("numeric", "text")),
+ "Sheet 1 has 5 columns, but `col_types` has length 2."
+ )
+ expect_error(
+ read_excel(test_sheet("iris-excel.xls"), col_types = c("numeric", "text")),
+ "Received 5 names but 2 types."
+ )
+})
+
+
diff --git a/tests/testthat/test-empty.R b/tests/testthat/test-empty.R
index b0dec8ea..655f4bd2 100644
--- a/tests/testthat/test-empty.R
+++ b/tests/testthat/test-empty.R
@@ -23,33 +23,3 @@ test_that("non-empty sheets act that way if we skip past everything", {
out <- read_excel(test_sheet("skipping.xlsx"), skip = 10)
expect_identical(out, tibble::tibble())
})
-
-test_that("empty cells with a style are not loaded", {
- ## what's important about this sheet?
- ## contains empty cells with a custom format
- ## therefore they appear in the xml and have a style attribute
- ## where are they?
- ## in the embedded empty columns, w/ and w/o a name
- ## in a trailing empty column
- ## in some trailing rows
- out <- read_excel(test_sheet("style-only-cells.xlsx"))
- df <- tibble::tibble(var1 = c("val1,1", "val2,1", "val3,1"),
- var2 = NA_real_,
- var3 = c("aa", "bb", "cc"),
- var5 = c(1, 2, 3))
- expect_equal(out, df)
- skip("revisit this when dust settles re: treatment of empty columns")
-})
-
-test_that("user-supplied column names play nicely with empty columns", {
- skip("waiting for dust to settle re: treatment of empty columns")
- ## do stuff like this:
- out <- read_excel(
- test_sheet("style-only-cells.xlsx"),
- col_names = LETTERS[1:4]
- )
- out <- read_excel(
- test_sheet("style-only-cells.xlsx"),
- col_names = LETTERS[1:5]
- )
-})
diff --git a/tests/testthat/test-missing-values.R b/tests/testthat/test-missing-values.R
index e9d15e79..d43de413 100644
--- a/tests/testthat/test-missing-values.R
+++ b/tests/testthat/test-missing-values.R
@@ -70,7 +70,7 @@ test_that("text values in numeric column gives warning & NA", {
test_that("empty first column gives valid data.frame", {
df <- read_excel(test_sheet("missing-first-column.xlsx"), col_names = FALSE)
- expect_equal(nrow(df), length(df[[1]]))
+ expect_equal(nrow(df), length(df[[2]]))
})
test_that("empty named column gives NA column", {
@@ -78,6 +78,29 @@ test_that("empty named column gives NA column", {
df2 <- read_excel(test_sheet("empty-named-column.xls"), col_names = TRUE)
expect_equal(ncol(df1), 4)
expect_equal(names(df1)[2], "y")
+ expect_true(all(is.na(df1$y)))
+ expect_true(all(is.numeric(df1$y)))
expect_equal(ncol(df2), 4)
expect_equal(names(df2)[2], "y")
+ expect_true(all(is.na(df2$y)))
+ expect_true(all(is.numeric(df2$y)))
+})
+
+test_that("empty (styled) cells are not loaded, but can survive as NA", {
+ ## what's important about this sheet?
+ ## contains empty cells with a custom format
+ ## therefore they appear in the xml and have a style attribute
+ ## where are they?
+ ## in the embedded empty columns, w/ and w/o a name
+ ## in a trailing empty column WHICH SHOULD BE DROPPED
+ ## in some trailing rows WHICH SHOULD BE DROPPED
+ out <- read_excel(test_sheet("style-only-cells.xlsx"))
+ df <- tibble::tibble(
+ var1 = c("val1,1", "val2,1", "val3,1"),
+ var2 = NA_real_,
+ var3 = c("aa", "bb", "cc"),
+ X__1 = NA_real_,
+ var5 = c(1, 2, 3)
+ )
+ expect_equal(out, df)
})
diff --git a/tests/testthat/test-sheets.R b/tests/testthat/test-sheets.R
index 2f2d170f..24f2737e 100644
--- a/tests/testthat/test-sheets.R
+++ b/tests/testthat/test-sheets.R
@@ -19,8 +19,14 @@ test_that("informative error when requesting non-existent sheet by name", {
test_that("informative error when requesting non-existent sheet by position", {
expect_error(
- read_excel(test_sheet("iris-excel.xlsx"), sheet = 200),
- "Can't retrieve sheet in position 200"
+ read_excel(test_sheet("iris-excel.xlsx"), sheet = 2),
+ "Can't retrieve sheet in position 2, only 1 sheet(s) found.",
+ fixed = TRUE
+ )
+ expect_error(
+ read_excel(test_sheet("iris-excel.xls"), sheet = 2),
+ "Can't retrieve sheet in position 2, only 1 sheet(s) found.",
+ fixed = TRUE
)
})