diff --git a/.Rbuildignore b/.Rbuildignore
index 972db163..8a3bce13 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -13,3 +13,5 @@
 ^_pkgdown\.yml$
 ^docs$
 ^clippy$
+^\[MS-XLS\]\.pdf$
+^excelfileformat\.pdf$
diff --git a/.gitignore b/.gitignore
index 59723256..5f690587 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
 .RData
 ~*
 clippy
+\[MS-XLS\].pdf
+excelfileformat.pdf
diff --git a/NEWS.md b/NEWS.md
index ab0da5e7..c261642f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,15 @@
 # readxl 0.1.1.9000
 
+*currently much of this applies only to xlsx, but will be extended to xls*
+
+* A user-specified `col_types` of length one will be replicated to have length equal to the number of columns. (#127, #114, #261 @jennybc)
+
+* Column type `"blank"` has been deprecated in favor of the more descriptive `"skip"`, which also supports the goal to become more consistent with readr. (#260, #193, #261 @jennybc)
+
+* User-supplied `col_names` are processed relative to user-supplied `col_types`, if given. Specifically, `col_names` is considered valid if it has the same length as `col_types`, before *or after* removing skipped columns. (#81, #261 @jennybc)
+
+* Leading or embedded empty columns are no longer dropped, regardless of whether there is a column name. (#157, #261 @jennybc)
+
 * New argument `guess_max` lets user adjust the number of rows used to guess column types, similar to functions in readr. (#223, #257 @tklebel, @jennybc)
 
 * Improved handling of empty cells for xlsx. (#248 @jennybc)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 14700877..2b1c41e2 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -41,8 +41,8 @@ parse_ref <- function(ref) {
     .Call('readxl_parse_ref', PACKAGE = 'readxl', ref)
 }
 
-xlsx_col_types <- function(path, sheet = 0L, na = character(), nskip = 0L, guess_max = 1000L) {
-    .Call('readxl_xlsx_col_types', PACKAGE = 'readxl', path, sheet, na, nskip, guess_max)
+xlsx_col_types <- function(path, sheet = 0L, na = character(), nskip = 0L, guess_max = 1000L, sheetHasColumnNames = FALSE) {
+    .Call('readxl_xlsx_col_types', PACKAGE = 'readxl', path, sheet, na, nskip, guess_max, sheetHasColumnNames)
 }
 
 xlsx_col_names <- function(path, sheet = 0L, nskip = 0L) {
@@ -53,14 +53,6 @@ read_xlsx_ <- function(path, sheet, col_names, col_types, na, nskip = 0L, guess_
     .Call('readxl_read_xlsx_', PACKAGE = 'readxl', path, sheet, col_names, col_types, na, nskip, guess_max)
 }
 
-parseXml <- function(base, internal) {
-    invisible(.Call('readxl_parseXml', PACKAGE = 'readxl', base, internal))
-}
-
-countRows <- function(base, sheet) {
-    .Call('readxl_countRows', PACKAGE = 'readxl', base, sheet)
-}
-
 zip_xml <- function(zip_path, file_path) {
     invisible(.Call('readxl_zip_xml', PACKAGE = 'readxl', zip_path, file_path))
 }
diff --git a/R/read_excel.R b/R/read_excel.R
index 42a6fe3f..5b9e5c68 100644
--- a/R/read_excel.R
+++ b/R/read_excel.R
@@ -7,14 +7,19 @@ NULL
 #' @param path Path to the xls/xlsx file
 #' @param sheet Sheet to read. Either a string (the name of a sheet), or an
 #'   integer (the position of the sheet). Defaults to the first sheet.
-#' @param col_names `TRUE` to use the first row as column names, `FALSE`
-#'   to get default names, or a character vector giving a name for each column.
+#' @param col_names `TRUE` to use the first row as column names, `FALSE` to get
+#'   default names, or a character vector giving a name for each column. If user
+#'   provides `col_types` as a vector, `col_names` can have one entry per
+#'   column, i.e. have the same length as `col_types`, or one entry per
+#'   unskipped column.
 #' @param col_types Either `NULL` to guess from the spreadsheet or a character
-#'   vector containing one entry per column from these options: "blank",
-#'   "numeric", "date" or "text".
-#' @param na Character vector of strings to use for missing values. By default
+#'   vector containing one entry per column from these options: "skip",
+#'   "numeric", "date" or "text". The content of a cell in a skipped column is
+#'   never read and that column will not appear in the data frame output.
+#' @param na Character vector of strings to use for missing values. By default,
 #'   readxl treats blank cells as missing data.
-#' @param skip Number of rows to skip before reading any data.
+#' @param skip Number of rows to skip before reading any data. Leading blank
+#'   rows are automatically skipped.
 #' @param guess_max Maximum number of rows to use for guessing column types.
 #' @export
 #' @examples
@@ -32,6 +37,7 @@ read_excel <- function(path, sheet = 1L, col_names = TRUE, col_types = NULL,
 
   path <- check_file(path)
   guess_max <- check_guess_max(guess_max)
+  col_types <- check_col_types(col_types)
 
   switch(excel_format(path),
     xls =  read_xls(path, sheet, col_names, col_types, na, skip, guess_max),
@@ -128,6 +134,30 @@ standardise_sheet <- function(sheet, sheet_names) {
   }
 }
 
+check_col_types <- function(col_types) {
+  if (is.null(col_types)) {
+    return(col_types)
+  }
+  stopifnot(is.character(col_types), length(col_types) > 0, !anyNA(col_types))
+
+  blank <- col_types == "blank"
+  if (any(blank)) {
+    message("`col_type = \"blank\"` deprecated. Use \"skip\" instead.")
+    col_types[blank] <- "skip"
+  }
+
+  accepted_types <- c("skip", "numeric", "date", "text")
+  ok <- col_types %in% accepted_types
+  if (any(!ok)) {
+    info <- paste(
+      paste0("'", col_types[!ok], "' [", seq_along(col_types)[!ok], "]"),
+      collapse = ", "
+    )
+    stop(paste("Illegal column type:", info), call. = FALSE)
+  }
+  col_types
+}
+
 ## from readr
 check_guess_max <- function(guess_max, max_limit = .Machine$integer.max %/% 100) {
 
diff --git a/README.Rmd b/README.Rmd
index c1619a93..74acf6dc 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -94,6 +94,8 @@ If you are new to the tidyverse conventions for data import, you may want to con
 * Loads datetimes into POSIXct columns. Both Windows (1900) and Mac (1904) 
   date specifications are processed correctly.
 
-* Blank columns are automatically dropped (*but this is changing!*). Blank rows that appear before the data are automatically dropped; embedded blank rows are not.
+* Blank rows that appear before the data are automatically dropped; embedded blank rows are not. User can exert more control of this with `skip`.
+
+* Column names and types are determined from the data in the sheet, by default, but user can also supply via `col_names` and `col_types`.
 
 * It returns a tibble, i.e. a data frame with an additional `tbl_df` class. Among other things, this provide nicer printing.
diff --git a/README.md b/README.md
index 02f962ed..0a6bef13 100644
--- a/README.md
+++ b/README.md
@@ -117,6 +117,8 @@ Features
 
 -   Loads datetimes into POSIXct columns. Both Windows (1900) and Mac (1904) date specifications are processed correctly.
 
--   Blank columns are automatically dropped (*but this is changing!*). Blank rows that appear before the data are automatically dropped; embedded blank rows are not.
+-   Blank rows that appear before the data are automatically dropped; embedded blank rows are not. User can exert more control of this with `skip`.
+
+-   Column names and types are determined from the data in the sheet, by default, but user can also supply via `col_names` and `col_types`.
 
 -   It returns a tibble, i.e. a data frame with an additional `tbl_df` class. Among other things, this provide nicer printing.
diff --git a/docs/index.html b/docs/index.html
index 0f550515..2d30db2d 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -142,7 +142,8 @@ <h2 class="hasAnchor">
 <ul>
 <li><p>Re-encodes non-ASCII characters to UTF-8.</p></li>
 <li><p>Loads datetimes into POSIXct columns. Both Windows (1900) and Mac (1904) date specifications are processed correctly.</p></li>
-<li><p>Blank columns are automatically dropped (<em>but this is changing!</em>). Blank rows that appear before the data are automatically dropped; embedded blank rows are not.</p></li>
+<li><p>Blank rows that appear before the data are automatically dropped; embedded blank rows are not. User can exert more control of this with <code>skip</code>.</p></li>
+<li><p>Column names and types are determined from the data in the sheet, by default, but user can also supply via <code>col_names</code> and <code>col_types</code>.</p></li>
 <li><p>It returns a tibble, i.e. a data frame with an additional <code>tbl_df</code> class. Among other things, this provide nicer printing.</p></li>
 </ul>
 </div>
diff --git a/docs/news/index.html b/docs/news/index.html
index 479d7f42..925a0309 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -82,7 +82,13 @@ <h1>Change log <small>All releases</small></h1>
     <div id="readxl-0-1-1-9000" class="section level1">
 <h1 class="hasAnchor">
 <a href="#readxl-0-1-1-9000" class="anchor"> </a>readxl 0.1.1.9000</h1>
+<p><em>currently much of this applies only to xlsx, but will be extended to xls</em></p>
 <ul>
+<li><p>A user-specified <code>col_types</code> of length one will be replicated to have length equal to the number of columns. (#127, #114, #261 @jennybc)</p></li>
+<li><p>Column type <code>"blank"</code> has been deprecated in favor of the more descriptive <code>"skip"</code>, which also support the goal to become more consistent with readr. (#260, #193, #261 @jennybc)</p></li>
+<li><p>User-supplied <code>col_names</code> are processed relative to user-supplied <code>col_types</code>, if given. Specifically, <code>col_names</code> are considered valid if they have length equal to <code>col_types</code> or equal to <code>col_types</code> after removing skipped columns. (#81, #261 @jennybc)</p></li>
+<li><p>Leading or embedded empty columns are no longer dropped, regardless of whether there is a column name. (#157, #261 @jennybc)</p></li>
+<li><p>New argument <code>guess_max</code> lets user adjust the number of rows used to guess column types, similar to functions in readr. (#223, #257 @tklebel, @jennybc)</p></li>
 <li>
 <p>Improved handling of empty cells for xlsx. (#248 @jennybc)</p>
 <ul>
diff --git a/docs/reference/read_excel.html b/docs/reference/read_excel.html
index 652773d4..fa4cfa76 100644
--- a/docs/reference/read_excel.html
+++ b/docs/reference/read_excel.html
@@ -86,13 +86,13 @@ <h1>Read xls and xlsx files.</h1>
     
 
     <pre><span class='fu'>read_excel</span>(<span class='no'>path</span>, <span class='kw'>sheet</span> <span class='kw'>=</span> <span class='fl'>1L</span>, <span class='kw'>col_names</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>, <span class='kw'>col_types</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
-  <span class='kw'>na</span> <span class='kw'>=</span> <span class='st'>""</span>, <span class='kw'>skip</span> <span class='kw'>=</span> <span class='fl'>0</span>)
+  <span class='kw'>na</span> <span class='kw'>=</span> <span class='st'>""</span>, <span class='kw'>skip</span> <span class='kw'>=</span> <span class='fl'>0</span>, <span class='kw'>guess_max</span> <span class='kw'>=</span> <span class='fl'>1000</span>)
 
 <span class='fu'>read_xls</span>(<span class='no'>path</span>, <span class='kw'>sheet</span> <span class='kw'>=</span> <span class='fl'>1L</span>, <span class='kw'>col_names</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>, <span class='kw'>col_types</span> <span class='kw'>=</span> <span class='kw'>NULL</span>, <span class='kw'>na</span> <span class='kw'>=</span> <span class='st'>""</span>,
-  <span class='kw'>skip</span> <span class='kw'>=</span> <span class='fl'>0</span>)
+  <span class='kw'>skip</span> <span class='kw'>=</span> <span class='fl'>0</span>, <span class='kw'>guess_max</span> <span class='kw'>=</span> <span class='fl'>1000</span>)
 
 <span class='fu'>read_xlsx</span>(<span class='no'>path</span>, <span class='kw'>sheet</span> <span class='kw'>=</span> <span class='fl'>1L</span>, <span class='kw'>col_names</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>, <span class='kw'>col_types</span> <span class='kw'>=</span> <span class='kw'>NULL</span>, <span class='kw'>na</span> <span class='kw'>=</span> <span class='st'>""</span>,
-  <span class='kw'>skip</span> <span class='kw'>=</span> <span class='fl'>0</span>)</pre>
+  <span class='kw'>skip</span> <span class='kw'>=</span> <span class='fl'>0</span>, <span class='kw'>guess_max</span> <span class='kw'>=</span> <span class='fl'>1000</span>)</pre>
     
     <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a> Arguments</h2>
     <dl class="dl-horizontal">
@@ -102,17 +102,24 @@ <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a> Ar
       <dd>Sheet to read. Either a string (the name of a sheet), or an
 integer (the position of the sheet). Defaults to the first sheet.</dd>
       <dt>col_names</dt>
-      <dd><code>TRUE</code> to use the first row as column names, <code>FALSE</code>
-to get default names, or a character vector giving a name for each column.</dd>
+      <dd><code>TRUE</code> to use the first row as column names, <code>FALSE</code> to get
+default names, or a character vector giving a name for each column. If user
+provides <code>col_types</code> as a vector, <code>col_names</code> can have one entry per
+column, i.e. have the same length as <code>col_types</code>, or one entry per
+unskipped column.</dd>
       <dt>col_types</dt>
       <dd>Either <code>NULL</code> to guess from the spreadsheet or a character
-vector containing one entry per column from these options: &quot;blank&quot;,
-&quot;numeric&quot;, &quot;date&quot; or &quot;text&quot;.</dd>
+vector containing one entry per column from these options: &quot;skip&quot;,
+&quot;numeric&quot;, &quot;date&quot; or &quot;text&quot;. The content of a cell in a skipped column is
+never read and that column will not appear in the data frame output.</dd>
       <dt>na</dt>
-      <dd>Character vector of strings to use for missing values. By default
+      <dd>Character vector of strings to use for missing values. By default,
 readxl treats blank cells as missing data.</dd>
       <dt>skip</dt>
-      <dd>Number of rows to skip before reading any data.</dd>
+      <dd>Number of rows to skip before reading any data. Leading blank
+rows are automatically skipped.</dd>
+      <dt>guess_max</dt>
+      <dd>Maximum number of rows to use for guessing column types.</dd>
     </dl>
     
 
diff --git a/man/read_excel.Rd b/man/read_excel.Rd
index c5ec8ade..36f29159 100644
--- a/man/read_excel.Rd
+++ b/man/read_excel.Rd
@@ -6,8 +6,8 @@
 \alias{read_xlsx}
 \title{Read xls and xlsx files.}
 \usage{
-read_excel(path, sheet = 1, col_names = TRUE, col_types = NULL, na = "",
-  skip = 0, guess_max = 1000)
+read_excel(path, sheet = 1L, col_names = TRUE, col_types = NULL,
+  na = "", skip = 0, guess_max = 1000)
 
 read_xls(path, sheet = 1L, col_names = TRUE, col_types = NULL, na = "",
   skip = 0, guess_max = 1000)
@@ -21,17 +21,22 @@ read_xlsx(path, sheet = 1L, col_names = TRUE, col_types = NULL, na = "",
 \item{sheet}{Sheet to read. Either a string (the name of a sheet), or an
 integer (the position of the sheet). Defaults to the first sheet.}
 
-\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE}
-to get default names, or a character vector giving a name for each column.}
+\item{col_names}{\code{TRUE} to use the first row as column names, \code{FALSE} to get
+default names, or a character vector giving a name for each column. If user
+provides \code{col_types} as a vector, \code{col_names} can have one entry per
+column, i.e. have the same length as \code{col_types}, or one entry per
+unskipped column.}
 
 \item{col_types}{Either \code{NULL} to guess from the spreadsheet or a character
-vector containing one entry per column from these options: "blank",
-"numeric", "date" or "text".}
+vector containing one entry per column from these options: "skip",
+"numeric", "date" or "text". The content of a cell in a skipped column is
+never read and that column will not appear in the data frame output.}
 
-\item{na}{Character vector of strings to use for missing values. By default
+\item{na}{Character vector of strings to use for missing values. By default,
 readxl treats blank cells as missing data.}
 
-\item{skip}{Number of rows to skip before reading any data.}
+\item{skip}{Number of rows to skip before reading any data. Leading blank
+rows are automatically skipped.}
 
 \item{guess_max}{Maximum number of rows to use for guessing column types.}
 }
diff --git a/src/CellType.h b/src/CellType.h
index 746d75de..b593d925 100644
--- a/src/CellType.h
+++ b/src/CellType.h
@@ -5,8 +5,12 @@
 #include <libxls/xls.h>
 #include "StringSet.h"
 
+// CELL_BLANK can arise only from an individual cell during type guessing
+// important that it be the first entry = default type, instead of ...
+// CELL_SKIP  which can arise only as a user-specified column type
 enum CellType {
   CELL_BLANK,
+  CELL_SKIP,
   CELL_DATE,
   CELL_NUMERIC,
   CELL_TEXT
@@ -26,11 +30,12 @@ inline std::vector<CellType> cellTypes(Rcpp::CharacterVector x) {
       types.push_back(CELL_DATE);
     } else if (type == "numeric") {
       types.push_back(CELL_NUMERIC);
+    } else if (type == "skip") {
+      types.push_back(CELL_SKIP);
     } else if (type == "text") {
       types.push_back(CELL_TEXT);
     } else {
-      Rcpp::warning("Unknown type '%s' at position %i. Using text instead.",
-        type, i + 1);
+      Rcpp::stop("Unknown type '%s' at position %i", type, i + 1);
     }
   }
 
@@ -42,6 +47,7 @@ inline std::string cellTypeDesc(CellType type) {
   case CELL_BLANK:   return "blank";
   case CELL_DATE:    return "date";
   case CELL_NUMERIC: return "numeric";
+  case CELL_SKIP:    return "skip";
   case CELL_TEXT:    return "text";
   }
   return "???";
@@ -153,6 +159,9 @@ inline Rcpp::RObject makeCol(CellType type, int n) {
   case CELL_NUMERIC:
     return Rcpp::NumericVector(n, NA_REAL);
     break;
+  case CELL_SKIP:
+    return R_NilValue;
+    break;
   case CELL_TEXT:
     return Rcpp::CharacterVector(n, NA_STRING);
     break;
@@ -161,15 +170,14 @@ inline Rcpp::RObject makeCol(CellType type, int n) {
   return R_NilValue;
 }
 
-// Drop blanks from list of columns
-inline Rcpp::List removeBlankColumns(Rcpp::List cols,
-                                     Rcpp::CharacterVector names,
-                                     std::vector<CellType> types) {
+inline Rcpp::List removeSkippedColumns(Rcpp::List cols,
+                                       Rcpp::CharacterVector names,
+                                       std::vector<CellType> types) {
   int p = cols.size();
 
   int p_out = 0;
   for (int j = 0; j < p; ++j) {
-    if (types[j] != CELL_BLANK)
+    if (types[j] != CELL_SKIP)
       p_out++;
   }
 
@@ -177,8 +185,9 @@ inline Rcpp::List removeBlankColumns(Rcpp::List cols,
   Rcpp::CharacterVector names_out(p_out);
   int j_out = 0;
   for (int j = 0; j < p; ++j) {
-    if (types[j] == CELL_BLANK)
+    if (types[j] == CELL_SKIP) {
       continue;
+    }
 
     out[j_out] = cols[j];
     names_out[j_out] = names[j];
diff --git a/src/ColSpec.h b/src/ColSpec.h
new file mode 100644
index 00000000..98facdd5
--- /dev/null
+++ b/src/ColSpec.h
@@ -0,0 +1,49 @@
+#ifndef READXL_COLSPEC_
+#define READXL_COLSPEC_
+
+#include <Rcpp.h>
+#include "CellType.h"
+
+inline std::vector<CellType> recycleTypes(std::vector<CellType> types,
+                                          int ncol) {
+  if (types.size() == 1) {
+    types.resize(ncol);
+    std::fill(types.begin(), types.end(), types[0]);
+  }
+  return types;
+}
+
+inline Rcpp::CharacterVector reconcileNames(Rcpp::CharacterVector names,
+                                            const std::vector<CellType>& types,
+                                            int sheet) {
+  size_t ncol_names = names.size();
+  size_t ncol_types = types.size();
+
+  if (ncol_names == ncol_types) {
+    return names;
+  }
+
+  size_t ncol_noskip = 0;
+  for (size_t i = 0; i < types.size(); i++) {
+    if (types[i] != CELL_SKIP) {
+      ncol_noskip++;
+    }
+  }
+  if (ncol_names != ncol_noskip) {
+    Rcpp::stop("Sheet %d has %d columns (%d unskipped), but `col_names` has length %d.",
+               sheet + 1, ncol_types, ncol_noskip, ncol_names);
+  }
+
+  Rcpp::CharacterVector newNames(ncol_types, "");
+  size_t j_short = 0;
+  for (size_t j_long = 0; j_long < ncol_types; ++j_long) {
+    if (types[j_long] == CELL_SKIP) {
+      continue;
+    }
+    newNames[j_long] = names[j_short];
+    j_short++;
+  }
+  return newNames;
+}
+
+#endif
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 17bcf3aa..614c616f 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -130,8 +130,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // xlsx_col_types
-CharacterVector xlsx_col_types(std::string path, int sheet, CharacterVector na, int nskip, int guess_max);
-RcppExport SEXP readxl_xlsx_col_types(SEXP pathSEXP, SEXP sheetSEXP, SEXP naSEXP, SEXP nskipSEXP, SEXP guess_maxSEXP) {
+CharacterVector xlsx_col_types(std::string path, int sheet, CharacterVector na, int nskip, int guess_max, bool sheetHasColumnNames);
+RcppExport SEXP readxl_xlsx_col_types(SEXP pathSEXP, SEXP sheetSEXP, SEXP naSEXP, SEXP nskipSEXP, SEXP guess_maxSEXP, SEXP sheetHasColumnNamesSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -140,7 +140,8 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< CharacterVector >::type na(naSEXP);
     Rcpp::traits::input_parameter< int >::type nskip(nskipSEXP);
     Rcpp::traits::input_parameter< int >::type guess_max(guess_maxSEXP);
-    rcpp_result_gen = Rcpp::wrap(xlsx_col_types(path, sheet, na, nskip, guess_max));
+    Rcpp::traits::input_parameter< bool >::type sheetHasColumnNames(sheetHasColumnNamesSEXP);
+    rcpp_result_gen = Rcpp::wrap(xlsx_col_types(path, sheet, na, nskip, guess_max, sheetHasColumnNames));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -174,29 +175,6 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// parseXml
-void parseXml(std::string base, std::string internal);
-RcppExport SEXP readxl_parseXml(SEXP baseSEXP, SEXP internalSEXP) {
-BEGIN_RCPP
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< std::string >::type base(baseSEXP);
-    Rcpp::traits::input_parameter< std::string >::type internal(internalSEXP);
-    parseXml(base, internal);
-    return R_NilValue;
-END_RCPP
-}
-// countRows
-int countRows(std::string base, int sheet);
-RcppExport SEXP readxl_countRows(SEXP baseSEXP, SEXP sheetSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< std::string >::type base(baseSEXP);
-    Rcpp::traits::input_parameter< int >::type sheet(sheetSEXP);
-    rcpp_result_gen = Rcpp::wrap(countRows(base, sheet));
-    return rcpp_result_gen;
-END_RCPP
-}
 // zip_xml
 void zip_xml(const std::string& zip_path, const std::string& file_path);
 RcppExport SEXP readxl_zip_xml(SEXP zip_pathSEXP, SEXP file_pathSEXP) {
diff --git a/src/XlsWorkSheet.cpp b/src/XlsWorkSheet.cpp
index 9dab42ff..cd22f280 100644
--- a/src/XlsWorkSheet.cpp
+++ b/src/XlsWorkSheet.cpp
@@ -42,8 +42,10 @@ List xls_cols(std::string path, int i, CharacterVector col_names,
   XlsWorkBook wb = XlsWorkBook(path);
   XlsWorkSheet sheet = wb.sheet(i);
 
-  if (col_names.size() != col_types.size())
-    stop("`col_names` and `col_types` must have the same length");
+  if (col_names.size() != col_types.size()) {
+    Rcpp::stop("Received %d names but %d types.",
+               col_names.size(), col_types.size());
+  }
 
   std::vector<CellType> types = cellTypes(col_types);
   return sheet.readCols(col_names, types, na, nskip);
diff --git a/src/XlsWorkSheet.h b/src/XlsWorkSheet.h
index 4883a2c1..c50211ae 100644
--- a/src/XlsWorkSheet.h
+++ b/src/XlsWorkSheet.h
@@ -18,8 +18,10 @@ class XlsWorkSheet {
   XlsWorkSheet(const XlsWorkBook& wb, int i) {
     offset_ = dateOffset(wb.workbook()->is1904);
 
-    if (i < 0 || i >= wb.nSheets())
-      Rcpp::stop("Invalid sheet index");
+    if (i >= wb.nSheets()) {
+      Rcpp::stop("Can't retrieve sheet in position %d, only %d sheet(s) found.",
+                 i + 1, wb.nSheets());
+    }
 
     pWS_ = xls_getWorkSheet(wb.workbook(), i);
     if (pWS_ == NULL)
@@ -90,8 +92,10 @@ class XlsWorkSheet {
 
   Rcpp::List readCols(Rcpp::CharacterVector names, std::vector<CellType> types,
                       const StringSet &na, int nskip = 0) {
-    if ((int) names.size() != ncol_ || (int) types.size() != ncol_)
-      Rcpp::stop("Need one name and type for each column");
+    if ((int) names.size() != ncol_ || (int) types.size() != ncol_){
+      Rcpp::stop("Received %d names and %d types, but worksheet contains %d columns.",
+                 names.size(), types.size(),  ncol_);
+    }
 
     Rcpp::List cols(ncol_);
 
@@ -115,12 +119,15 @@ class XlsWorkSheet {
         switch(types[j]) {
         case CELL_BLANK:
           break;
+        case CELL_SKIP:
+          break;
         case CELL_NUMERIC:
           switch(type) {
           case CELL_BLANK:
             REAL(col)[i] = NA_REAL;
             break;
           case CELL_NUMERIC:
+          case CELL_SKIP:
           case CELL_DATE:
             REAL(col)[i] = cell.d;
             break;
@@ -132,6 +139,7 @@ class XlsWorkSheet {
           break;
         case CELL_DATE:
           switch(type) {
+          case CELL_SKIP:
           case CELL_BLANK:
             REAL(col)[i] = NA_REAL;
             break;
@@ -163,7 +171,7 @@ class XlsWorkSheet {
       }
     }
 
-    return removeBlankColumns(cols, names, types);
+    return removeSkippedColumns(cols, names, types);
   }
 };
 
diff --git a/src/XlsxCell.h b/src/XlsxCell.h
index 8039c4ea..7985c967 100644
--- a/src/XlsxCell.h
+++ b/src/XlsxCell.h
@@ -50,11 +50,11 @@ class XlsxCell {
     return location_.first;
   }
 
-  int col()  const {
+  int col() const {
     return location_.second;
   }
 
-  std::string asStdString(const std::vector<std::string>& stringTable) {
+  std::string asStdString(const std::vector<std::string>& stringTable) const {
     rapidxml::xml_node<>* v = cell_->first_node("v");
     if (v == NULL)
       return "[NULL]";
@@ -67,7 +67,7 @@ class XlsxCell {
     return stringTable.at(id);
   }
 
-  double asDouble(const StringSet& na) {
+  double asDouble(const StringSet& na) const {
     rapidxml::xml_node<>* v = cell_->first_node("v");
     if (v == NULL || na.contains(v->value()))
       return NA_REAL;
@@ -75,7 +75,7 @@ class XlsxCell {
     return (v == NULL) ? 0 : atof(v->value());
   }
 
-  double asDate(const StringSet& na, int offset) {
+  double asDate(const StringSet& na, int offset) const {
     rapidxml::xml_node<>* v = cell_->first_node("v");
     if (v == NULL || na.contains(v->value()))
       return NA_REAL;
@@ -85,7 +85,7 @@ class XlsxCell {
   }
 
   Rcpp::RObject asCharSxp(const StringSet& na,
-                          const std::vector<std::string>& stringTable) {
+                          const std::vector<std::string>& stringTable) const {
 
     // Is it an inline string?  // 18.3.1.53 is (Rich Text Inline) [p1649]
     rapidxml::xml_node<>* is = cell_->first_node("is");
@@ -98,7 +98,6 @@ class XlsxCell {
       }
     }
 
-
     rapidxml::xml_node<>* v = cell_->first_node("v");
     if (v == NULL)
       return NA_STRING;
@@ -118,7 +117,7 @@ class XlsxCell {
 
   CellType type(const StringSet& na,
                 const std::vector<std::string>& stringTable,
-                const std::set<int>& dateStyles) {
+                const std::set<int>& dateStyles) const {
     rapidxml::xml_attribute<>* t = cell_->first_attribute("t");
 
     if (t == NULL || strncmp(t->value(), "n", 5) == 0) {
@@ -164,7 +163,7 @@ class XlsxCell {
 
 
   Rcpp::RObject stringFromTable(const char* val, const StringSet& na,
-                                const std::vector<std::string>& stringTable) {
+                                const std::vector<std::string>& stringTable) const {
     int id = atoi(val);
     if (id < 0 || id >= (int) stringTable.size()) {
       Rcpp::warning("[%i, %i]: Invalid string id %i", row() + 1, col() + 1, id);
diff --git a/src/XlsxWorkSheet.cpp b/src/XlsxWorkSheet.cpp
index 7848ab04..ba88080f 100644
--- a/src/XlsxWorkSheet.cpp
+++ b/src/XlsxWorkSheet.cpp
@@ -1,5 +1,6 @@
 #include <Rcpp.h>
 #include "XlsxWorkSheet.h"
+#include "ColSpec.h"
 using namespace Rcpp;
 
 // [[Rcpp::export]]
@@ -18,10 +19,11 @@ IntegerVector parse_ref(std::string ref) {
 // [[Rcpp::export]]
 CharacterVector xlsx_col_types(std::string path, int sheet = 0,
                                CharacterVector na = CharacterVector(),
-                               int nskip = 0, int guess_max = 1000) {
+                               int nskip = 0, int guess_max = 1000,
+                               bool sheetHasColumnNames = false) {
 
   XlsxWorkSheet ws(path, sheet, nskip);
-  std::vector<CellType> types = ws.colTypes(na, nskip, guess_max);
+  std::vector<CellType> types = ws.colTypes(na, guess_max, sheetHasColumnNames);
 
   CharacterVector out(types.size());
   for (size_t i = 0; i < types.size(); ++i) {
@@ -43,11 +45,12 @@ List read_xlsx_(std::string path, int sheet, RObject col_names,
 
   XlsxWorkSheet ws(path, sheet, nskip);
 
+  // catches empty sheets and sheets where we skip past all data
   if (ws.nrow() == 0 && ws.ncol() == 0) {
     return Rcpp::List(0);
   }
 
-  // Standardise column names --------------------------------------------------
+  // Get column names --------------------------------------------------
   CharacterVector colNames;
   bool sheetHasColumnNames = false;
   switch(TYPEOF(col_names)) {
@@ -64,7 +67,7 @@ List read_xlsx_(std::string path, int sheet, RObject col_names,
     Rcpp::stop("`col_names` must be a logical or character vector");
   }
 
-  // Standardise column types --------------------------------------------------
+  // Get column types --------------------------------------------------
   std::vector<CellType> colTypes;
   switch(TYPEOF(col_types)) {
   case NILSXP:
@@ -72,10 +75,27 @@ List read_xlsx_(std::string path, int sheet, RObject col_names,
     break;
   case STRSXP:
     colTypes = cellTypes(as<CharacterVector>(col_types));
+    colTypes = recycleTypes(colTypes, ws.ncol());
     break;
   default:
     Rcpp::stop("`col_types` must be a character vector or NULL");
   }
+  if ((int) colTypes.size() != ws.ncol()) {
+    Rcpp::stop("Sheet %d has %d columns, but `col_types` has length %d.",
+               sheet + 1, ws.ncol(), colTypes.size());
+  }
+
+  // convert blank columns to a default type (numeric today, but logical soon)
+  // can only happen when
+  //   * col_types = NULL and we've learned them from data
+  //   * all cells in column are empty or match one of the na strings
+  for (size_t i = 0; i < colTypes.size(); i++) {
+    if (colTypes[i] == CELL_BLANK) {
+      colTypes[i] = CELL_NUMERIC;
+    }
+  }
+
+  colNames = reconcileNames(colNames, colTypes, sheet);
 
   return ws.readCols(colNames, colTypes, na, sheetHasColumnNames);
 }
diff --git a/src/XlsxWorkSheet.h b/src/XlsxWorkSheet.h
index b5701ecd..192947e7 100644
--- a/src/XlsxWorkSheet.h
+++ b/src/XlsxWorkSheet.h
@@ -31,7 +31,7 @@ class XlsxWorkSheet {
   {
     rapidxml::xml_node<>* rootNode;
 
-    if (sheet_i > wb.n_sheets()) {
+    if (sheet_i >= wb.n_sheets()) {
       Rcpp::stop("Can't retrieve sheet in position %d, only %d sheet(s) found.",
                  sheet_i + 1,  wb.n_sheets());
     }
@@ -68,79 +68,49 @@ class XlsxWorkSheet {
     return sheetName_;
   }
 
-  // JB: this should either take colNames as an argument or have a bit of code
-  // moved out of here, so we don't read column names again inside this fxn.
-  // More comments near end of fxn.
   std::vector<CellType> colTypes(const StringSet& na,
                                  int guess_max = 1000,
                                  bool has_col_names = false) {
     std::vector<CellType> types;
     types.resize(ncol_);
 
-    std::vector<XlsxCell>::const_iterator it, row_end;
-    it = has_col_names ? secondRow_: firstRow_;
+    std::vector<XlsxCell>::const_iterator xcell;
+    xcell = has_col_names ? secondRow_ : firstRow_;
 
     // no cell data to consult re: types
-    if (it == cells_.end()) {
+    if (xcell == cells_.end()) {
       for (size_t i = 0; i < types.size(); i++) {
-        types[i] = CELL_NUMERIC;
+        types[i] = CELL_BLANK;
       }
       return types;
     }
 
+    // base is row the data starts on **in the spreadsheet**
     int base = firstRow_->row() + has_col_names;
-    // we have consulted i rows re: determining col types
-    int i;
-    // account for any empty rows between column headers and data start
-    i = it->row() - base;
-
-    while (i < guess_max && it != cells_.end()) {
-      // find the end of current row
-      row_end = it;
-      while(row_end != cells_.end() && row_end->row() == it->row()) {
-        row_end++;
-      }
-
-      while (it != row_end) {
-        XlsxCell xcell = *it;
-        if (xcell.col() < ncol_) {
-          CellType type = xcell.type(na, wb_.stringTable(), wb_.dateStyles());
-          if (type > types[xcell.col()]) {
-            types[xcell.col()] = type;
-          }
+    while (xcell != cells_.end() && xcell->row() - base < guess_max) {
+      if (xcell->col() < ncol_) {
+        CellType type = xcell->type(na, wb_.stringTable(), wb_.dateStyles());
+        if (type > types[xcell->col()]) {
+          types[xcell->col()] = type;
         }
-        it++;
       }
-
-      i++;
+      xcell++;
     }
 
-    // JB: this usually rederives the column names, which seems unwise
-    // I propose to move such reconciliation of column names and types out
-    // of here and into the parent function , read_xlsx_()
-    if (has_col_names) {
-      // blank columns with a name aren't blank
-      Rcpp::CharacterVector names = colNames();
-      for (size_t i = 0; i < types.size(); i++) {
-        if (types[i] == CELL_BLANK && names[i] != NA_STRING && names[i] != "")
-          types[i] = CELL_NUMERIC;
-      }
-    }
     return types;
   }
 
   Rcpp::CharacterVector colNames() {
     Rcpp::CharacterVector out(ncol_);
-    std::vector<XlsxCell>::const_iterator it = firstRow_;
-    int base = it->row();
+    std::vector<XlsxCell>::const_iterator xcell = firstRow_;
+    int base = xcell->row();
 
-    while(it != cells_.end() && it->row() == base) {
-      XlsxCell xcell = *it;
-      if (xcell.col() >= ncol_) {
+    while(xcell != cells_.end() && xcell->row() == base) {
+      if (xcell->col() >= ncol_) {
         break;
       }
-      out[xcell.col()] = xcell.asCharSxp("", wb_.stringTable());
-      it++;
+      out[xcell->col()] = xcell->asCharSxp("", wb_.stringTable());
+      xcell++;
     }
     return out;
   }
@@ -149,111 +119,94 @@ class XlsxWorkSheet {
                       const std::vector<CellType>& types,
                       const StringSet& na,
                       bool has_col_names = false) {
-    // JB: suspect this should move out of here and into a function that does
-    // this and the last rationalization re col names and types in colTypes
-    if ((int) names.size() != ncol_ || (int) types.size() != ncol_)
-      Rcpp::stop("Need one name and type for each column");
 
-    std::vector<XlsxCell>::const_iterator it, row_end;
-    it = has_col_names ? secondRow_: firstRow_;
-
-    // no cell data to read
-    if (it == cells_.end()) {
-      Rcpp::List cols(ncol_);
-      for (int j = 0; j < ncol_; ++j) {
-        cols[j] = makeCol(types[j], 0);
-      }
-      return removeBlankColumns(cols, names, types);
-    }
+    std::vector<XlsxCell>::const_iterator xcell;
+    xcell = has_col_names ? secondRow_: firstRow_;
 
+    // base is row the data starts on **in the spreadsheet**
     int base = firstRow_->row() + has_col_names;
-    // we have read i rows of data
-    int i;
-    // account for any empty rows between column headers and data start
-    i = it->row() - base;
-
-    // Initialise columns, accounting for leading skipped or blank rows
-    int n = nrow_ - base;
+    int n = (xcell == cells_.end()) ? 0 : nrow_ - base;
     Rcpp::List cols(ncol_);
+    cols.attr("names") = names;
     for (int j = 0; j < ncol_; ++j) {
       cols[j] = makeCol(types[j], n);
     }
 
-    while (it != cells_.end()) {
+    if (n == 0) {
+      return cols;
+    }
 
+    while (xcell != cells_.end()) {
+
+      int i = xcell->row();
+      int j = xcell->col();
       if ((i + 1) % 1000 == 0) {
         Rcpp::checkUserInterrupt();
       }
-
-      // find the end of this row
-      row_end = it;
-      while(row_end != cells_.end() && row_end->row() == it->row()) {
-        row_end++;
+      if (types[j] == CELL_SKIP || j >= ncol_) {
+        xcell++;
+        continue;
       }
 
-      while (it != row_end) {
-        XlsxCell xcell = *it;
-        if (xcell.col() >= ncol_) {
-          continue;
-        }
-        CellType type = xcell.type(na, wb_.stringTable(), wb_.dateStyles());
-        Rcpp::RObject col = cols[xcell.col()];
-        // row to write into
-        int row = xcell.row() - base;
-        // Needs to compare to actual cell type to give warnings
-        switch(types[xcell.col()]) {
-        case CELL_BLANK:
+      CellType type = xcell->type(na, wb_.stringTable(), wb_.dateStyles());
+      Rcpp::RObject col = cols[j];
+      // row to write into
+      int row = i - base;
+      // Needs to compare to actual cell type to give warnings
+      switch(types[j]) {
+      case CELL_SKIP:
+        break;
+      case CELL_BLANK:
+        break;
+      case CELL_NUMERIC:
+        switch(type) {
+        case CELL_SKIP:
           break;
         case CELL_NUMERIC:
-          switch(type) {
-          case CELL_NUMERIC:
-          case CELL_DATE:
-            REAL(col)[row] = xcell.asDouble(na);
-            break;
-          case CELL_BLANK:
-            REAL(col)[row] = NA_REAL;
-            break;
-          case CELL_TEXT:
-            Rcpp::warning("[%i, %i]: expecting numeric: got '%s'",
-                          xcell.row() + 1, xcell.col() + 1,
-                          xcell.asStdString(wb_.stringTable()));
-            REAL(col)[row] = NA_REAL;
-          }
+        case CELL_DATE:
+          REAL(col)[row] = xcell->asDouble(na);
+          break;
+        case CELL_BLANK:
+          REAL(col)[row] = NA_REAL;
+          break;
+        case CELL_TEXT:
+          Rcpp::warning("[%i, %i]: expecting numeric: got '%s'",
+                        i + 1, j + 1, xcell->asStdString(wb_.stringTable()));
+          REAL(col)[row] = NA_REAL;
+        }
+        break;
+      case CELL_DATE:
+        switch(type) {
+        case CELL_SKIP:
           break;
         case CELL_DATE:
-          switch(type) {
-          case CELL_DATE:
-            REAL(col)[row] = xcell.asDate(na, wb_.offset());
-            break;
-          case CELL_BLANK:
-            REAL(col)[row] = NA_REAL;
-            break;
-          case CELL_NUMERIC:
-          case CELL_TEXT:
-            Rcpp::warning("[%i, %i]: expecting date: got '%s'",
-                          xcell.row() + 1, xcell.col() + 1,
-                          xcell.asStdString(wb_.stringTable()));
-            REAL(col)[row] = NA_REAL;
-            break;
-          }
+          REAL(col)[row] = xcell->asDate(na, wb_.offset());
           break;
+        case CELL_BLANK:
+          REAL(col)[row] = NA_REAL;
+          break;
+        case CELL_NUMERIC:
         case CELL_TEXT:
-          if (type == CELL_BLANK) {
-            SET_STRING_ELT(col, row, NA_STRING);
-          } else {
-            SET_STRING_ELT(col, row, xcell.asCharSxp(na, wb_.stringTable()));
-          }
+          Rcpp::warning("[%i, %i]: expecting date: got '%s'",
+                        i + 1, j + 1, xcell->asStdString(wb_.stringTable()));
+          REAL(col)[row] = NA_REAL;
           break;
         }
-        it++;
+        break;
+      case CELL_TEXT:
+        if (type == CELL_BLANK) {
+          SET_STRING_ELT(col, row, NA_STRING);
+        } else {
+          SET_STRING_ELT(col, row, xcell->asCharSxp(na, wb_.stringTable()));
+        }
+        break;
       }
-      ++i;
+    xcell++;
     }
 
-    return removeBlankColumns(cols, names, types);
+    return removeSkippedColumns(cols, names, types);
   }
 
-
 private:
 
   void loadCells() {
diff --git a/src/benchmarks.cpp b/src/benchmarks.cpp
deleted file mode 100644
index 23b07f8f..00000000
--- a/src/benchmarks.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <Rcpp.h>
-#include "rapidxml.h"
-#include "XlsxWorkBook.h"
-using namespace Rcpp;
-
-// [[Rcpp::export]]
-void parseXml(std::string base, std::string internal) {
-  std::string file = zip_buffer(base, internal);
-  Rcout << "File size: " << file.size() << " bytes\n";
-
-  rapidxml::xml_document<> xml;
-  xml.parse<0>(&file[0]);
-}
-
-// [[Rcpp::export]]
-int countRows(std::string base, int sheet) {
-  // FYI: sheet lookup logic has changed in main package
-  // this is not generally correct
-  std::string sheetPath = tfm::format("xl/worksheets/sheet%i.xml", sheet + 1);
-
-  std::string file = zip_buffer(base, sheetPath);
-  Rcout << "File size: " << file.size() << " bytes\n";
-
-  rapidxml::xml_document<> xml;
-  xml.parse<0>(&file[0]);
-
-  rapidxml::xml_node<>* rootNode_ = xml.first_node("worksheet");
-  if (rootNode_ == NULL)
-    return 0;
-
-  rapidxml::xml_node<>* sheetData_ = rootNode_->first_node("sheetData");
-  if (sheetData_ == NULL)
-    return 0;
-
-  int i = 0;
-  for (rapidxml::xml_node<>* row = sheetData_->first_node("row");
-       row; row = row->next_sibling("row")) {
-    i++;
-  }
-
-  return i;
-}
diff --git a/tests/testthat/test-col-names.R b/tests/testthat/test-col-names.R
index 1b104377..efc5521d 100644
--- a/tests/testthat/test-col-names.R
+++ b/tests/testthat/test-col-names.R
@@ -57,3 +57,28 @@ test_that("column names are de-duplicated", {
   df <- read_excel(test_sheet("unnamed-duplicated-columns.xls"))
   expect_identical(names(df)[4], "var2__1")
 })
+
+test_that("wrong length column names are rejected [xlsx]", {
+  expect_error(
+    read_excel(test_sheet("iris-excel.xlsx"),col_names = LETTERS[1:3]),
+    "Sheet 1 has 5 columns (5 unskipped), but `col_names` has length 3.",
+    fixed = TRUE
+  )
+})
+
+test_that("wrong length column names are rejected [xls]", {
+  expect_error(
+    read_excel(test_sheet("iris-excel.xls"), col_names = LETTERS[1:3]),
+    "Received 3 names but 5 types."
+  )
+})
+
+test_that("column_names can anticipate skipping [xlsx]", {
+  expect_silent(
+    df <- read_excel(test_sheet("iris-excel.xlsx"),
+                     col_names = c("one", "two", "three"), skip = 1,
+                     col_types = c("numeric", "numeric", "skip", "skip", "text"))
+  )
+  expect_identical(dim(df), c(150L, 3L))
+  expect_identical(names(df), c("one", "two", "three"))
+})
diff --git a/tests/testthat/test-col-types.R b/tests/testthat/test-col-types.R
index 3cb00d52..df9a4437 100644
--- a/tests/testthat/test-col-types.R
+++ b/tests/testthat/test-col-types.R
@@ -1,4 +1,55 @@
-context("Col types")
+context("Column types")
+
+test_that("illegal col_types are rejected", {
+  expect_error(
+    read_excel(test_sheet("types.xlsx"),
+               col_types = c("foo", "numeric", "text", "bar")),
+    "Illegal column type"
+  )
+})
+
+test_that("request for 'blank' col type gets deprecation message and fix", {
+  expect_message(
+    read_excel(test_sheet("types.xlsx"),
+               col_types = rep_len(c("blank", "text"), length.out = 5)),
+    "`col_type = \"blank\"` deprecated. Use \"skip\" instead.",
+    fixed = TRUE
+  )
+})
+
+test_that("invalid col_types are rejected", {
+  expect_error(
+    read_excel(test_sheet("types.xlsx"), col_types = character()),
+    "length(col_types) > 0 is not TRUE", fixed = TRUE
+  )
+  expect_error(
+    read_excel(test_sheet("types.xlsx"), col_types = 1:3),
+    "is.character(col_types) is not TRUE", fixed = TRUE
+  )
+  expect_error(
+    read_excel(test_sheet("types.xlsx"), col_types = c(NA, "text", "numeric")),
+    "!anyNA(col_types) is not TRUE", fixed = TRUE
+  )
+})
+
+test_that("col_types can be specified", {
+  df <- read_excel(test_sheet("iris-excel.xlsx"),
+                   col_types = c("numeric", "text", "numeric", "numeric", "text"))
+  expect_is(df[[2]], "character")
+})
+
+test_that("col_types are recycled", {
+  df <- read_excel(test_sheet("types.xlsx"), col_types = "text")
+  expect_match(vapply(df, class, character(1)), "character")
+})
+
+test_that("inappropriate col_types generate warning", {
+  expect_warning(
+    read_excel(test_sheet("iris-excel.xlsx"),
+               col_types = c("numeric", "text", "numeric", "numeric", "numeric")),
+    "expecting numeric"
+  )
+})
 
 test_that("types imputed & read correctly [xlsx]", {
   types <- read_excel(test_sheet("types.xlsx"))
@@ -12,8 +63,15 @@ test_that("types imputed & read correctly [xlsx]", {
 
 test_that("types imputed & read correctly [xls]", {
   expect_output(
+    ## valgrind reports this
+    ## Conditional jump or move depends on uninitialised value(s)
     types <- read_excel(test_sheet("types.xls")),
     "Unknown type: 517"
+    ## definitely due to these 'Unknown type: 517' msgs
+    ## line 102 in CellType.h
+    ##   Rcpp::Rcout << "Unknown type: " << cell.id << "\n";
+    ## if I skip this test, memcheck report is as clean as it ever gets
+    ## https://github.com/tidyverse/readxl/issues/259
   )
   expect_is(types$number, "numeric")
   expect_is(types$string, "character")
@@ -23,7 +81,7 @@ test_that("types imputed & read correctly [xls]", {
   skip("revisit these expectations as xls problems are fixed")
 })
 
-test_that("max_guess is honored for col_types [xlsx]", {
+test_that("guess_max is honored for col_types [xlsx]", {
   expect_warning(
     types <- read_excel(test_sheet("types.xlsx"), guess_max = 2),
     "expecting numeric"
@@ -31,6 +89,19 @@ test_that("max_guess is honored for col_types [xlsx]", {
   expect_identical(types$string_in_row_3, c(1, 2, NA))
 })
 
-test_that("max_guess is honored for col_types [xls]", {
+test_that("guess_max is honored for col_types [xls]", {
   skip("write this test as xls problems are fixed")
 })
+
+test_that("wrong length col types generates error", {
+  expect_error(
+    read_excel(test_sheet("iris-excel.xlsx"), col_types = c("numeric", "text")),
+    "Sheet 1 has 5 columns, but `col_types` has length 2."
+  )
+  expect_error(
+    read_excel(test_sheet("iris-excel.xls"), col_types = c("numeric", "text")),
+    "Received 5 names but 2 types."
+  )
+})
+
+
diff --git a/tests/testthat/test-empty.R b/tests/testthat/test-empty.R
index b0dec8ea..655f4bd2 100644
--- a/tests/testthat/test-empty.R
+++ b/tests/testthat/test-empty.R
@@ -23,33 +23,3 @@ test_that("non-empty sheets act that way if we skip past everything", {
   out <- read_excel(test_sheet("skipping.xlsx"), skip = 10)
   expect_identical(out, tibble::tibble())
 })
-
-test_that("empty cells with a style are not loaded", {
-  ## what's important about this sheet?
-  ## contains empty cells with a custom format
-  ## therefore they appear in the xml and have a style attribute
-  ## where are they?
-  ## in the embedded empty columns, w/ and w/o a name
-  ## in a trailing empty column
-  ## in some trailing rows
-  out <- read_excel(test_sheet("style-only-cells.xlsx"))
-  df <- tibble::tibble(var1 = c("val1,1", "val2,1", "val3,1"),
-                       var2 = NA_real_,
-                       var3 = c("aa", "bb", "cc"),
-                       var5 = c(1, 2, 3))
-  expect_equal(out, df)
-  skip("revisit this when dust settles re: treatment of empty columns")
-})
-
-test_that("user-supplied column names play nicely with empty columns", {
-  skip("waiting for dust to settle re: treatment of empty columns")
-  ## do stuff like this:
-  out <- read_excel(
-    test_sheet("style-only-cells.xlsx"),
-    col_names = LETTERS[1:4]
-  )
-  out <- read_excel(
-    test_sheet("style-only-cells.xlsx"),
-    col_names = LETTERS[1:5]
-  )
-})
diff --git a/tests/testthat/test-missing-values.R b/tests/testthat/test-missing-values.R
index e9d15e79..d43de413 100644
--- a/tests/testthat/test-missing-values.R
+++ b/tests/testthat/test-missing-values.R
@@ -70,7 +70,7 @@ test_that("text values in numeric column gives warning & NA", {
 
 test_that("empty first column gives valid data.frame", {
   df <- read_excel(test_sheet("missing-first-column.xlsx"), col_names = FALSE)
-  expect_equal(nrow(df), length(df[[1]]))
+  expect_equal(nrow(df), length(df[[2]]))
 })
 
 test_that("empty named column gives NA column", {
@@ -78,6 +78,29 @@ test_that("empty named column gives NA column", {
   df2 <- read_excel(test_sheet("empty-named-column.xls"), col_names = TRUE)
   expect_equal(ncol(df1), 4)
   expect_equal(names(df1)[2], "y")
+  expect_true(all(is.na(df1$y)))
+  expect_true(all(is.numeric(df1$y)))
   expect_equal(ncol(df2), 4)
   expect_equal(names(df2)[2], "y")
+  expect_true(all(is.na(df2$y)))
+  expect_true(all(is.numeric(df2$y)))
+})
+
+test_that("empty (styled) cells are not loaded, but can survive as NA", {
+  ## what's important about this sheet?
+  ## contains empty cells with a custom format
+  ## therefore they appear in the xml and have a style attribute
+  ## where are they?
+  ## in the embedded empty columns, w/ and w/o a name
+  ## in a trailing empty column WHICH SHOULD BE DROPPED
+  ## in some trailing rows WHICH SHOULD BE DROPPED
+  out <- read_excel(test_sheet("style-only-cells.xlsx"))
+  df <- tibble::tibble(
+    var1 = c("val1,1", "val2,1", "val3,1"),
+    var2 = NA_real_,
+    var3 = c("aa", "bb", "cc"),
+    X__1 = NA_real_,
+    var5 = c(1, 2, 3)
+  )
+  expect_equal(out, df)
 })
diff --git a/tests/testthat/test-sheets.R b/tests/testthat/test-sheets.R
index 2f2d170f..24f2737e 100644
--- a/tests/testthat/test-sheets.R
+++ b/tests/testthat/test-sheets.R
@@ -19,8 +19,14 @@ test_that("informative error when requesting non-existent sheet by name", {
 
 test_that("informative error when requesting non-existent sheet by position", {
   expect_error(
-    read_excel(test_sheet("iris-excel.xlsx"), sheet = 200),
-    "Can't retrieve sheet in position 200"
+    read_excel(test_sheet("iris-excel.xlsx"), sheet = 2),
+    "Can't retrieve sheet in position 2, only 1 sheet(s) found.",
+    fixed = TRUE
+  )
+  expect_error(
+    read_excel(test_sheet("iris-excel.xls"), sheet = 2),
+    "Can't retrieve sheet in position 2, only 1 sheet(s) found.",
+    fixed = TRUE
   )
 })