diff --git a/NAMESPACE b/NAMESPACE index e0b176c..d114525 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -84,6 +84,7 @@ export(export_list) export(factorize) export(gather_attrs) export(get_ext) +export(get_info) export(import) export(import_list) export(install_formats) diff --git a/NEWS.md b/NEWS.md index bf2d2b5..1447dae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Add support for `qs` #275 h/t David Schoch * Use `arrow` to import / export `feather` #340 * `export_list` can write multiple data frames to a single archive file (e.g. zip, tar) or a directory #346 h/t David Schoch +* `get_info` is added #350 * Bug fixes - ... is correctly passed for exporting ODS and feather #318 - POTENTIALLY BREAKING: JSON are exported in UTF-8 by default; solved encoding issues on @@ -20,6 +21,7 @@ - remove all @importFrom #325 h/t David Schoch - rearrange "Package Philosophy" as a Vignette #320 - Create a single source of truth about all import and export functions #313 + - Clarify all concepts: now there is only `format` #351 * New authors - David Schoch @schochastics diff --git a/R/compression.R b/R/compression.R index f0691f6..3570042 100644 --- a/R/compression.R +++ b/R/compression.R @@ -1,18 +1,14 @@ find_compress <- function(f) { if (grepl("zip$", f)) { - file <- sub("\\.zip$", "", f) - compress <- "zip" - } else if (grepl("tar\\.gz$", f)) { - file <- sub("\\.tar\\.gz$", "", f) - compress <- "tar" - } else if (grepl("tar$", f)) { - file <- sub("\\.tar$", "", f) - compress <- "tar" - } else { - file <- f - compress <- NA_character_ + return(list(file = sub("\\.zip$", "", f), compress = "zip")) + } + if (grepl("tar\\.gz$", f)) { + return(list(file = sub("\\.tar\\.gz$", "", f), compress = "tar")) + } + if (grepl("tar$", f)) { + return(list(file = sub("\\.tar$", "", f), compress = "tar")) } - return(list(file = file, compress = compress)) + return(list(file = f, compress = NA_character_)) } compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2", 
"xz")) { diff --git a/R/export.R b/R/export.R index 10d73f4..d4b691d 100644 --- a/R/export.R +++ b/R/export.R @@ -82,41 +82,41 @@ export <- function(x, file, format, ...) { .check_file(file, single_only = TRUE) if (missing(file) && missing(format)) { stop("Must specify 'file' and/or 'format'") - } else if (!missing(file) && !missing(format)) { - fmt <- tolower(format) + } + if (!missing(file) && !missing(format)) { + format <- tolower(format) cfile <- file f <- find_compress(file) file <- f$file compress <- f$compress - } else if (!missing(file) && missing(format)) { + } + if (!missing(file) && missing(format)) { cfile <- file f <- find_compress(file) file <- f$file compress <- f$compress - fmt <- get_ext(file) - } else if (!missing(format)) { - fmt <- get_type(format) - file <- paste0(as.character(substitute(x)), ".", fmt) + format <- get_info(file)$input ## this line is slight confusing + } + if (!missing(format) && missing(file)) { + format <- .standardize_format(format) + file <- paste0(as.character(substitute(x)), ".", format) compress <- NA_character_ } - fmt <- get_type(fmt) + format <- .standardize_format(format) outfile <- file - - data_name <- as.character(substitute(x)) - if (!is.data.frame(x) & !is.matrix(x)) { - if (!fmt %in% c("xlsx", "html", "rdata", "rds", "json")) { - stop("'x' is not a data.frame or matrix") - } - } else if (is.matrix(x)) { + if (is.matrix(x)) { x <- as.data.frame(x) } + if (!is.data.frame(x) && !format %in% c("xlsx", "html", "rdata", "rds", "json", "qs")) { + stop("'x' is not a data.frame or matrix", call. 
= FALSE) + } .create_directory_if_not_exists(file = file) ## fix 347 - if (fmt %in% c("gz", "gzip")) { - fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE)) + if (format %in% c("gz", "gzip")) { + format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format file <- gzfile(file, "w") on.exit(close(file)) } - class(file) <- c(paste0("rio_", fmt), class(file)) + class(file) <- c(paste0("rio_", format), class(file)) .export(file = file, x = x, ...) if (!is.na(compress)) { cfile <- compress_out(cfile = cfile, filename = file, type = compress) diff --git a/R/extensions.R b/R/extensions.R index 723de69..2a40369 100644 --- a/R/extensions.R +++ b/R/extensions.R @@ -18,31 +18,19 @@ ## @rdname extensions .import.default <- function(file, ...) { - x <- gettext("%s format not supported. Consider using the '%s()' function") - xA <- gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again.") - fmt <- tools::file_ext(file) - out <- switch(fmt, - bean = sprintf(xA, fmt, "ledger", "ledger"), - beancount = sprintf(xA, fmt, "ledger", "ledger"), - bib = sprintf(x, fmt, "bib2df::bib2df"), - bmp = sprintf(x, fmt, "bmp::read.bmp"), - doc = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"), - docx = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"), - gexf = sprintf(x, fmt, "rgexf::read.gexf"), - gnumeric = sprintf(x, fmt, "gnumeric::read.gnumeric.sheet"), - hledger = sprintf(xA, fmt, "ledger", "ledger"), - jpeg = sprintf(x, fmt, "jpeg::readJPEG"), - jpg = sprintf(x, fmt, "jpeg::readJPEG"), - ledger = sprintf(xA, fmt, "ledger", "ledger"), - npy = sprintf(x, fmt, "RcppCNPy::npyLoad"), - pdf = sprintf(x, fmt, "tabulizer::extract_tables"), - png = sprintf(x, fmt, "png::readPNG"), - sdmx = sprintf(x, fmt, "sdmx::readSDMX"), - sss = sprintf(x, fmt, "sss::read.sss"), - tiff = sprintf(x, fmt, "tiff::readTIFF"), - gettext("Format not supported") - ) - stop(out, call. 
= FALSE) + fileinfo <- get_info(file) + if (is.na(fileinfo$type) || is.na(fileinfo$import_function) || fileinfo$import_function == "") { + stop("Format not supported", call. = FALSE) + } + if (fileinfo$type == "known") { + stop(sprintf(gettext("%s format not supported. Consider using the '%s()' function"), + fileinfo$format, fileinfo$import_function), call. = FALSE) + } + if (fileinfo$type == "enhance") { + pkg <- stringi::stri_extract_first(fileinfo$import_function, regex = "[a-zA-Z0-9\\.]+") + stop(sprintf(gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again."), + fileinfo$format, pkg, pkg), call. = FALSE) + } } ## @rdname extensions @@ -52,16 +40,12 @@ ## @rdname extensions .export.default <- function(file, x, ...) { - x <- gettext("%s format not supported. Consider using the '%s()' function") - fmt <- tools::file_ext(file) - out <- switch(fmt, - gexf = sprintf(x, fmt, "rgexf::write.gexf"), - jpg = sprintf(x, fmt, "jpeg::writeJPEG"), - npy = sprintf(x, fmt, "RcppCNPy::npySave"), - png = sprintf(x, fmt, "png::writePNG"), - tiff = sprintf(x, fmt, "tiff::writeTIFF"), - xpt = sprintf(x, fmt, "SASxport::write.xport"), - gettext("Format not supported") - ) - stop(out, call. = FALSE) + fileinfo <- get_info(file) + if (is.na(fileinfo$type) || is.na(fileinfo$export_function) || fileinfo$export_function == "") { + stop("Format not supported", call. = FALSE) + } + if (fileinfo$type == "known") { + stop(sprintf(gettext("%s format not supported. Consider using the '%s()' function"), + fileinfo$format, fileinfo$export_function), call. = FALSE) + } } diff --git a/R/import.R b/R/import.R index a92af3c..d41f41c 100644 --- a/R/import.R +++ b/R/import.R @@ -110,20 +110,17 @@ import <- function(file, format, setclass, which, ...) 
{ file <- parse_tar(file, which = which) } if (missing(format)) { - fmt <- get_ext(file) - if (fmt %in% c("gz", "gzip")) { - fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE)) + format <- get_info(file)$format + if (format %in% c("gz", "gzip")) { + format <- get_info(tools::file_path_sans_ext(file, compression = FALSE))$format file <- gzfile(file) - } else { - fmt <- get_type(fmt) } } else { - fmt <- get_type(format) + ## format such as "|" + format <- .standardize_format(format) } - args_list <- list(...) - - class(file) <- c(paste0("rio_", fmt), class(file)) + class(file) <- c(paste0("rio_", format), class(file)) if (missing(which)) { x <- .import(file = file, ...) } else { diff --git a/R/import_list.R b/R/import_list.R index e42ed80..81a6243 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -107,17 +107,17 @@ function(file, if (grepl("^http.*://", file)) { file <- remote_to_local(file) } - if (get_ext(file) == "rdata") { + if (get_info(file)$format == "rdata") { e <- new.env() load(file, envir = e) return(as.list(e)) } - if (!get_ext(file) %in% c("html", "xlsx", "xls", "zip")) { + if (!get_info(file)$format %in% c("html", "xlsx", "xls", "zip")) { which <- 1 whichnames <- NULL } ## getting list of `whichnames` - if (get_ext(file) == "html") { + if (get_info(file)$format == "html") { .check_pkg_availability("xml2") tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table") if (missing(which)) { @@ -128,7 +128,7 @@ function(file, FUN.VALUE = character(1)) names(which) <- whichnames } - if (get_ext(file) %in% c("xls","xlsx")) { + if (get_info(file)$format %in% c("xls","xlsx")) { ##.check_pkg_availability("readxl") whichnames <- readxl::excel_sheets(path = file) if (missing(which)) { @@ -140,7 +140,7 @@ function(file, whichnames <- whichnames[which] } } - if (get_ext(file) %in% c("zip")) { + if (get_info(file)$format %in% c("zip")) { if (missing(which)) { whichnames <- utils::unzip(file, list = TRUE)[, "Name"] which <- 
seq_along(whichnames) diff --git a/R/remote_to_local.R b/R/remote_to_local.R index 7cc7730..3b145a9 100644 --- a/R/remote_to_local.R +++ b/R/remote_to_local.R @@ -3,38 +3,38 @@ remote_to_local <- function(file, format) { # handle google sheets urls if (grepl("docs\\.google\\.com/spreadsheets", file)) { file <- convert_google_url(file, export_as = "csv") - fmt <- "csv" + format <- "csv" } else { # try to extract format from URL - fmt <- try(get_ext(file), silent = TRUE) - if (inherits(fmt, "try-error")) { - fmt <- "TMP" + format <- try(get_info(file)$format, silent = TRUE) + if (inherits(format, "try-error")) { + format <- "TMP" } } } else { # handle google sheets urls if (grepl("docs\\.google\\.com/spreadsheets", file)) { - fmt <- get_type(format) - if (fmt %in% c("csv", "tsv", "xlsx", "ods")) { - file <- convert_google_url(file, export_as = fmt) - fmt <- fmt + format <- .standardize_format(format) + if (format %in% c("csv", "tsv", "xlsx", "ods")) { + file <- convert_google_url(file, export_as = format) + format <- format } else { file <- convert_google_url(file, export_as = "csv") - fmt <- "csv" + format <- "csv" } } else { - fmt <- get_type(format) + format <- .standardize_format(format) } } # save file locally - temp_file <- tempfile(fileext = paste0(".", fmt)) + temp_file <- tempfile(fileext = paste0(".", format)) u <- curl::curl_fetch_memory(file) writeBin(object = u$content, con = temp_file) - if (fmt == "TMP") { + if (format == "TMP") { # try to extract format from curl's final URL - fmt <- try(get_ext(u$url), silent = TRUE) - if (inherits(fmt, "try-error")) { + format <- try(get_info(u$url)$format, silent = TRUE) + if (inherits(format, "try-error")) { # try to extract format from headers h1 <- curl::parse_headers(u$headers) # check `Content-Disposition` header @@ -59,7 +59,7 @@ remote_to_local <- function(file, format) { # ## PARSE MIME TYPE # } } else { - f <- sub("TMP$", fmt, temp_file) + f <- sub("TMP$", format, temp_file) file.copy(from = temp_file, to 
= f) unlink(temp_file) temp_file <- f diff --git a/R/sysdata.rda b/R/sysdata.rda index 11634b5..b575fe3 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/utils.R b/R/utils.R index 416a70e..45a53ad 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,119 +1,73 @@ -#' @title Get File Type from Extension -#' @description A utility function to retrieve the file type from a file extension (via its filename/path/URL) +#' @title Get File Info +#' @description A utility function to retrieve the file information of a filename, path, or URL. #' @param file A character string containing a filename, file path, or URL. -#' @return A characters string containing a file type recognized by rio. +#' @return For [get_info()], a list is returned with the following slots +#' \itemize{ +#' \item `input` file extension or information used to identify the possible file format +#' \item `format` file format, see `format` argument of [import()] +#' \item `type` "import" (supported by default); "suggest" (supported by suggested packages, see [install_formats()]); "enhance" and "known" are not directly supported; `NA` is unsupported +#' \item `format_name` name of the format +#' \item `import_function` What function is used to import this file +#' \item `export_function` What function is used to export this file +#' \item `file` `file` +#' } +#' For [get_ext()], just `input` (usually file extension) is returned; retained for backward compatibility.
#' @examples -#' get_ext("starwars.xlsx") -#' get_ext("starwars.ods") +#' get_info("starwars.xlsx") +#' get_info("starwars.ods") +#' get_info("https://github.com/ropensci/readODS/raw/v2.1/starwars.ods") +#' get_info("~/duran_duran_rio.mp3") #' get_ext("clipboard") ## "clipboard" #' get_ext("https://github.com/ropensci/readODS/raw/v2.1/starwars.ods") #' @export -get_ext <- function(file) { - if (!is.character(file)) { - stop("'file' is not a string") +get_info <- function(file) { + .check_file(file, single_only = TRUE) + if (tolower(file) == "clipboard") { + return(.query_format(input = "clipboard", file = "clipboard")) } if (!grepl("^http.*://", file)) { - fmt <- tools::file_ext(file) - } else if (grepl("^http.*://", file)) { + ext <- tolower(tools::file_ext(file)) + } else { parsed <- strsplit(strsplit(file, "?", fixed = TRUE)[[1]][1], "/", fixed = TRUE)[[1]] - file <- parsed[length(parsed)] - fmt <- tools::file_ext(file) - get_type(fmt) + url_file <- parsed[length(parsed)] + ext <- tolower(tools::file_ext(url_file)) } - if (file == "clipboard") { - return("clipboard") - } else if (fmt == "") { + if (ext == "") { stop("'file' has no extension", call. 
= FALSE) - } else { - return(tolower(fmt)) } + return(.query_format(input = ext, file = file)) } -get_type <- function(fmt) { - type_list <- list( - clipboard = "clipboard", - # supported formats - "," = "csv", - ";" = "csv2", - "\t" = "tsv", - "|" = "psv", - arff = "arff", - csv = "csv", - csv2 = "csv2", - csvy = "csvy", - dbf = "dbf", - dif = "dif", - dta = "dta", - dump = "dump", - epiinfo = "rec", - excel = "xlsx", - feather = "feather", - fortran = "fortran", - fst = "fst", - fwf = "fwf", - htm = "html", - html = "html", - json = "json", - mat = "matlab", - matlab = "matlab", - minitab = "mtp", - mtp = "mtp", - ods = "ods", - por = "spss", - psv = "psv", - qs = "qs", - r = "r", - rda = "rdata", - rdata = "rdata", - rds = "rds", - rec = "rec", - sas = "sas7bdat", - sas7bdat = "sas7bdat", - sav = "sav", - spss = "sav", - stata = "dta", - syd = "syd", - systat = "syd", - tsv = "tsv", - txt = "tsv", - weka = "arff", - xls = "xls", - xlsx = "xlsx", - xml = "xml", - xport = "xpt", - xpt = "xpt", - yaml = "yml", - yml = "yml", - eviews = "eviews", - wf1 = "eviews", - zsav = "zsav", - # compressed formats - csv.gz = "gzip", - csv.gzip = "gzip", - gz = "gzip", - gzip = "gzip", - tar = "tar", - zip = "zip", - # known but unsupported formats - bib = "bib", - bibtex = "bib", - bmp = "bmp", - gexf = "gexf", - gnumeric = "gnumeric", - jpeg = "jpg", - jpg = "jpg", - npy = "npy", - png = "png", - sdmx = "sdmx", - sss = "sss", - tif = "tiff", - tiff = "tiff" - ) - out <- type_list[[tolower(fmt)]] - if (is.null(out)) { - return(fmt) +#' @export +#' @rdname get_info +get_ext <- function(file) { + get_info(file)$input +} + + +.query_format <- function(input, file) { + unique_rio_formats <- unique(rio_formats[,colnames(rio_formats) != "note"]) + if (file == "clipboard") { + output <- as.list(unique_rio_formats[unique_rio_formats$format == "clipboard",]) + output$file <- file + return(output) + } + ## TODO google sheets + matched_formats <- 
unique_rio_formats[unique_rio_formats$input == input, ] + if (nrow(matched_formats) == 0) { + return(list(input = input, format = NA, type = NA, format_name = NA, import_function = NA, export_function = NA, file = file)) + } + output <- as.list(matched_formats) + output$file <- file + return(output) +} + +.standardize_format <- function(input) { + info <- .query_format(input, "") + if (is.na(info$format)) { + return(input) } - return(out) + info$format } twrap <- function(value, tag) { diff --git a/README.md b/README.md index 0f89675..88d9a77 100644 --- a/README.md +++ b/README.md @@ -130,49 +130,50 @@ install_formats() The full list of supported formats is below: -| Name | Extensions / “format” | Import Package | Export Package | Type | Note | -| :---------------------------------- | :--------------------------- | :------------- | :------------- | :------ | :---------------------- | -| Archive files (handled by tar) | bzip2 / xz / gz / gzip / tar | utils | utils | Default | | -| Zip files | zip | utils | utils | Default | | -| CSVY (CSV + YAML metadata header) | csvy | data.table | data.table | Default | | -| Comma-separated data | csv | data.table | data.table | Default | | -| Comma-separated data (European) | csv2 | data.table | data.table | Default | | -| Data Interchange Format | dif | utils | | Default | | -| Epiinfo | epiinfo / rec | foreign | | Default | | -| Excel | excel / xlsx | readxl | openxlsx | Default | | -| Excel (Legacy) | xls | readxl | | Default | | -| Fixed-width format data | fwf | utils | utils | Default | | -| Fortran data | fortran | utils | | Default | No recognized extension | -| Google Sheets | csv | data.table | | Default | As comma-separated data | -| Minitab | minitab / mtp | foreign | | Default | | -| Pipe-separated data | psv | data.table | data.table | Default | | -| R syntax | r | base | base | Default | | -| SAS | sas / sas7bdat | haven | haven | Default | Export is deprecated | -| SAS XPORT | xport / xpt | haven | haven | 
Default | | -| SPSS | sav / spss | haven | haven | Default | | -| SPSS (compressed) | zsav | haven | haven | Default | | -| SPSS Portable | por | haven | | Default | | -| Saved R objects | rda / rdata | base | base | Default | | -| Serialized R objects | rds | base | base | Default | | -| Stata | dta / stata | haven | haven | Default | | -| Systat | syd / systat | foreign | | Default | | -| Tab-separated data | tsv / txt | data.table | data.table | Default | | -| Text Representations of R Objects | dump | base | base | Default | | -| Weka Attribute-Relation File Format | arff / weka | foreign | foreign | Default | | -| XBASE database files | dbf | foreign | foreign | Default | | -| Apache Arrow (Parquet) | parquet | arrow | arrow | Suggest | | -| Clipboard | clipboard | clipr | clipr | Suggest | default is tsv | -| EViews | eviews / wf1 | hexView | | Suggest | | -| Fast Storage | fst | fst | fst | Suggest | | -| Feather R/Python interchange format | feather | arrow | arrow | Suggest | | -| Graphpad Prism | pzfx | pzfx | pzfx | Suggest | | -| HTML Tables | htm / html | xml2 | xml2 | Suggest | | -| JSON | json | jsonlite | jsonlite | Suggest | | -| Matlab | mat / matlab | rmatio | rmatio | Suggest | | -| OpenDocument Spreadsheet | ods | readODS | readODS | Suggest | | -| Serialized R objects (Quick) | qs | qs | qs | Suggest | | -| Shallow XML documents | xml | xml2 | xml2 | Suggest | | -| YAML | yaml / yml | yaml | yaml | Suggest | | +| Name | Extensions / “format” | Import Package | Export Package | Type | Note | +| :---------------------------------- | :-------------------- | :------------- | :------------- | :------ | :---------------------- | +| Archive files (handled by tar) | bzip2 / xz / tar | utils | utils | Default | | +| Gzip files | gz / gzip | base | base | Default | | +| Zip files | zip | utils | utils | Default | | +| CSVY (CSV + YAML metadata header) | csvy | data.table | data.table | Default | | +| Comma-separated data | csv | data.table | data.table 
| Default | | +| Comma-separated data (European) | csv2 | data.table | data.table | Default | | +| Data Interchange Format | dif | utils | | Default | | +| Epiinfo | epiinfo / rec | foreign | | Default | | +| Excel | excel / xlsx | readxl | openxlsx | Default | | +| Excel (Legacy) | xls | readxl | | Default | | +| Fixed-width format data | fwf | utils | utils | Default | | +| Fortran data | fortran | utils | | Default | No recognized extension | +| Google Sheets | googlesheets | data.table | | Default | As comma-separated data | +| Minitab | minitab / mtp | foreign | | Default | | +| Pipe-separated data | psv | data.table | data.table | Default | | +| R syntax | r | base | base | Default | | +| SAS | sas / sas7bdat | haven | haven | Default | Export is deprecated | +| SAS XPORT | xport / xpt | haven | haven | Default | | +| SPSS | sav / spss | haven | haven | Default | | +| SPSS (compressed) | zsav | haven | haven | Default | | +| SPSS Portable | por | haven | | Default | | +| Saved R objects | rda / rdata | base | base | Default | | +| Serialized R objects | rds | base | base | Default | | +| Stata | dta / stata | haven | haven | Default | | +| Systat | syd / systat | foreign | | Default | | +| Tab-separated data | / tsv / txt | data.table | data.table | Default | | +| Text Representations of R Objects | dump | base | base | Default | | +| Weka Attribute-Relation File Format | arff / weka | foreign | foreign | Default | | +| XBASE database files | dbf | foreign | foreign | Default | | +| Apache Arrow (Parquet) | parquet | arrow | arrow | Suggest | | +| Clipboard | clipboard | clipr | clipr | Suggest | default is tsv | +| EViews | eviews / wf1 | hexView | | Suggest | | +| Fast Storage | fst | fst | fst | Suggest | | +| Feather R/Python interchange format | feather | arrow | arrow | Suggest | | +| Graphpad Prism | pzfx | pzfx | pzfx | Suggest | | +| HTML Tables | htm / html | xml2 | xml2 | Suggest | | +| JSON | json | jsonlite | jsonlite | Suggest | | +| Matlab | 
mat / matlab | rmatio | rmatio | Suggest | | +| OpenDocument Spreadsheet | ods | readODS | readODS | Suggest | | +| Serialized R objects (Quick) | qs | qs | qs | Suggest | | +| Shallow XML documents | xml | xml2 | xml2 | Suggest | | +| YAML | yaml / yml | yaml | yaml | Suggest | | Additionally, any format that is not supported by **rio** but that has a known R implementation will produce an informative error message diff --git a/data-raw/single.json b/data-raw/single.json index 3b23ebc..135632f 100644 --- a/data-raw/single.json +++ b/data-raw/single.json @@ -28,20 +28,20 @@ }, { "input": "gz", - "format": "tar", + "format": "gzip", "type": "archive", - "format_name": "Archive files (handled by tar)", - "import_function": "utils::untar", - "export_function": "utils::tar", + "format_name": "Gzip files", + "import_function": "base::gzfile", + "export_function": "base::gzfile", "note": "" }, { "input": "gzip", - "format": "tar", + "format": "gzip", "type": "archive", - "format_name": "Archive files (handled by tar)", - "import_function": "utils::untar", - "export_function": "utils::tar", + "format_name": "Gzip files", + "import_function": "base::gzfile", + "export_function": "base::gzfile", "note": "" }, { @@ -243,7 +243,7 @@ "note": "" }, { - "input": "csv", + "input": "googlesheets", "format": "csv", "type": "import", "format_name": "Google Sheets", @@ -257,7 +257,7 @@ "type": "known", "format_name": "Graph Exchange XML Format", "import_function": "rgexf::read.gexf", - "export_function": "", + "export_function": "rgexf::write.gexf", "note": "" }, { @@ -293,7 +293,7 @@ "type": "known", "format_name": "JPEG images", "import_function": "jpeg::readJPEG", - "export_function": "", + "export_function": "jpeg::writeJPEG", "note": "" }, { @@ -302,7 +302,7 @@ "type": "known", "format_name": "JPEG images", "import_function": "jpeg::readJPEG", - "export_function": "", + "export_function": "jpeg::writeJPEG", "note": "" }, { @@ -374,7 +374,7 @@ "type": "known", "format_name": "PNG 
images", "import_function": "png::readPNG", - "export_function": "", + "export_function": "png::writePNG", "note": "" }, { @@ -383,7 +383,7 @@ "type": "known", "format_name": "Pickled Numpy arrays", "import_function": "RcppCNPy::npyLoad", - "export_function": "", + "export_function": "RcppCNPy::npySave", "note": "" }, { @@ -514,7 +514,7 @@ }, { "input": "por", - "format": "spss", + "format": "por", "type": "import", "format_name": "SPSS Portable", "import_function": "haven::read_por", @@ -626,7 +626,7 @@ "type": "known", "format_name": "TIFF images", "import_function": "tiff::readTIFF", - "export_function": "", + "export_function": "tiff::writeTIFF", "note": "" }, { @@ -635,11 +635,11 @@ "type": "known", "format_name": "TIFF images", "import_function": "tiff::readTIFF", - "export_function": "", + "export_function": "tiff::writeTIFF", "note": "" }, { - "input": "\\t", + "input": "\t", "format": "tsv", "type": "import", "format_name": "Tab-separated data", diff --git a/man/get_ext.Rd b/man/get_ext.Rd deleted file mode 100644 index 5ece047..0000000 --- a/man/get_ext.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{get_ext} -\alias{get_ext} -\title{Get File Type from Extension} -\usage{ -get_ext(file) -} -\arguments{ -\item{file}{A character string containing a filename, file path, or URL.} -} -\value{ -A characters string containing a file type recognized by rio. 
-} -\description{ -A utility function to retrieve the file type from a file extension (via its filename/path/URL) -} -\examples{ -get_ext("starwars.xlsx") -get_ext("starwars.ods") -get_ext("clipboard") ## "clipboard" -get_ext("https://github.com/ropensci/readODS/raw/v2.1/starwars.ods") -} diff --git a/man/get_info.Rd b/man/get_info.Rd new file mode 100644 index 0000000..964750c --- /dev/null +++ b/man/get_info.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_info} +\alias{get_info} +\alias{get_ext} +\title{Get File Info} +\usage{ +get_info(file) + +get_ext(file) +} +\arguments{ +\item{file}{A character string containing a filename, file path, or URL.} +} +\value{ +For \code{\link[=get_info]{get_info()}}, a list is returned with the following slots +\itemize{ +\item \code{input} file extension or information used to identify the possible file format +\item \code{format} file format, see \code{format} argument of \code{\link[=import]{import()}} +\item \code{type} "import" (supported by default); "suggest" (supported by suggested packages, see \code{\link[=install_formats]{install_formats()}}); "enhance" and "known" are not directly supported; \code{NA} is unsupported +\item \code{format_name} name of the format +\item \code{import_function} What function is used to import this file +\item \code{export_function} What function is used to export this file +\item \code{file} \code{file} +} +For \code{\link[=get_ext]{get_ext()}}, just \code{input} (usually file extension) is returned; retained for backward compatibility. +} +\description{ +A utility function to retrieve the file information of a filename, path, or URL.
+} +\examples{ +get_info("starwars.xlsx") +get_info("starwars.ods") +get_info("https://github.com/ropensci/readODS/raw/v2.1/starwars.ods") +get_info("~/duran_duran_rio.mp3") +get_ext("clipboard") ## "clipboard" +get_ext("https://github.com/ropensci/readODS/raw/v2.1/starwars.ods") +} diff --git a/tests/testthat/test_errors.R b/tests/testthat/test_errors.R index 7c11e37..9a70533 100644 --- a/tests/testthat/test_errors.R +++ b/tests/testthat/test_errors.R @@ -2,8 +2,8 @@ context("Errors") library("datasets") test_that("Function suggestions for unsupported export", { - expect_error(export(data.frame(1), "test.jpg"), - "jpg format not supported. Consider using the 'jpeg::writeJPEG()' function", + expect_error(export(data.frame(1), "test.jpg"), + "jpg format not supported. Consider using the 'jpeg::writeJPEG()' function", fixed = TRUE) }) @@ -11,7 +11,7 @@ test_that("Error for unsupported file types", { writeLines("123", con = "test.faketype") expect_error(import("test.faketype"), "Format not supported") expect_error(export(mtcars, "mtcars.faketype"), "Format not supported") - expect_equal(get_type("faketype"), "faketype") + expect_equal(.standardize_format("faketype"), "faketype") expect_error(get_ext("noextension"), "'file' has no extension") unlink("test.faketype") }) diff --git a/tests/testthat/test_guess.R b/tests/testthat/test_guess.R index 0043a3e..71c5046 100644 --- a/tests/testthat/test_guess.R +++ b/tests/testthat/test_guess.R @@ -10,14 +10,14 @@ test_that("File extension converted correctly", { }) test_that("Format converted correctly", { - expect_that(get_type(","), equals("csv")) - expect_that(get_type(";"), equals("csv2")) - expect_that(get_type("|"), equals("psv")) - expect_that(get_type("\t"), equals("tsv")) - expect_that(get_type("excel"), equals("xlsx")) - expect_that(get_type("stata"), equals("dta")) - expect_that(get_type("spss"), equals("sav")) - expect_that(get_type("sas"), equals("sas7bdat")) + expect_that(.standardize_format(","), equals("csv")) + 
expect_that(.standardize_format(";"), equals("csv2")) + expect_that(.standardize_format("|"), equals("psv")) + expect_that(.standardize_format("\t"), equals("tsv")) + expect_that(.standardize_format("excel"), equals("xlsx")) + expect_that(.standardize_format("stata"), equals("dta")) + expect_that(.standardize_format("spss"), equals("sav")) + expect_that(.standardize_format("sas"), equals("sas7bdat")) }) test_that("Export without file specified", { diff --git a/tests/testthat/test_import_list.R b/tests/testthat/test_import_list.R index a520a64..e7a55f0 100644 --- a/tests/testthat/test_import_list.R +++ b/tests/testthat/test_import_list.R @@ -27,6 +27,17 @@ test_that("Import multiple HTML tables in import_list()", { expect_true(identical(names(dat[[2]]), names(iris))) }) +test_that("Import multiple HTML tables in import_list() but with htm #350", { + temphtm <- tempfile(fileext = ".htm") + file.copy("../testdata/twotables.html", temphtm) + dat <- import_list(temphtm) + expect_true(identical(dim(dat[[1]]), dim(mtcars))) + expect_true(identical(names(dat[[1]]), names(mtcars))) + expect_true(identical(dim(dat[[2]]), dim(iris))) + expect_true(identical(names(dat[[2]]), names(iris))) +}) + + test_that("import_list() preserves 'which' names when specified", { export(list(a = mtcars, b = iris), "foo.xlsx") expect_true(identical(names(import_list("foo.xlsx")), c("a", "b")))