Merge pull request #188 from mountainMath/v0.5.4

V0.5.4
mountainMath · Nov 7, 2022 · 4da898e · 4da898e
2 parents 9374331 + af9ec76
commit 4da898e
Show file tree

Hide file tree

Showing 62 changed files with 383 additions and 103 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cancensus
 Type: Package
 Title: Access, Retrieve, and Work with Canadian Census Data and Geography
-Version: 0.5.3
+Version: 0.5.4
 Authors@R: c(
     person("Jens", "von Bergmann", email = "[email protected]", role = c("aut"), comment = "API creator and maintainer"),
     person("Dmitry", "Shkolnik", email = "[email protected]", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(add_unique_names_to_region_list)
 export(as_census_region_list)
 export(census_vectors)
 export(child_census_vectors)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# cancensus - 0.5.4
+- added ability to query census datasets by year
+- added a convenience function for creating unique names within a given selection of regions from `list_census_regions()`
+- added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out.
+- improved checking that correct spatial formats are requested
+- preparing for 'sp' spatial format usage deprecation in future versions
+
 # cancensus - 0.5.3
 - Added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to stopping with an error. 
 - fixes a bug in the local data recall check

diff --git a/R/cancensus.R b/R/cancensus.R
@@ -62,6 +62,8 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
   data_version<-NULL
   geo_version<-NULL
 
+  dataset <- translate_dataset(dataset)
+
   # Check region selection validity
   if (is.na(level)) level="Regions"
 
@@ -92,14 +94,31 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
     stop("The `sf` package is required to return geographies.")
   }
 
-  # Check if SF is installed when asking for spatial data
-  if(geo_format == "sf" && !("sf" %in% utils::installed.packages())) {
-    if (utils::menu(c("Yes", "No"),
-             title= paste("The `sf` package is required to return geographies. Would you like to install?")) == "1") {
-      utils::install.packages('sf')
-    } else {
-      print("Cancelling installation and retrieving tabular data only.")
-      geo_format <- NA
+  # --------- Spatial format checks --------------------------------------------------------------------#
+  # This section checks that a valid spatial format is requested. If users request spatial data but
+  # don't have the 'sf' package installed, we prompt them with a menu to install it; if they decline, we
+  # return tabular data only. If users select the 'sp' format without 'sf' installed, we advise them that
+  # 'sp' usage is deprecated and nudge them to install the 'sf' package.
+  if (!is.na(geo_format)) {
+    if(!geo_format %in% c("sf","sp")) {
+      stop("the `geo_format` parameter should be 'sf', 'sp', or NA")
+    } else if(geo_format == "sf" && !("sf" %in% utils::installed.packages())) {
+      if (utils::menu(c("Install package", "Return tabular data without geo"),
+                      title= paste("The `sf` package is required to return geographies. Would you like to install?")) == "1") {
+        utils::install.packages('sf')
+      } else  {
+        message("Retrieving tabular data only. Please install 'sf' package if you wish to use Census data as spatial data.")
+        geo_format <- NA
+      }
+    } else if(geo_format == "sp" && !("sf" %in% utils::installed.packages())) {
+      message("The use of 'sp' format in cancensus package is now deprecated.\nPlease install 'sf' package to return spatial format data.")
+      if (utils::menu(c("Install package", "Return tabular data without geo"),
+                      title= paste("Would you like to install 'sf' to continue?")) == "1") {
+        utils::install.packages('sf')
+      } else  {
+        message("Retrieving tabular data only. Please install 'sf' package if you wish to use Census data as spatial data.")
+        geo_format <- NA
+      }
     }
   }
 
@@ -360,6 +379,7 @@ list_census_datasets <- function(use_cache = TRUE, quiet = FALSE) {
 #' # Attribution string for the 2006 and 2016 census datasets
 #' dataset_attribution(c('CA06','CA16'))
 dataset_attribution <- function(datasets){
+  datasets <-   lapply(datasets,translate_dataset) %>% unlist()
   attribution <-list_census_datasets(quiet=TRUE) %>%
     dplyr::filter(.data$dataset %in% datasets) %>%
     dplyr::pull(.data$attribution)

diff --git a/R/census_regions.R b/R/census_regions.R
@@ -36,6 +36,7 @@
 #' @examples
 #' list_census_regions('CA16')
 list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
+  dataset <- translate_dataset(dataset)
   cache_file <- file.path(tempdir(),paste0(dataset, "_regions.rda"))
 
   if (!use_cache || !file.exists(cache_file)) {
@@ -81,7 +82,7 @@ list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
 #' names matching specific queries. Users can optionally specify the target geography level
 #' (e.g. \code{level = 'CMA'}, \code{level = 'CSD'}, etc.). Alternatively, calling
 #' \code{explore_census_vectors()} will launch the interactive region selection tool on
-#' the Censusmapper site in a new web page or tab.
+#' the CensusMapper site in a new web page or tab.
 #'
 #' @param searchterm The term to search for e.g. \code{"Victoria"}.
 #' Search terms are case insensitive. If unable to find a given search term,
@@ -91,12 +92,11 @@ list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
 #' @param level One of \code{NA}, \code{'C'}, \code{'PR'}, \code{'CMA'}, \code{'CD'}, or \code{'CSD'}.
 #' If specified, only return variables of specified `level`.
 #' @param ... Further arguments passed on to \code{\link{list_census_regions}}.
+#' @return A census region list of the same format as `list_census_regions()` containing the matches.
 #'
 #' @export
 #'
 #' @examples
-#' search_census_regions('Victoria', 'CA16')
-#'
 #' \dontrun{
 #' # This will return a warning that no match was found, but will suggest similar named regions.
 #' search_census_regions('Victorea', 'CA16')
@@ -175,6 +175,46 @@ as_census_region_list <- function(tbl) {
   regions
 }
 
+#' Convenience function for creating unique names from region list
+#'
+#' @description Names of municipalities are not always unique, especially at the CSD level. This function
+#' takes as input a subset of a regions list as generated from `list_census_regions()` and de-duplicates names as
+#' needed by adding the municipal status in parentheses. If this does not de-duplicate the name then the
+#' geographic identifier will be further added in parentheses behind that.
+#'
+#' @param region_list a subset of a regions list as returned by `list_census_regions()`. It must contain
+#' the columns `name`, `municipal_status` and `region`. Any grouping present on the input is restored
+#' on the output.
+#' @return The same list of regions with an extra column `Name` with de-duplicated names.
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # De-duplicate the names of the CSD level regions within a single CMA
+#' library(dplyr)
+#' list_census_regions("CA21") %>%
+#'   filter(level=="CSD", CMA_UID=="59933") %>%
+#'   add_unique_names_to_region_list()
+#' }
+add_unique_names_to_region_list <- function(region_list) {
+  # Remember the caller's grouping as a character vector so it can be restored below.
+  group_cols <- dplyr::group_vars(region_list)
+
+  r <- region_list %>%
+    # First pass: names shared by several regions get the municipal status appended.
+    dplyr::group_by(.data$name) %>%
+    dplyr::mutate(count = dplyr::n()) %>%
+    dplyr::mutate(Name = dplyr::case_when(
+      .data$count == 1 ~ .data$name,
+      TRUE ~ paste0(.data$name, " (", .data$municipal_status, ")")
+    )) %>%
+    # Second pass: names that still collide additionally get the region identifier appended.
+    dplyr::group_by(.data$Name) %>%
+    dplyr::mutate(count = dplyr::n()) %>%
+    dplyr::mutate(Name = dplyr::case_when(
+      .data$count == 1 ~ .data$Name,
+      TRUE ~ paste0(.data$Name, " (", .data$region, ")")
+    )) %>%
+    dplyr::ungroup() %>%
+    # Drop the helper column; a string selector avoids the deprecated `.data$` use in tidyselect.
+    dplyr::select(-"count")
+
+  # Restore the original grouping, including the case of a single grouping variable.
+  if (length(group_cols) > 0) {
+    r <- r %>%
+      dplyr::group_by(dplyr::across(dplyr::all_of(group_cols)))
+  }
+  r
+}
+
 
 #' Lookup a municipal geography type from code - BETA
 #'

diff --git a/R/census_vectors.R b/R/census_vectors.R
@@ -27,6 +27,7 @@
 #' list_census_vectors('CA16')
 #' }
 list_census_vectors <- function(dataset, use_cache = TRUE, quiet = TRUE) {
+  dataset <- translate_dataset(dataset)
   cache_file <- file.path(tempdir(),paste0(dataset, "_vectors.rda"))
   if (!use_cache || !file.exists(cache_file)) {
     url <- paste0(cancensus_base_url(),"/api/v1/vector_info/", dataset, ".csv")

diff --git a/R/helpers.R b/R/helpers.R
@@ -38,6 +38,19 @@ cache_path <- function(...) {
 }
 
 
+# Translate census years (e.g. 2021 or "2021") into dataset codes (e.g. "CA21").
+# Values that are not one of the known census years are returned unchanged, as character.
+# The lookup is vectorised, so zero-length input and vectors with several datasets are
+# handled instead of failing in a scalar `if` condition.
+translate_dataset <- function(dataset) {
+  dataset <- as.character(dataset)
+  translations <- c("1996" = "CA1996",
+                    "2001" = "CA01",
+                    "2006" = "CA06",
+                    "2011" = "CA11",
+                    "2016" = "CA16",
+                    "2021" = "CA21")
+  # NOTE: values are matched as given; no case normalisation is applied.
+  is_year <- dataset %in% names(translations)
+  dataset[is_year] <- unname(translations[dataset[is_year]])
+  dataset
+}
+
 clean_vector_list <- function(vector_list,dataset=NULL){
   if (!inherits(vector_list,"data.frame")) {
     if (inherits(vector_list,"character")) {

diff --git a/R/intersect_geometry.R b/R/intersect_geometry.R
@@ -47,6 +47,7 @@ get_intersecting_geometries <- function(dataset, level, geometry, simplified = F
   api_key <- robust_api_key(api_key)
   have_api_key <- valid_api_key(api_key)
   result <- NULL
+  dataset <- translate_dataset(dataset)
 
     if ("sf" %in% class(geometry)) {
     geometry=sf::st_geometry(geometry)

diff --git a/R/vector_discovery.R b/R/vector_discovery.R
@@ -282,6 +282,7 @@ keyword_search <- function(query_terms, census_vector_list, interactive = TRUE)
 #'
 #' }
 explore_census_vectors <- function(dataset = "CA16") {
+  dataset <- translate_dataset(dataset)  # accept a census year (e.g. 2016 or "2016") as well as a dataset code
   message("Opening interactive census variable explorer at censusmapper.ca/api in the browser")
   utils::browseURL(paste0("https://censusmapper.ca/api/",dataset,"#api_variable"))
 }
@@ -309,6 +310,7 @@ explore_census_vectors <- function(dataset = "CA16") {
 #'
 #' }
 explore_census_regions <- function(dataset = "CA16") {
+  dataset <- translate_dataset(dataset)  # accept a census year (e.g. 2016 or "2016") as well as a dataset code
   message("Opening interactive census region explorer at censusmapper.ca/api in the browser")
   utils::browseURL(paste0("https://censusmapper.ca/api/",dataset,"#api_region"))
 }
diff --git a/README.md b/README.md
@@ -47,7 +47,7 @@ For larger quotas, please get in touch with Jens [directly](mailto:jens@censusma
 
 For performance reasons, and to avoid unnecessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `set_cancensus_cache_path(<local cache path>, install = TRUE)`, this enables more efficient loading and reuse of downloaded data. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet. 
 
-Starting with version 0.5.2 **cancensus** will automatically check if for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detected locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_chached_data()` function. If data was cached with **cancenus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.
+Starting with version 0.5.2 **cancensus** will automatically check for data that has been recalled by Statistics Canada and is stored in the local cache, using the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detects locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_chached_data()` function. If data was cached with **cancensus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded, so it can identify recalled data at this point at the latest and issue a warning if recalled data is loaded.
 
 ### Currently available datasets
 
@@ -164,7 +164,7 @@ The [tongfen package](https://mountainmath.github.io/tongfen/index.html) automat
 
 ### Statistics Canada Attribution
 
-Subject to the Statistics Canada Open Licence Agreement, licensed products using Statistics Canada data should employ the following aknowledgement of source:
+Subject to the Statistics Canada Open License Agreement, licensed products using Statistics Canada data should employ the following acknowledgement of source:
 
 ```
 Acknowledgment of Source

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,5 +1,10 @@
+# Update - 0.5.4
+- added ability to query census datasets by year
+- added a convenience function for creating unique names within a given selection of regions from `list_census_regions()`
+- added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out.
+- improved checking that correct spatial formats are requested
+
 # Update - 0.5.3
-- Added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out. 
 - fixes a bug in the local data recall check
 
 # Update - v0.5.2

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
diff --git a/docs/articles/Dwellings_by_document_type_cross_tabulation.html b/docs/articles/Dwellings_by_document_type_cross_tabulation.html
diff --git a/docs/articles/Making_maps_with_cancensus.html b/docs/articles/Making_maps_with_cancensus.html
diff --git a/docs/articles/Taxfiler_Data.html b/docs/articles/Taxfiler_Data.html
diff --git a/docs/articles/cancensus.html b/docs/articles/cancensus.html