Skip to content

Commit

Permalink
Merge pull request #188 from mountainMath/v0.5.4
Browse files Browse the repository at this point in the history
V0.5.4
  • Loading branch information
mountainMath authored Nov 7, 2022
2 parents 9374331 + af9ec76 commit 4da898e
Show file tree
Hide file tree
Showing 62 changed files with 383 additions and 103 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: cancensus
Type: Package
Title: Access, Retrieve, and Work with Canadian Census Data and Geography
Version: 0.5.3
Version: 0.5.4
Authors@R: c(
person("Jens", "von Bergmann", email = "[email protected]", role = c("aut"), comment = "API creator and maintainer"),
person("Dmitry", "Shkolnik", email = "[email protected]", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(add_unique_names_to_region_list)
export(as_census_region_list)
export(census_vectors)
export(child_census_vectors)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# cancensus - 0.5.4
- added ability to query census datasets by year
- added a convenience function for creating unique names within given selection of regions from `list_census_regions()`
- added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out.
- improved checking that correct spatial formats are requested
- preparing for 'sp' spatial format usage deprecation in future versions

# cancensus - 0.5.3
- Added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to stopping with an error.
- fixes a bug in the local data recall check
Expand Down
36 changes: 28 additions & 8 deletions R/cancensus.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
data_version<-NULL
geo_version<-NULL

dataset <- translate_dataset(dataset)

# Check region selection validity
if (is.na(level)) level="Regions"

Expand Down Expand Up @@ -92,14 +94,31 @@ get_census <- function (dataset, regions, level=NA, vectors=c(), geo_format = NA
stop("The `sf` package is required to return geographies.")
}

# Check if SF is installed when asking for spatial data
if(geo_format == "sf" && !("sf" %in% utils::installed.packages())) {
if (utils::menu(c("Yes", "No"),
title= paste("The `sf` package is required to return geographies. Would you like to install?")) == "1") {
utils::install.packages('sf')
} else {
print("Cancelling installation and retrieving tabular data only.")
geo_format <- NA
# --------- Spatial format checks --------------------------------------------------------------------#
# This section checks that a valid spatial format is requested. If users request spatial data but
# don't have the 'sf' package installed, they are prompted with a menu to install it; if they decline,
# tabular data only is returned. If users request the 'sp' format without 'sf' installed, they are
# advised that 'sp' usage is deprecated and nudged to install the 'sf' package.
if (!is.na(geo_format)) {
if(!geo_format %in% c("sf","sp")) {
stop("the `geo_format` parameter should be 'sf', 'sp', or NA")
} else if(geo_format == "sf" && !("sf" %in% utils::installed.packages())) {
if (utils::menu(c("Install package", "Return tabular data without geo"),
title= paste("The `sf` package is required to return geographies. Would you like to install?")) == "1") {
utils::install.packages('sf')
} else {
message("Retrieving tabular data only. Please install 'sf' package if you wish to use Census data as spatial data.")
geo_format <- NA
}
} else if(geo_format == "sp" && !("sf" %in% utils::installed.packages())) {
message("The use of 'sp' format in cancensus package is now deprecated.\nPlease install 'sf' package to return spatial format data.")
if (utils::menu(c("Install package", "Return tabular data without geo"),
title= paste("Would you like to install 'sf' to continue?")) == "1") {
utils::install.packages('sf')
} else {
message("Retrieving tabular data only. Please install 'sf' package if you wish to use Census data as spatial data.")
geo_format <- NA
}
}
}

Expand Down Expand Up @@ -360,6 +379,7 @@ list_census_datasets <- function(use_cache = TRUE, quiet = FALSE) {
#' # Attribution string for the 2006 and 2016 census datasets
#' dataset_attribution(c('CA06','CA16'))
dataset_attribution <- function(datasets){
datasets <- lapply(datasets,translate_dataset) %>% unlist()
attribution <-list_census_datasets(quiet=TRUE) %>%
dplyr::filter(.data$dataset %in% datasets) %>%
dplyr::pull(.data$attribution)
Expand Down
46 changes: 43 additions & 3 deletions R/census_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#' @examples
#' list_census_regions('CA16')
list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
dataset <- translate_dataset(dataset)
cache_file <- file.path(tempdir(),paste0(dataset, "_regions.rda"))

if (!use_cache || !file.exists(cache_file)) {
Expand Down Expand Up @@ -81,7 +82,7 @@ list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
#' names matching specific queries. Users can optionally specify the target geography level
#' (e.g. \code{level = 'CMA'}, \code{level = 'CSD'}, etc.). Alternatively, calling
#' \code{explore_census_vectors()} will launch the interactive region selection tool on
#' the Censusmapper site in a new web page or tab.
#' the CensusMapper site in a new web page or tab.
#'
#' @param searchterm The term to search for e.g. \code{"Victoria"}.
#' Search terms are case insensitive. If unable to find a given search term,
Expand All @@ -91,12 +92,11 @@ list_census_regions <- function(dataset, use_cache = TRUE, quiet = FALSE) {
#' @param level One of \code{NA}, \code{'C'}, \code{'PR'}, \code{'CMA'}, \code{'CD'}, or \code{'CSD'}.
#' If specified, only return variables of specified `level`.
#' @param ... Further arguments passed on to \code{\link{list_census_regions}}.
#' @return A census region list of the same format as `list_census_regions()` containing the matches.
#'
#' @export
#'
#' @examples
#' search_census_regions('Victoria', 'CA16')
#'
#' \dontrun{
#' # This will return a warning that no match was found, but will suggest similar named regions.
#' search_census_regions('Victorea', 'CA16')
Expand Down Expand Up @@ -175,6 +175,46 @@ as_census_region_list <- function(tbl) {
regions
}

#' Convenience function for creating unique names from region list
#'
#' @description Names of municipalities are not always unique, especially at the CSD level. This function
#' takes as input a subset of a regions list as generated from `list_census_regions()` and de-duplicates names as
#' needed by adding the municipal status in parentheses. If this does not de-duplicate the name then the
#' geographic identifier will be further added in parentheses behind that.
#'
#' @param region_list a subset of a regions list as gotten from `list_census_regions()`. Must contain the
#' columns `name`, `municipal_status` and `region`.
#' @return The same list of regions with an extra column `Name` with de-duplicated names. Any grouping
#' present on the input is preserved on the output.
#' @export
#'
#' @examples
#' \dontrun{
#' # De-duplicate the names of all census subdivisions within one CMA
#' library(dplyr)
#' list_census_regions("CA21") %>%
#'   filter(level=="CSD", CMA_UID=="59933") %>%
#'   add_unique_names_to_region_list()
#' }
add_unique_names_to_region_list <- function(region_list) {
  required_cols <- c("name", "municipal_status", "region")
  missing_cols <- setdiff(required_cols, names(region_list))
  if (length(missing_cols) > 0) {
    stop("`region_list` is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # Remember existing grouping (as column names) so it can be restored at the
  # end; uniqueness of names is always assessed across the whole table.
  is_grouped <- inherits(region_list, "grouped_df")
  group_cols <- if (is_grouped) dplyr::group_vars(region_list) else character(0)
  r <- if (is_grouped) dplyr::ungroup(region_list) else region_list

  # TRUE for every element whose value occurs more than once in `x`.
  is_shared <- function(x) duplicated(x) | duplicated(x, fromLast = TRUE)

  unique_name <- as.character(r[["name"]])

  # First pass: qualify repeated names with the municipal status.
  shared <- is_shared(unique_name)
  if (any(shared)) {
    unique_name[shared] <- paste0(unique_name[shared],
                                  " (", r[["municipal_status"]][shared], ")")
  }

  # Second pass: names that still collide get the region identifier appended.
  shared <- is_shared(unique_name)
  if (any(shared)) {
    unique_name[shared] <- paste0(unique_name[shared],
                                  " (", r[["region"]][shared], ")")
  }

  r$Name <- unique_name

  if (length(group_cols) > 0) {
    r <- dplyr::group_by(r, dplyr::across(dplyr::all_of(group_cols)))
  }
  r
}


#' Lookup a municipal geography type from code - BETA
#'
Expand Down
1 change: 1 addition & 0 deletions R/census_vectors.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#' list_census_vectors('CA16')
#' }
list_census_vectors <- function(dataset, use_cache = TRUE, quiet = TRUE) {
dataset <- translate_dataset(dataset)
cache_file <- file.path(tempdir(),paste0(dataset, "_vectors.rda"))
if (!use_cache || !file.exists(cache_file)) {
url <- paste0(cancensus_base_url(),"/api/v1/vector_info/", dataset, ".csv")
Expand Down
13 changes: 13 additions & 0 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ cache_path <- function(...) {
}


# Translate census years into cancensus dataset codes.
#
# Accepts either a dataset code (e.g. "CA16") or a census year given as a
# number or string (e.g. 2016 or "2016"). Years listed in the lookup table
# below are replaced by the matching dataset code; every other value is
# returned unchanged, coerced to character.
#
# The function is vectorised: each element of `dataset` is translated
# independently, and zero-length input yields `character(0)`.
#
# @param dataset Dataset code(s) or census year(s).
# @return An unnamed character vector of the same length as `dataset`.
translate_dataset <- function(dataset) {
  dataset <- as.character(dataset)
  translations <- c("1996" = "CA1996",
                    "2001" = "CA01",
                    "2006" = "CA06",
                    "2011" = "CA11",
                    "2016" = "CA16",
                    "2021" = "CA21")
  # Element-wise lookup rather than a scalar `if`, which would error on
  # zero-length or multi-element input.
  is_year <- dataset %in% names(translations)
  dataset[is_year] <- unname(translations[dataset[is_year]])
  dataset
}

clean_vector_list <- function(vector_list,dataset=NULL){
if (!inherits(vector_list,"data.frame")) {
if (inherits(vector_list,"character")) {
Expand Down
1 change: 1 addition & 0 deletions R/intersect_geometry.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ get_intersecting_geometries <- function(dataset, level, geometry, simplified = F
api_key <- robust_api_key(api_key)
have_api_key <- valid_api_key(api_key)
result <- NULL
dataset <- translate_dataset(dataset)

if ("sf" %in% class(geometry)) {
geometry=sf::st_geometry(geometry)
Expand Down
2 changes: 2 additions & 0 deletions R/vector_discovery.R
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ keyword_search <- function(query_terms, census_vector_list, interactive = TRUE)
#'
#' }
explore_census_vectors <- function(dataset = "CA16") {
  # Normalise the dataset argument through the package's translate_dataset()
  # helper before building the URL.
  dataset_code <- translate_dataset(dataset)
  explorer_url <- paste0("https://censusmapper.ca/api/", dataset_code, "#api_variable")

  message("Opening interactive census variable explorer at censusmapper.ca/api in the browser")
  utils::browseURL(explorer_url)
}
Expand Down Expand Up @@ -309,6 +310,7 @@ explore_census_vectors <- function(dataset = "CA16") {
#'
#' }
explore_census_regions <- function(dataset = "CA16") {
  # Normalise the dataset argument through the package's translate_dataset()
  # helper before building the URL.
  dataset_code <- translate_dataset(dataset)
  explorer_url <- paste0("https://censusmapper.ca/api/", dataset_code, "#api_region")

  message("Opening interactive census region explorer at censusmapper.ca/api in the browser")
  utils::browseURL(explorer_url)
}
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ For larger quotas, please get in touch with Jens [directly](mailto:jens@censusma

For performance reasons, and to avoid unnecessarily drawing down API quotas, **cancensus** caches data queries under the hood. By default, **cancensus** caches in R's temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using `set_cancensus_cache_path(<local cache path>, install = TRUE)`, this enables more efficient loading and reuse of downloaded data. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.

Starting with version 0.5.2 **cancensus** will automatically check if for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detected locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_chached_data()` function. If data was cached with **cancenus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.
Starting with version 0.5.2 **cancensus** will automatically check for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in [CensusMapper](https://censusmapper.ca). Statistics Canada occasionally detects and corrects errors in their census data releases, and **cancensus** will download a list of recalled data at the first invocation of `get_census()` in each session and emit a warning if it detected locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the `remove_recalled_cached_data()` function. If data was cached with **cancensus** versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.

### Currently available datasets

Expand Down Expand Up @@ -164,7 +164,7 @@ The [tongfen package](https://mountainmath.github.io/tongfen/index.html) automat

### Statistics Canada Attribution

Subject to the Statistics Canada Open Licence Agreement, licensed products using Statistics Canada data should employ the following aknowledgement of source:
Subject to the Statistics Canada Open License Agreement, licensed products using Statistics Canada data should employ the following acknowledgement of source:

```
Acknowledgment of Source
Expand Down
7 changes: 6 additions & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Update - 0.5.4
- added ability to query census datasets by year
- added a convenience function for creating unique names within given selection of regions from `list_census_regions()`
- added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out.
- improved checking that correct spatial formats are requested

# Update - 0.5.3
- Added a check and context menu to install `sf` package when user requests spatial data but does not have the required package installed as opposed to erroring out.
- fixes a bug in the local data recall check

# Update - v0.5.2
Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions docs/articles/Making_maps_with_cancensus.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/articles/Taxfiler_Data.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/articles/cancensus.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4da898e

Please sign in to comment.