Skip to content

Commit

Permalink
Merge pull request #192 from mountainMath/v0.5.5
Browse files Browse the repository at this point in the history
add ability to download original statcan geographies and query the WDS
  • Loading branch information
mountainMath authored Jan 23, 2023
2 parents a673f0e + 11e6bcb commit a39c509
Show file tree
Hide file tree
Showing 85 changed files with 2,611 additions and 219 deletions.
1 change: 0 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ lastMiKTeXException

^doc$

^R/geo_suite.R
^CRAN-SUBMISSION$
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: cancensus
Type: Package
Title: Access, Retrieve, and Work with Canadian Census Data and Geography
Version: 0.5.4
Version: 0.5.5
Authors@R: c(
person("Jens", "von Bergmann", email = "[email protected]", role = c("aut"), comment = "API creator and maintainer"),
person("Dmitry", "Shkolnik", email = "[email protected]", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"),
Expand Down Expand Up @@ -38,7 +38,8 @@ Suggests: knitr,
sf,
geojsonsf,
tidyr,
lwgeom
lwgeom,
xml2
VignetteBuilder: knitr
URL: https://github.com/mountainMath/cancensus, https://mountainmath.github.io/cancensus/, https://censusmapper.ca/api
BugReports: https://github.com/mountainMath/cancensus/issues
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ export(find_census_vectors)
export(get_census)
export(get_census_geometry)
export(get_intersecting_geometries)
export(get_statcan_geo_suite)
export(get_statcan_geographic_attributes)
export(get_statcan_geographies)
export(get_statcan_geography_relationships)
export(get_statcan_wds_data)
export(get_statcan_wds_metadata)
export(label_vectors)
export(list_cancensus_cache)
export(list_census_datasets)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# cancensus 0.5.5

- add functionality for direct access to StatCan census WDS for 2021
- add functionality to download original StatCan geographies for 2021
- update CODES_TABLE for 2021 census

# cancensus 0.5.4

- added ability to query census datasets by year
Expand Down
53 changes: 30 additions & 23 deletions R/geo_suite.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@



#' Read the geosuite data
#'
#' @description
#' Reads the geosuite data for the given level and census year. Data gets cached after first download.
#' Reads the geosuite data for the given level and census year. Data gets cached after first download if the
#' cancensus cache path has been set. For older
#' years `get_statcan_geographic_attributes()` can fill in most of the information
#'
#' @param level geographic level to return the data for, valid choices are
#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR"
#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR", "FED", "DPL", "ER", "PN", "POPCTR"
#' @param census_year census year to get the data for, right now only 2021 is supported
#' @param refresh (logical) refresh the cache if true
#' @return tibble with the geosuite data
#'
#' @keywords internal
#'
#' @examples
#' # read the GeoSuite data for dissemination areas for the 2021 census
#' \dontrun{
#' get_geo_suite("DA","2021")
#' get_statcan_geo_suite("DA","2021")
#' }
#' @export
get_geo_suite <- function(level,census_year="2021",refresh=FALSE){
get_statcan_geo_suite <- function(level,census_year="2021",refresh=FALSE){
valid_years <- c("2021") #seq(2001,2021,5) %>% as.character()
valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR")
valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR","FED","DPL","ER","PN","POPCTR")
if (!(as.character(census_year) %in% valid_years)) {
stop(paste0("Only census years ",paste0(valid_years,collapse = ", "),
" are supported for GeoSuite"))
Expand Down Expand Up @@ -77,10 +75,12 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){
#'
#' @description
#' Reads the Dissemination Geographies Relationship File for the given census year. The table contains
#' the information on how all the geographic levels are related for each area. A reference guide is available
#' the information on how all the geographic levels are related for each area. Data gets cached after first download if the
#' cancensus cache path has been set. A reference guide is available
#' at https://www150.statcan.gc.ca/n1/en/catalogue/982600032021001
#'
#' @param census_year census year to get the data for, right now only 2021 is supported
#' @param census_year census year to get the data for, right now only 2021 is supported, for older
#' years `get_statcan_geographic_attributes()` can fill in most of the information
#' @param refresh (logical) refresh the cache if true
#' @return tibble with the relationship data
#'
Expand All @@ -89,10 +89,10 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){
#' @examples
#' # read the dissemination geographies relationship file for the 2021 census
#' \dontrun{
#' get_geography_relationship("2021")
#' get_statcan_geography_relationships("2021")
#' }
#' @export
get_geography_relationship <- function(census_year="2021", refresh=FALSE){
get_statcan_geography_relationships <- function(census_year="2021", refresh=FALSE){
valid_years <- c("2021")
if (!(as.character(census_year) %in% valid_years)) {
stop(paste0("Only census years ",paste0(valid_years,collapse = ", "),
Expand All @@ -113,28 +113,30 @@ get_geography_relationship <- function(census_year="2021", refresh=FALSE){
#' @description
#' Reads the Geographies Attributes File for the given census year. The table contains
#' the information on how all the geographic levels are related for each area, and population, dwelling and household counts.
#' A reference guide is available
#' Data gets cached after first download if the
#' cancensus cache path has been set. A reference guide is available
#' at https://www150.statcan.gc.ca/n1/en/catalogue/92-151-G2021001
#'
#' @param census_year census year to get the data for, right now only 2006, 2011 and 2016 are supported
#' @param census_year census year to get the data for, right now only 2006, 2011, 2016, 2021 are supported
#' @param refresh (logical) refresh the cache if true
#' @return tibble with the relationship data
#'
#' @examples
#' # read the geographic attributes file for a census year
#' get_geographic_attributes("2016")
#'
#' \dontrun{
#' get_statcan_geographic_attributes("2021")
#' }
#' @export
get_geographic_attributes <- function(census_year="2016",refresh=FALSE){
get_statcan_geographic_attributes <- function(census_year="2021",refresh=FALSE){
census_year <- as.character(census_year)[1]
valid_years <- seq(2006,2016,5) %>% as.character
valid_years <- seq(2006,2021,5) %>% as.character
if (!(as.character(census_year) %in% valid_years)) {
stop(paste0("Only census years ",paste0(valid_years,collapse = ", "),
" are supported for the geographic relationship file."))
}
urls <- c("2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip",
urls <- c("2021"="https://www12.statcan.gc.ca/census-recensement/2021/geo/aip-pia/attribute-attribs/files-fichiers/2021_92-151_X.zip",
"2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip",
"2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_txt.zip",
#"2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_xlsx.zip",
"2006"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2006_92-151_XBB_txt.zip")

base_path <- cache_path("attribute_files")
Expand All @@ -147,8 +149,9 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){
utils::download.file(urls[[census_year]],tmp)
utils::unzip(tmp,exdir = base_path_year)
}
file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE)
if (census_year=="2016") {
if (census_year=="2021") file <- dir(base_path_year,pattern="\\.csv",full.names = TRUE)
else file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE)
if (census_year %in% c("2016","2021")) {
result <- readr::read_csv(file,col_types = readr::cols(.default="c"),
locale = readr::locale(encoding ="Windows-1252"))
} else {
Expand All @@ -175,3 +178,7 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){
dplyr::mutate(dplyr::across(dplyr::matches("DBpop\\d{4}|DBtdwell\\d{4}|DBurdwell\\d{4}|DBarea"),as.numeric))
}





66 changes: 66 additions & 0 deletions R/geographies.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#' Read the original StatCan geographic boundary files
#'
#' @description
#' Reads the original unprocessed geographic boundary files from Statistics Canada.
#' The zipped shapefile is downloaded and extracted into a `<type>/<level>`
#' subdirectory of the cache directory. Later calls read the extracted shapefile
#' from there unless `refresh` is `TRUE` or no shapefile is found.
#'
#' @param census_year census year to get the data for, right now only 2021 is supported
#' @param level geographic level to return the data for, valid choices are
#' "PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR".
#' "CMACA", "CMA" and "CA" all resolve to the same boundary file.
#' @param type type of geographic data, valid choices are "cartographic" or "digital"
#' @param cache_path optional path to cache the data. If `NULL` (the default) the
#' "geographies" subdirectory of the cancensus cache path is used.
#' @param timeout optional timeout parameter, adjust as needed if the data download times out when using slow connections
#' @param refresh (logical) refresh the cache if true
#' @param quiet (logical) suppress messages if `TRUE`
#' @return a spatial dataframe with the geographic data
#'
#' @examples
#' # get the digital geographic boundaries for provinces and territories
#' \dontrun{
#' get_statcan_geographies(census_year="2021",level="PR",type="digital")
#' }
#' @export
get_statcan_geographies <- function(census_year,level,type="cartographic",
                                    cache_path = NULL,timeout=1000,
                                    refresh=FALSE,quiet=FALSE) {
  valid_census_years <- c("2021")
  valid_levels <- c("PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR")
  valid_types <- c("cartographic","digital")
  if (!(census_year %in% valid_census_years)) {
    stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),"."))
  }
  if (!(type %in% valid_types)) {
    stop(paste0("Type must be one of ",paste0(valid_types,collapse = ", "),"."))
  }
  if (!(level %in% valid_levels)) {
    stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),"."))
  }
  # sf is an optional dependency; fail early, before downloading anything.
  if (!requireNamespace("sf", quietly = TRUE)) {
    stop("The sf package is required to read the geographic data, please install it.")
  }

  # Translate user-facing level names to the abbreviations used in the file names.
  level_map <- c("CMACA"="CMA","CA"="CMA","POPCNTR"="PC")
  if (level %in% names(level_map)) level <- level_map[[level]]

  # Honour a user supplied cache location, otherwise fall back to the package
  # cache. The call `cache_path("geographies")` resolves to the package helper
  # function because R skips non-function bindings (the NULL argument) when
  # looking up a function name.
  if (is.null(cache_path)) {
    geo_base_path <- cache_path("geographies")
  } else {
    geo_base_path <- cache_path
  }
  geo_base_path <- file.path(geo_base_path,type)
  if (!dir.exists(geo_base_path)) dir.create(geo_base_path,recursive = TRUE)
  exdir <- file.path(geo_base_path,level)

  if (refresh || !dir.exists(exdir) || length(dir(exdir,"\\.shp$"))==0) {
    # "b" selects the cartographic boundary file, "a" the digital one.
    if (type=="cartographic") typeID <- "b" else typeID <- "a"
    # File names pad the level abbreviation to a fixed width.
    if (nchar(level)==2) {
      filler <- "_000"
    } else if (nchar(level)==3) {
      filler <- "000"
    } else {
      stop(paste0("Problem, don't know how to get geographic data for level ",level,"."))
    }
    url <- paste0("https://www12.statcan.gc.ca/census-recensement/",census_year,"/geo/sip-pis/boundary-limites/files-fichiers/l",tolower(level),filler,typeID,"21a_e.zip")
    tmp <- tempfile(fileext = ".zip")
    # Raise the download timeout only for this call; restore the previous
    # option and remove the temporary archive even if the download fails.
    old_options <- options(timeout = timeout)
    on.exit({
      options(old_options)
      unlink(tmp)
    }, add = TRUE)
    utils::download.file(url,tmp,mode="wb",quiet=quiet)
    utils::unzip(tmp,exdir = exdir)
  } else {
    if (!quiet) message("Reading geographic data from local cache.")
  }

  path <- dir(exdir,"\\.shp$",full.names = TRUE)
  if (length(path)==0) {
    stop(paste0("No shapefile found in ",exdir,", try again with refresh=TRUE."))
  }

  sf::read_sf(path)
}
6 changes: 3 additions & 3 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,15 @@ check_recalled_data_and_warn <- function(meta_file,params){
cached_data<-generate_metadata(meta_file,params)
recalled_data <- list_recalled_cached_data(cached_data,warn_only_once=TRUE)
if (!is.null(recalled_data) && nrow(recalled_data)>0) {
warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data.")
warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data.")
}
d<-NULL
}

check_for_recalled_data_and_warn <- function(){
recalled_data <- list_recalled_cached_data(warn_only_once=TRUE)
if (!is.null(recalled_data) && nrow(recalled_data)>0) {
warning(paste0("Some locally cached data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data."))
warning(paste0("Some locally cached data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data."))
}
d<-NULL
}
Expand All @@ -129,7 +129,7 @@ first_run_checks <- function(){
#' @name CODES_TABLE
#' @docType data
#' @author derived from StatCan definitions
#' @references \url{https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo012-eng.cfm}
#' @references \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CSDtype}, \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CDtype}
#' @keywords data
NULL

Expand Down
Binary file added R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit a39c509

Please sign in to comment.