diff --git a/.Rbuildignore b/.Rbuildignore index 80d738df..503e01ab 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,5 +25,4 @@ lastMiKTeXException ^doc$ -^R/geo_suite.R ^CRAN-SUBMISSION$ diff --git a/DESCRIPTION b/DESCRIPTION index 656e5ab3..dae0b64b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cancensus Type: Package Title: Access, Retrieve, and Work with Canadian Census Data and Geography -Version: 0.5.4 +Version: 0.5.5 Authors@R: c( person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut"), comment = "API creator and maintainer"), person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"), @@ -38,7 +38,8 @@ Suggests: knitr, sf, geojsonsf, tidyr, - lwgeom + lwgeom, + xml2 VignetteBuilder: knitr URL: https://github.com/mountainMath/cancensus, https://mountainmath.github.io/cancensus/, https://censusmapper.ca/api BugReports: https://github.com/mountainMath/cancensus/issues diff --git a/NAMESPACE b/NAMESPACE index d5f26572..89fe0e88 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,12 @@ export(find_census_vectors) export(get_census) export(get_census_geometry) export(get_intersecting_geometries) +export(get_statcan_geo_suite) +export(get_statcan_geographic_attributes) +export(get_statcan_geographies) +export(get_statcan_geography_relationships) +export(get_statcan_wds_data) +export(get_statcan_wds_metadata) export(label_vectors) export(list_cancensus_cache) export(list_census_datasets) diff --git a/NEWS.md b/NEWS.md index 1e918aa8..4db26f3b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# cancensus 0.5.5 + +- add functionality for direct access to StatCan census WDS for 2021 +- add functionality to download original StatCan geographies for 2021 +- update CODES_TABLE for 2021 census + # cancensus 0.5.4 - added ability to query census datasets by year diff --git a/R/geo_suite.R b/R/geo_suite.R index ac198b13..2ac939e3 
100644 --- a/R/geo_suite.R +++ b/R/geo_suite.R @@ -1,28 +1,26 @@ - - - #' Read the geosuite data #' #' @description -#' Reads the geosuite data for the given level and census year. Data gets cached after first download. +#' Reads the geosuite data for the given level and census year. Data gets cached after first download if the +#' cancensus cache path has been set. For older +#' years `get_statcan_geographic_attributes()` can fill in most of the information #' #' @param level geographic level to return the data for, valid choices are -#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR" +#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR", "FED", "DPL", "ER", "PN", "POPCTR" #' @param census_year census year to get the data for, right now only 2021 is supported #' @param refresh (logical) refresh the cache if true #' @return tibble with the geosuite data #' -#' @keywords internal #' #' @examples #' # list add the cached census data #' \dontrun{ -#' get_geo_suite("DA","2021") +#' get_statcan_geo_suite("DA","2021") #' } #' @export -get_geo_suite <- function(level,census_year="2021",refresh=FALSE){ +get_statcan_geo_suite <- function(level,census_year="2021",refresh=FALSE){ valid_years <- c("2021") #seq(2001,2021,5) %>% as.character() - valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR") + valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR","FED","DPL","ER","PN","POPCTR") if (!(as.character(census_year) %in% valid_years)) { stop(paste0("Only census years ",paste0(valid_years,collapse = ", "), " are supported for GeoSuite")) @@ -77,10 +75,12 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){ #' #' @description #' Reads the Dissemination Geographies Relationship File for the given census year. The table contains -#' the information on how all the geographic levels are related for each area. A reference guide is available +#' the information on how all the geographic levels are related for each area. 
Data gets cached after first download if the +#' cancensus cache path has been set. A reference guide is available #' at https://www150.statcan.gc.ca/n1/en/catalogue/982600032021001 #' -#' @param census_year census year to get the data for, right now only 2021 is supported +#' @param census_year census year to get the data for, right now only 2021 is supported, for older +#' years `get_statcan_geographic_attributes()` can fill in most of the information #' @param refresh (logical) refresh the cache if true #' @return tibble with the relationship data #' @@ -89,10 +89,10 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){ #' @examples #' # list add the cached census data #' \dontrun{ -#' get_geography_relationship("2021") +#' get_statcan_geography_relationships("2021") #' } #' @export -get_geography_relationship <- function(census_year="2021", refresh=FALSE){ +get_statcan_geography_relationships <- function(census_year="2021", refresh=FALSE){ valid_years <- c("2021") if (!(as.character(census_year) %in% valid_years)) { stop(paste0("Only census years ",paste0(valid_years,collapse = ", "), @@ -113,28 +113,30 @@ get_geography_relationship <- function(census_year="2021", refresh=FALSE){ #' @description #' Reads the Geographies Attributes File for the given census year. The table contains #' the information on how all the geographic levels are related for each area, and population, dwelling and household counts. -#' A reference guide is available +#' Data gets cached after first download if the +#' cancensus cache path has been set. 
A reference guide is available #' at https://www150.statcan.gc.ca/n1/en/catalogue/92-151-G2021001 #' -#' @param census_year census year to get the data for, right now only 2006, 2011 and 2016 are supported +#' @param census_year census year to get the data for, right now only 2006, 2011, 2016, 2021 are supported #' @param refresh (logical) refresh the cache if true #' @return tibble with the relationship data #' #' @examples #' # list add the cached census data -#' get_geographic_attributes("2016") -#' +#' \dontrun{ +#' get_statcan_geographic_attributes("2021") +#' } #' @export -get_geographic_attributes <- function(census_year="2016",refresh=FALSE){ +get_statcan_geographic_attributes <- function(census_year="2021",refresh=FALSE){ census_year <- as.character(census_year)[1] - valid_years <- seq(2006,2016,5) %>% as.character + valid_years <- seq(2006,2021,5) %>% as.character if (!(as.character(census_year) %in% valid_years)) { stop(paste0("Only census years ",paste0(valid_years,collapse = ", "), " are supported for the geographic relationship file.")) } - urls <- c("2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip", + urls <- c("2021"="https://www12.statcan.gc.ca/census-recensement/2021/geo/aip-pia/attribute-attribs/files-fichiers/2021_92-151_X.zip", + "2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip", "2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_txt.zip", - #"2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_xlsx.zip", "2006"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2006_92-151_XBB_txt.zip") base_path <- cache_path("attribute_files") @@ -147,8 +149,9 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){ utils::download.file(urls[[census_year]],tmp) utils::unzip(tmp,exdir = base_path_year) 
} - file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE) - if (census_year=="2016") { + if (census_year=="2021") file <- dir(base_path_year,pattern="\\.csv",full.names = TRUE) + else file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE) + if (census_year %in% c("2016","2021")) { result <- readr::read_csv(file,col_types = readr::cols(.default="c"), locale = readr::locale(encoding ="Windows-1252")) } else { @@ -175,3 +178,7 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){ dplyr::mutate(dplyr::across(dplyr::matches("DBpop\\d{4}|DBtdwell\\d{4}|DBurdwell\\d{4}|DBarea"),as.numeric)) } + + + + diff --git a/R/geographies.R b/R/geographies.R new file mode 100644 index 00000000..fe5ce12f --- /dev/null +++ b/R/geographies.R @@ -0,0 +1,66 @@ +#' Read the StatCan geographic boundary files +#' +#' @description +#' Reads the original unprocessed geographic boundary files from Statistics Canada +#' +#' @param census_year census year to get the data for, right now only 2021 is supported +#' @param level geographic level to return the data for, valid choices are +#' "PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR" +#' @param type type of geographic data, valid choices are "cartographic" or "digital" +#' @param cache_path optional path to cache the data. By default (`NULL`) the geographic data gets +#' cached in the "geographies" subdirectory of the cancensus cache path. 
+#' @param timeout optional timeout parameter, adjust as needed if the data download times out when using slow connections +#' @param refresh (logical) refresh the cache if true +#' @param quiet (logical) suppress messages if `TRUE` +#' @return a spatial dataframe with the geographic data +#' +#' @examples +#' # get the digital geographic boundaries for provinces and territories +#' \dontrun{ +#' get_statcan_geographies(census_year="2021",level="PR",type="digital") +#' } +#' @export +get_statcan_geographies <- function(census_year,level,type="cartographic", + cache_path = NULL,timeout=1000, + refresh=FALSE,quiet=FALSE) { + valid_census_years <- c("2021") + valid_levels <- c("PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR") + valid_types <- c("cartographic","digital") + if (!(census_year %in% valid_census_years)) { + stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),".")) + } + if (!(type %in% valid_types)) { + stop(paste0("Type must be one of ",paste0(valid_types,collapse = ", "),".")) + } + if (!(level %in% valid_levels)) { + stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),".")) + } + level_map <- c("CMACA"="CMA","CA"="CMA","POPCNTR"="PC") + if (level %in% names(level_map)) level <- level_map[[level]] + geo_base_path <- if (is.null(cache_path)) cache_path("geographies") else cache_path + if (!dir.exists(geo_base_path)) dir.create(geo_base_path) + geo_base_path <- file.path(geo_base_path,type) + if (!dir.exists(geo_base_path)) dir.create(geo_base_path) + exdir <- file.path(geo_base_path,level) + if (refresh || !dir.exists(exdir) || length(dir(exdir,"\\.shp$"))==0) { + # raise the download timeout and restore the previous value on exit, even if the download fails + old_options <- options(timeout = timeout) + on.exit(options(old_options), add = TRUE) + if (type=="cartographic") typeID <- "b" else typeID <- "a" + if (nchar(level)==2) filler <- "_000" + else if (nchar(level)==3) filler <- "000" + else { + stop(paste0("Problem, don't know how to get geographic data for level ",level,".")) + } + url <- 
paste0("https://www12.statcan.gc.ca/census-recensement/",census_year,"/geo/sip-pis/boundary-limites/files-fichiers/l",tolower(level),filler,typeID,"21a_e.zip") + tmp <- tempfile() + utils::download.file(url,tmp,mode="wb",quiet=quiet) + utils::unzip(tmp,exdir = exdir) + } else { + if (!quiet) message("Reading geographic data from local cache.") + } + path <- dir(exdir,"\\.shp$",full.names = TRUE) + + geos <- sf::read_sf(path) + geos +} diff --git a/R/helpers.R b/R/helpers.R index 09b12f3f..a46ddd0f 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -101,7 +101,7 @@ check_recalled_data_and_warn <- function(meta_file,params){ cached_data<-generate_metadata(meta_file,params) recalled_data <- list_recalled_cached_data(cached_data,warn_only_once=TRUE) if (!is.null(recalled_data) && nrow(recalled_data)>0) { - warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data.") + warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data.") } d<-NULL } @@ -109,7 +109,7 @@ check_recalled_data_and_warn <- function(meta_file,params){ check_for_recalled_data_and_warn <- function(){ recalled_data <- list_recalled_cached_data(warn_only_once=TRUE) if (!is.null(recalled_data) && nrow(recalled_data)>0) { - warning(paste0("Some locally cached data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data.")) + warning(paste0("Some locally cached data has been recalled. 
Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data.")) } d<-NULL } @@ -129,7 +129,7 @@ first_run_checks <- function(){ #' @name CODES_TABLE #' @docType data #' @author derived from StatCan definitions -#' @references \url{https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo012-eng.cfm} +#' @references \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CSDtype}, \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CDtype} #' @keywords data NULL diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 00000000..4921297d Binary files /dev/null and b/R/sysdata.rda differ diff --git a/R/wds.R b/R/wds.R new file mode 100644 index 00000000..7e595504 --- /dev/null +++ b/R/wds.R @@ -0,0 +1,174 @@ +#' Query the StatCan WDS for metadata +#' +#' @description +#' Get official metadata information from Statistics Canada for a given geographic level. Only available for the 2021 census. +#' Data is cached for the duration of the R session. 
+#' +#' @param census_year census year to get the data for, right now only 2021 is supported +#' @param level geographic level to return the data for, valid choices are +#' "PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR" +#' @param refresh default is `FALSE` will refresh the temporary cache if `TRUE` +#' @return tibble with the metadata +#' +#' @examples +#' # get metadata for federal electoral districts +#' \dontrun{ +#' get_statcan_wds_metadata(census_year="2021",level="FED") +#' } +#' @export +get_statcan_wds_metadata <- function(census_year,level,refresh=FALSE){ + valid_census_years <- c("2021") + valid_levels <- c("PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR") + if (!(census_year %in% valid_census_years)) { + stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),".")) + } + if (!(level %in% valid_levels)) { + stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),".")) + } + meta_url <- paste0("https://api.statcan.gc.ca/census-recensement/profile/sdmx/rest/dataflow/STC_CP/DF_",level,"?references=all") + metadata_tempfile <- file.path(tempdir(),paste0("census_wds_metadata_",digest::digest(meta_url),".sdmx")) + if (refresh || !file.exists(metadata_tempfile)) { + utils::download.file(meta_url,metadata_tempfile) + } + d <- xml2::read_xml(metadata_tempfile) + code_lists <- xml2::xml_find_all(d,"//structure:Codelist") + + meta_data <- lapply(code_lists, \(cl){ + codelist_id <- cl |> xml2::xml_attr("id") + agencyID <- cl |> xml2::xml_attr("agencyID") + codelist_en <- cl |> xml2::xml_find_all("common:Name[@xml:lang='en']") |> xml2::xml_text() + codelist_fr <- cl |> xml2::xml_find_all("common:Name[@xml:lang='fr']") |> xml2::xml_text() + description_en <- cl |> xml2::xml_find_all("common:Name[@xml:lang='en']") |> xml2::xml_text() + description_fr <- cl |> xml2::xml_find_all("common:Name[@xml:lang='fr']") |> xml2::xml_text() + codes <- cl |> xml2::xml_find_all("structure:Code") + 
dplyr::tibble(`Agency ID`=agencyID, + `Codelist ID`=codelist_id, + `Codelist en`=codelist_en, + `Codelist fr`=codelist_fr, + ID=codes |> xml2::xml_attr("id"), + en=codes |> xml2::xml_find_all("common:Name[@xml:lang='en']") |> xml2::xml_text(), + fr=codes |> xml2::xml_find_all("common:Name[@xml:lang='fr']") |> xml2::xml_text(), + `Parent ID`=codes |> xml2::xml_find_all("structure:Parent/Ref",flatten=FALSE) |> + lapply(\(d)ifelse(is.null(d),NA,xml2::xml_attr(d,"id"))) |> unlist() + ) + }) |> + dplyr::bind_rows() + meta_data +} + +#' Query the StatCan WDS for data +#' +#' @description +#' Get official census data from Statistics Canada for a given set of DGUIDs. Only available for the 2021 census. The +#' downloaded data gets enriched by geographic and characteristic names based on metadata obtained via `get_statcan_wds_metadata()`. +#' Data is cached for the duration of the R session. +#' +#' @param DGUIDs vector of DGUIDs to get the data for, the geographic level is inferred from the first DGUID. Valid DGUIDs for a given geographic +#' level can be queried via `get_statcan_wds_metadata()`. +#' @param members list of Member IDs to download data for. By default all characteristics are downloaded. Valid +#' Member IDs and their descriptions can be queried via the `get_statcan_wds_metadata()` call. +#' @param gender optionally query data for only one gender. By default this queries data for all genders, possible +#' values are "Total", "Male", "Female" to only query total data, or for males only or for females only. 
+#' @param language specify language for geography and characteristic names that get added, valid choices are "en" and "fr" +#' @param refresh default is `FALSE`, will re-download the data and metadata and refresh the temporary cache if `TRUE` +#' @return tibble with the enriched census data +#' +#' @examples +#' # get data for federal electoral district 2013A000459021 +#' \dontrun{ +#' get_statcan_wds_data(DGUIDs="2013A000459021") +#' } +#' @export +get_statcan_wds_data <- function(DGUIDs, + members = NULL, + gender="All", + language="en", + refresh=FALSE) { + DGUIDs <- sort(DGUIDs) + members <- sort(members) + level <- geo_level_from_DGUID(DGUIDs[1]) + url <- paste0("https://api.statcan.gc.ca/census-recensement/profile/sdmx/rest/data/STC_CP") + gender <- tolower(gender) + gender <- paste0(toupper(substr(gender,1,1)),substr(gender,2,100)) + valid_genders <- c("All","Total","Male","Female") + if (!(gender %in% valid_genders)) { + stop(paste0("Gender must be one of ",paste0(valid_genders,collapse = ", "),".")) + } + language <- tolower(language) + valid_languages <- c("en","fr") + if (!(language %in% valid_languages)) { + stop(paste0("Language must be one of ",paste0(valid_languages,collapse = ", "),".")) + } + gender <- c("All"="","Total"="1","Male"="2","Female"="3")[[gender]] + dguid_string <- paste0(DGUIDs,collapse="+") + member_string <- paste0(members,collapse = "+") + add <- paste0("DF_",level,"/A5.",dguid_string,".",gender,".",member_string,".1") + wds_data_tempfile <- file.path(tempdir(),paste0("wds_data_",digest::digest(add),".csv")) + if (refresh || !file.exists(wds_data_tempfile)) { + response <- httr::GET(paste0(url,",",add), + httr::accept("text/csv"), + httr::add_headers("Accept-Encoding"="deflate, gzip, br"), + httr::write_disk(wds_data_tempfile,overwrite = TRUE)) + if (response$status_code != 200) { + on.exit(unlink(wds_data_tempfile), add = TRUE) # do not keep a failed response in the session cache + stop(paste0("Invalid request.\n",httr::content(response))) + } + } + census_year <- "2021" + meta_data <- get_statcan_wds_metadata(census_year,level,refresh = refresh) + 
+ # code lists used to attach geography and characteristic names + meta_geos <- meta_data |> + dplyr::filter(.data$`Codelist ID`==paste0("CL_GEO_",level)) + meta_characteristics <- meta_data |> + dplyr::filter(.data$`Codelist ID`=="CL_CHARACTERISTIC") + + name_field <- language #paste0(language,"_description") + + data <- readr::read_csv(wds_data_tempfile,col_types = readr::cols(.default="c")) |> + dplyr::mutate(dplyr::across(dplyr::matches("OBS_VALUE|TNR_CI_"),as.numeric)) |> + dplyr::left_join(meta_geos |> + dplyr::select(GEO_DESC=.data$ID,GEO_NAME=!!as.name(name_field)), + by="GEO_DESC") |> + dplyr::left_join(meta_characteristics |> + dplyr::select(CHARACTERISTIC=.data$ID,CHARACTERISTIC_NAME=!!as.name(name_field)), + by="CHARACTERISTIC") + + data +} + + +# Infer the geographic level from characters 6-9 (the schema) of a DGUID; with simple=TRUE sub-levels are collapsed via simple_translation (e.g. CMA and CA become CMACA) +geo_level_from_DGUID <- function(DGUID,simple=TRUE){ + schema <- substr(DGUID,6,9) + schema_to_level <- c("0000"="C","0001"="C", + "0002"="PR", + "0003"="CD", + "0004"="FED", + "0005"="CSD", + "0006"="DPL", + "0007"="HR", + "0008"="LHR", + "0011"="FSA", + "0500"="ER", + "0501"="CAR", + "0502"="CCSD", + "0503"="CMA", + "0504"="CA", + "0505"="CMAP", + "0507"="CT", + "0510"="PC", + "0511"="PCP", + "0512"="DA", + "0513"="DB", + "0516"="ADA") + if (!(schema %in% names(schema_to_level))) stop(paste0("Unknown geographic level schema ",schema," in DGUID ",DGUID,".")) + level <- schema_to_level[[schema]] + if (simple){ + simple_translation <- c("LHR"="HR","CMA"="CMACA","CA"="CMACA","CMAP"="CMACA") + if (level %in% names(simple_translation)) level <- simple_translation[[level]] + } + level +} + + diff --git a/README.md b/README.md index b50dca47..0187dd36 100644 --- a/README.md +++ b/README.md @@ -139,17 +139,17 @@ There are several other jurisdiction where census data is available via R packag If you wish to cite cancensus: von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2022). cancensus: R package to - access, retrieve, and work with Canadian Census data and geography. v0.5.3. + access, retrieve, and work with Canadian Census data and geography. v0.5.5. 
A BibTeX entry for LaTeX users is ``` - @Manual{, + @Manual{cancensus, author = {Jens {von Bergmann} and Dmitry Shkolnik and Aaron Jacobs}, title = {cancensus: R package to access, retrieve, and work with Canadian Census data and geography}, year = {2022}, - note = {R package version 0.5.3}, - url = {https://mountainmath.github.io/cancensus/}, + note = {R package version 0.5.5}, + url = {https://mountainmath.github.io/cancensus/} } ``` ### Related packages @@ -164,7 +164,7 @@ The [tongfen package](https://mountainmath.github.io/tongfen/index.html) automat ### Statistics Canada Attribution -Subject to the Statistics Canada Open License Agreement, licensed products using Statistics Canada data should employ the following aknowledgement of source: +Subject to the Statistics Canada Open Data License Agreement, licensed products using Statistics Canada data should employ the following acknowledgement of source: ``` Acknowledgment of Source diff --git a/cran-comments.md b/cran-comments.md index d9ad2c5f..003bf85a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,8 @@ +# Update 0.5.5 +- add functionality for direct access to StatCan census WDS for 2021 +- add functionality to download original StatCan geographies for 2021 +- update CODES_TABLE for 2021 census + # Update - 0.5.4 - added ability to query census datasets by year - add a convenience function for creating unique names within given selection of regions from `list_census_regions()` diff --git a/docs/404.html b/docs/404.html index 7ca15fee..af0f1d6f 100644 --- a/docs/404.html +++ b/docs/404.html @@ -50,7 +50,7 @@
@@ -78,6 +78,12 @@
# Attribution for the dataset to be used in graphs
-attribution <- dataset_attribution("CA16xSD")
+attribution <- dataset_attribution("CA16xSD")
# Select all base variables, this gives us total counts by structural type of dwelling
-vars <- list_census_vectors("CA16xSD") %>%
+vars <- list_census_vectors("CA16xSD") %>%
filter(is.na(parent_vector))
variables <- setNames(vars$vector,vars$label)
@@ -207,7 +213,7 @@ E
dwelling_types <- setdiff(names(variables),"Total dwellings")
# Grab the census data and compute shares for each dwelling type
-census_data <- get_census("CA16xSD",regions=list(CSD="3520005"), vectors = variables, quiet = TRUE) %>%
+census_data <- get_census("CA16xSD",regions=list(CSD="3520005"), vectors = variables, quiet = TRUE) %>%
pivot_longer(cols = all_of(dwelling_types)) %>%
mutate(share=value/`Total dwellings`)
To visualize what this looks like on a bar chart:
@@ -222,7 +228,7 @@As with regular Census data, all data can be retrieved as spatial
data. Sometimes it’s easier to use the CensusMapper API interface to
search for and select the variables we are interested in. The
-explore_census_vectors()
function opens a browser with the
+explore_census_vectors()
function opens a browser with the
variable selection tool, we determine that “v_CA16xSD_1” and
“v_CA16xSD_28” are the variables enumerating all dwellings and all
unoccupied dwellings, respectively.
Adding colour ramps and additional interactivity takes a little bit +
+Adding colour ramps and additional interactivity takes a little bit more work but is still pretty easy to implement. Following this example we can specify the colour ramp to match our needs.
@@ -236,8 +242,8 @@Interactive maps with leaflet= 1, opacity = 1, fillOpacity = 0.65)
To see all available T1FF datasets and their reference codes we can
-use list_census_datasets()
.
list_census_datasets()
.
-list_census_datasets() %>%
+list_census_datasets() %>%
filter(grepl("taxfiler",description))
#> # A tibble: 19 × 6
#> dataset description geo_dataset attribution refer…¹ refer…²
@@ -198,7 +204,7 @@
-list_census_vectors('TX2017')
+list_census_vectors('TX2017')
#> # A tibble: 818 × 7
#> vector type label units parent…¹ aggre…² details
#> <chr> <fct> <chr> <fct> <chr> <chr> <chr>
@@ -219,7 +225,7 @@ CensusMapper
graphical variable selection interface, which can also be reached by
-calling explore_census_vectors()
from the R console. For
+calling explore_census_vectors()
from the R console. For
this example we are interested in low income families and note that the
internal CensusMapper vector for all families is of the form
*v_TX_607* and that for all families in low income is
@@ -231,7 +237,7 @@
years <- c(2006,2011,2014,2018)
# Attribution for the dataset to be used in graphs
-attribution <- dataset_attribution(paste0("TX",years))
+attribution <- dataset_attribution(paste0("TX",years))
plot_data <- years %>%
lapply(function(year) {
@@ -239,7 +245,7 @@ vectors <- c("Families"=paste0("v_",dataset,"_607"),
"CFLIM-AT"=paste0("v_",dataset,"_786"))
- get_census(dataset,regions=list(CMA="59933"),vectors = vectors,
+ get_census(dataset,regions=list(CMA="59933"),vectors = vectors,
geo_format = 'sf', level="CT", quiet = TRUE) %>%
select(c("GeoUID",names(vectors))) %>%
mutate(Year=year)
@@ -279,7 +285,7 @@ scale_fill_gradient2(labels=scales::percent) +
#scale_fill_viridis_c(labels=scales::percent,option = "inferno") +
coord_sf(datum=NA,xlim=c(-123.4, -122.5), ylim=c(49.01, 49.4)) +
- labs(title="Change in share of census families in low income 2006-2011",fill="Percentage\npoint change",caption=dataset_attribution(paste0("TX",c(2006,2011))))
Analyzing change over longer timelines that span changes in Census
geometries involves more work, the tongfen
diff --git a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png
index c24b06d8..91ff0caa 100644
Binary files a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png differ
diff --git a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png
index c8eeeae1..1712453d 100644
Binary files a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png differ
diff --git a/docs/articles/cancensus.html b/docs/articles/cancensus.html
index fba7a17a..fd647ce0 100644
--- a/docs/articles/cancensus.html
+++ b/docs/articles/cancensus.html
@@ -51,7 +51,7 @@
cancensus utilizes caching to increase speed,
@@ -230,7 +236,7 @@ Accessing Census Data
@@ -79,6 +79,12 @@
# Returns a data frame with data only
-census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
+census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
level='CSD', use_cache = FALSE, geo_format = NA, quiet = TRUE)
# Returns data and geography as an sf-class data frame
-census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
+census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
level='CSD', use_cache = FALSE, geo_format = 'sf', quiet = TRUE)
# Returns a SpatialPolygonsDataFrame object with data and geography
-census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
+census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
level='CSD', use_cache = FALSE, geo_format = 'sp', quiet = TRUE)
Accessing Census Datause_cache = FALSE as a parameter for
get_census
.
Additional parameters for advanced options can be viewed by running
-?get_census
.
?get_census
.
The function list_census_datasets()
will show all
+
The function list_census_datasets()
will show all
available datasets alongside their metadata.
-list_census_datasets()
+list_census_datasets()
#> # A tibble: 29 × 6
#> dataset description geo_d…¹ attri…² refer…³ refer…⁴
#> <chr> <chr> <chr> <chr> <chr> <chr>
@@ -272,7 +278,7 @@ Census Datasets#> # ²attribution, ³reference, ⁴reference_url
As other Census datasets become available via the CensusMapper API,
they will be listed as output when calling
-list_census_datasets()
.
list_census_datasets()
.
-list_census_regions("CA21")
+list_census_regions("CA21")
#> # A tibble: 5,518 × 8
#> region name level pop munic…¹ CMA_UID CD_UID PR_UID
#> <chr> <chr> <chr> <int> <chr> <chr> <chr> <chr>
@@ -308,7 +314,7 @@ Census Regions
# Retrieves Vancouver and Toronto
-list_census_regions('CA21') %>%
+list_census_regions('CA21') %>%
filter(level == "CMA", name %in% c("Vancouver","Toronto"))
#> # A tibble: 2 × 8
#> region name level pop municipal_status CMA_UID CD_UID PR_UID
@@ -316,7 +322,7 @@ Census Regions#> 1 35535 Toronto CMA 6202225 B NA NA 35
#> 2 59933 Vancouver CMA 2642825 B NA NA 59
-census_data <- get_census(dataset='CA21', regions=list(CMA=c("59933","35535")),
+census_data <- get_census(dataset='CA21', regions=list(CMA=c("59933","35535")),
vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
level='CSD', use_cache = FALSE, quiet = TRUE)
Run list_census_vectors(dataset)
to view all available
Census variables for a given dataset.
-list_census_vectors("CA21")
-#> # A tibble: 5,756 × 7
+list_census_vectors("CA21")
+#> # A tibble: 7,709 × 7
#> vector type label units paren…¹ aggre…² details
#> <chr> <fct> <chr> <fct> <chr> <chr> <chr>
#> 1 v_CA21_1 Total Population, 2021 Numb… NA Additi… CA 202…
@@ -431,7 +437,7 @@ Displaying available Census varia
#> 8 v_CA21_8 Total Total - Age Numb… NA Additi… CA 202…
#> 9 v_CA21_9 Male Total - Age Numb… NA Additi… CA 202…
#> 10 v_CA21_10 Female Total - Age Numb… NA Additi… CA 202…
-#> # … with 5,746 more rows, and abbreviated variable names ¹parent_vector,
+#> # … with 7,699 more rows, and abbreviated variable names ¹parent_vector,
#> # ²aggregation
Each Census dataset features numerous variables making it a bit of a
challenge to find the exact variable you are looking for. There is a
-function, find_census_vectors()
, for searching through
+function, find_census_vectors()
, for searching through
Census variable metadata in a few different ways. There are three types
of searches possible using this function: exact search, which simply
looks for exact string matches for a given query against the vector
@@ -474,20 +480,20 @@
# Find the variable indicating the number of people of Australian ethnic origin
-find_census_vectors("Australia", dataset = "CA16", type = "total", query_type = "exact")
+find_census_vectors("Australia", dataset = "CA16", type = "total", query_type = "exact")
#> # A tibble: 2 × 4
#> vector type label details
#> <chr> <fct> <chr> <chr>
#> 1 v_CA16_3813 Total Australia 25% Data; Citizenship and Immigration; Total - S…
#> 2 v_CA16_4809 Total Australian 25% Data; Minority / Origin; Total - Ethnic orig…
-find_census_vectors("Australia origin", dataset = "CA16", type = "total", query_type = "semantic")
+find_census_vectors("Australia origin", dataset = "CA16", type = "total", query_type = "semantic")
#> # A tibble: 1 × 4
#> vector type label details
#> <chr> <fct> <chr> <chr>
#> 1 v_CA16_4809 Total Australian 25% Data; Minority / Origin; Total - Ethnic orig…
-find_census_vectors("Australian ethnic", dataset = "CA16", type = "total", query_type = "keyword", interactive = FALSE)
+find_census_vectors("Australian ethnic", dataset = "CA16", type = "total", query_type = "keyword", interactive = FALSE)
#> # A tibble: 1 × 4
#> vector type label details
#> <chr> <fct> <chr> <chr>
@@ -501,9 +507,9 @@ Managing variable hierarchy
-list_census_vectors("CA16") %>%
+list_census_vectors("CA16") %>%
filter(vector == "v_CA16_4092") %>%
- parent_census_vectors()
+ parent_census_vectors()
#> # A tibble: 3 × 7
#> vector type label units paren…¹ aggre…² details
#> <chr> <fct> <chr> <fct> <chr> <chr> <chr>
@@ -519,7 +525,7 @@ Managing variable hierarchy
# Find the variable indicating the Northern European aggregate
-find_census_vectors("Northern European", dataset = "CA16", type = "Total")
+find_census_vectors("Northern European", dataset = "CA16", type = "Total")
#> # A tibble: 7 × 4
#> vector type label details
#> <chr> <fct> <chr> <chr>
@@ -536,8 +542,8 @@ Managing variable hierarchy
# Show all child variable leaves
-list_census_vectors("CA16") %>%
- filter(vector == "v_CA16_4122") %>% child_census_vectors(leaves = TRUE)
+list_census_vectors("CA16") %>%
+ filter(vector == "v_CA16_4122") %>% child_census_vectors(leaves = TRUE)
#> # A tibble: 6 × 7
#> vector type label units paren…¹ aggre…² details
#> <chr> <fct> <chr> <fct> <chr> <chr> <chr>
diff --git a/docs/articles/data_discovery.html b/docs/articles/data_discovery.html
index 9b6c7a13..d9f44432 100644
--- a/docs/articles/data_discovery.html
+++ b/docs/articles/data_discovery.html
@@ -51,7 +51,7 @@
cancensus
can access Statistics Canada Census data for
the 1996, 2001, 2006 Censuses, the 2011 Census and National Household
-Survey, as well as the 2016 Census. You can run
+Survey, the 2016 Census, as well as the 2021 Census. You can run
list_census_datasets
to check what datasets are currently
-available for access through the CensusMapper API. Additional data for
-the 2016 Census will be included in CensusMapper within a day or two
-after public release by Statistics Canada. Statistics Canada maintains a
-release schedule for the Census 2016 Program which can be viewed on
-their website.
Thanks to contributions by the Canada Mortgage and Housing
Corporation (CMHC), cancensus
now includes additional
Census-linked datasets as open-data releases. These include annual
@@ -146,7 +148,7 @@
list_census_datasets()
## # A tibble: 29 × 6
## dataset description geo_d…¹ attri…² refer…³ refer…⁴
## <chr> <chr> <chr> <chr> <chr> <chr>
@@ -162,7 +164,7 @@ Census datasets## 10 CA16xSD 2016 Canada Census xtab - Structural… CA16 StatCa… 98-301… https:…
## # … with 19 more rows, and abbreviated variable names ¹geo_dataset,
## # ²attribution, ³reference, ⁴reference_url
-The list_census_datasets()
function also provides
+
The list_census_datasets()
function also provides
additional background like series reference code, catalogue reference,
and attribution details.
-list_census_vectors('CA21')
## # A tibble: 5,756 × 7
+list_census_vectors('CA21')
+## # A tibble: 7,709 × 7
## vector type label units paren…¹ aggre…² details
## <chr> <fct> <chr> <fct> <chr> <chr> <chr>
## 1 v_CA21_1 Total Population, 2021 Numb… NA Additi… CA 202…
@@ -195,7 +197,7 @@ View available Census variable v
## 8 v_CA21_8 Total Total - Age Numb… NA Additi… CA 202…
## 9 v_CA21_9 Male Total - Age Numb… NA Additi… CA 202…
## 10 v_CA21_10 Female Total - Age Numb… NA Additi… CA 202…
-## # … with 5,746 more rows, and abbreviated variable names ¹parent_vector,
+## # … with 7,699 more rows, and abbreviated variable names ¹parent_vector,
## # ²aggregation
list_census_vectors(dataset)
retrieves an index of all
available vectors for a given dataset from the CensusMapper API or local
@@ -221,7 +223,7 @@
query_type
argument when calling
-find_census_vectors()
function.
+find_census_vectors()
function.
Note that variable search is optimized for the Census variables in the main Census datasets. While searches generally work for variables in additional datasets such as cross-tabs and taxfiler data, they have not @@ -233,7 +235,7 @@
-find_census_vectors("Oji-cree", dataset = "CA16", type = "total", query_type = "exact")
+find_census_vectors("Oji-cree", dataset = "CA16", type = "total", query_type = "exact")
## # A tibble: 4 × 4
## vector type label details
## <chr> <fct> <chr> <chr>
@@ -243,12 +245,12 @@ Exact search## 4 v_CA16_5930 Total Oji-Cree 25% Data; Work; Total - Language used most often a…
This, on the other hand, will return a warning.
-find_census_vectors("Ojib-cree", dataset = "CA16", type = "total", query_type = "exact")
find_census_vectors("Ojib-cree", dataset = "CA16", type = "total", query_type = "exact")
## Warning: No exact matches found. Please check spelling and try again or consider using semantic or keyword search.
## See ?find_census_vectors() for more details.
##
## Alternatively, you can launch the Censusmapper web API in a browser by calling explore_census_vectors(dataset)
-Unless otherwise specified, find_census_vectors()
will
+
Unless otherwise specified, find_census_vectors()
will
use exact search as the default option.
-find_census_vectors('commute mode', dataset = 'CA16', type = 'female', query_type = 'keyword', interactive = FALSE)
find_census_vectors('commute mode', dataset = 'CA16', type = 'female', query_type = 'keyword', interactive = FALSE)
## # A tibble: 7 × 4 ## vector type label details ## <chr> <fct> <chr> <chr> @@ -276,7 +278,7 @@
, which will prompt the user with a console menu option to see the rest of the matches or not. If using -Keyword searchinteractive = TRUE
find_census_vectors()
in a script or reproducible +find_census_vectors()
in a script or reproducible documentation, we recommend setting this argument tointeractive = FALSE
. @@ -289,7 +291,7 @@Semantic search
+-find_census_vectors("after tax incomes", dataset = "CA16", type = "total", query_type = "semantic")
find_census_vectors("after tax incomes", dataset = "CA16", type = "total", query_type = "semantic")
## # A tibble: 56 × 4
## vector type label details
## <chr> <fct> <chr> <chr>
@@ -308,14 +310,14 @@ Semantic search
-find_census_vectors("ojib cree", dataset = "CA16", type = "total", query_type = "exact")
+find_census_vectors("ojib cree", dataset = "CA16", type = "total", query_type = "exact")
## Warning: No exact matches found. Please check spelling and try again or consider using semantic or keyword search.
## See ?find_census_vectors() for more details.
##
## Alternatively, you can launch the Censusmapper web API in a browser by calling explore_census_vectors(dataset)
This will find the correct Census vector.
-find_census_vectors('ojib cree', dataset = 'CA16', type = 'total', query_type = 'semantic')
find_census_vectors('ojib cree', dataset = 'CA16', type = 'total', query_type = 'semantic')
## Multiple possible matches. Results ordered by closeness.
## # A tibble: 4 × 4
## vector type label details
@@ -336,9 +338,9 @@ Census regionsStandard Geographical Classification
Statistics Canada uses an official classification of geographic areas
-known as the Standard
+known as the Standard
Geographical Classification (SGC), which is updated periodically.
-The latest version is based on the 2016 Census. Geographic
+The latest version is based on the 2021 Census. Geographic
classification codes are standardized across Statistics Canada products,
including the Census as well as any other Statistics Canada dataset. In
practice, this means that the region ID for the Vancouver Census
@@ -368,10 +370,10 @@
Standard Geographical Classificati
## level n
## <chr> <int>
## 1 C 1
-## 2 CA 14
+## 2 CA 9
## 3 CD 293
-## 4 CMA 35
-## 5 CSD 5162
+## 4 CMA 41
+## 5 CSD 5161
## 6 PR 13
There is also an additional region, with the id 01
and
the level code C
which represents all of Canada as a
@@ -392,17 +394,17 @@
All CMAs and CAs consist of Census subdivisions but not all Census subdivisions are a subset of a CMA or a CA. For more details on CMAs and -CAs, consult Statistics Canada’s Census Dictionary article for Census +CAs, consult Statistics Canada’s Census Dictionary article for Census metropolitan areas (CMA) and census agglomerations (CA). All CMAs and some CAs have data at the Census tract level, but most CAs do not. -The 2016 Census has 35 CMAs and 14 CAs with Census tracts that have -their own defined geography. There are a further 106 CAs without Census -tracts that do not have their own distinctly defined geographies.
+The 2021 Census has 41 CMAs and 9 CAs with Census tracts that have their +own defined geography. There are a further 102 CAs without Census tracts +that do not have their own distinctly defined geographies.Dissemination
+ Dissemination
areas (DA) are the smallest atomic geographic unit at which all
census data is captured. DAs cover the entirety of Canada and follow the
boundaries of census subdivisions and census tracts. While inter-census
@@ -411,15 +413,15 @@ Aside: dissemina
In addition to census boundaries, DAs will generally follow natural
boundaries created by other spatial features like roads, railways, water
features, and are designed to be spatially compact with a target
-population around 400-700 persons. The 2016 census data has 56,589
+population around 400-700 persons. The 2021 census data has 57,936
distinct DAs.
Enumeration areas (EA) were the DA equivalent for censuses prior to 2001. Similar to DAs, EAs were used as the basic level at which census data was collected. They do not necessarily correspond accurately to DAs in data from 2001 onwards.
-Dissemination -block (DB) level data is available for the 2001-2016 datasets. DBs +
Dissemination +block (DB) level data is available for the 2001-2021 datasets. DBs are essentially city blocks, bounded by intersecting streets and therefore are largely the product of road networks at the time of the census. The geographies and identification codes of DBs are not @@ -428,7 +430,7 @@
-list_census_regions('CA21')
list_census_regions('CA21')
## # A tibble: 5,518 × 8
## region name level pop munic…¹ CMA_UID CD_UID PR_UID
## <chr> <chr> <chr> <int> <chr> <chr> <chr> <chr>
@@ -477,7 +479,7 @@ Searching through named Census r
any geographies that have a name that matches or partially matches the
search query.
-search_census_regions("Vancouver","CA21")
+search_census_regions("Vancouver","CA21")
## # A tibble: 7 × 8
## region name level pop municipal_status CMA_UID CD_UID PR_UID
## <chr> <chr> <chr> <int> <chr> <chr> <chr> <chr>
diff --git a/docs/articles/data_discovery_files/figure-html/unnamed-chunk-2-1.png b/docs/articles/data_discovery_files/figure-html/unnamed-chunk-2-1.png
index 1dcf849a..75c65deb 100644
Binary files a/docs/articles/data_discovery_files/figure-html/unnamed-chunk-2-1.png and b/docs/articles/data_discovery_files/figure-html/unnamed-chunk-2-1.png differ
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 02a4da6b..7a2d8daf 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -98,6 +104,20 @@ Combining custom geographies with census data
- Finding intersecting geometries from custom data
-
+
+ StatCan attribute files
+
+
+
+
+ StatCan WDS
+
+
+ - StatCan WDS
+ -
+
Additional datasets
diff --git a/docs/articles/intersecting_geometries.html b/docs/articles/intersecting_geometries.html
index 9e9ba2e3..c3d779fa 100644
--- a/docs/articles/intersecting_geometries.html
+++ b/docs/articles/intersecting_geometries.html
@@ -51,7 +51,7 @@
@@ -79,6 +79,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -130,7 +136,7 @@ Bring custom data and
geographies or other geospatial data of interest and want to be able to
quickly and easily identify the collection of census features that
correspond to that region.
-
The get_intersecting_geometries()
function is projection
+
The get_intersecting_geometries()
function is projection
agnostic and accepts any valid sf
or sfc
class
object as input. These objects are then reprojected into lat/lon
coordinates on the backend to facilitate the intersecting join on the
@@ -159,9 +165,9 @@
A simple example
-station_city_ids <- get_intersecting_geometries("CA16", level = "CSD", geometry = cov_station_buffers,
+station_city_ids <- get_intersecting_geometries("CA16", level = "CSD", geometry = cov_station_buffers,
quiet=TRUE)
-station_ct_ids <- get_intersecting_geometries("CA16", level = "CT", geometry = cov_station_buffers,
+station_ct_ids <- get_intersecting_geometries("CA16", level = "CT", geometry = cov_station_buffers,
quiet=TRUE)
These return a list of census geographic identifiers suitable for use
in the ‘region’ argument in get_census
. We may be
@@ -169,10 +175,10 @@
A simple example
variables <- c(mode_base="v_CA16_5792",transit="v_CA16_5801",walk="v_CA16_5804")
-station_city <- get_census("CA16", regions = station_city_ids, vectors = variables,
+station_city <- get_census("CA16", regions = station_city_ids, vectors = variables,
geo_format = 'sf', quiet=TRUE) %>%
filter(name == "Vancouver (CY)")
-station_cts <- get_census("CA16", regions = station_ct_ids, vectors = variables,
+station_cts <- get_census("CA16", regions = station_ct_ids, vectors = variables,
geo_format = 'sf', quiet=TRUE)
To understand how these relate we plot the data.
@@ -189,9 +195,9 @@ A simple exampleTo get a closer match we can cut out the dissemination areas
intersecting the station catchment areas.
-station_das <- get_intersecting_geometries("CA16", level = "DA", geometry = cov_station_buffers,
+station_das <- get_intersecting_geometries("CA16", level = "DA", geometry = cov_station_buffers,
quiet=TRUE) %>%
- get_census("CA16", regions = ., vectors=variables, geo_format = 'sf', quiet=TRUE)
+ get_census("CA16", regions = ., vectors=variables, geo_format = 'sf', quiet=TRUE)
ggplot(station_city) +
geom_sf(fill=NA) +
@@ -209,7 +215,7 @@ A simple exampleget_intersecting_geometries
call and then filter down to
those intersecting the station buffers.
-station_das2 <- get_census("CA16", regions = station_ct_ids, vectors=variables,
+station_das2 <- get_census("CA16", regions = station_ct_ids, vectors=variables,
geo_format = 'sf', level="DA", quiet=TRUE) %>%
sf::st_filter(cov_station_buffers)
diff --git a/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-5-1.png
index c4c2466d..5c23dfd3 100644
Binary files a/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-5-1.png differ
diff --git a/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-6-1.png
index f4ef447a..3a180885 100644
Binary files a/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-6-1.png differ
diff --git a/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-7-1.png
index f4ef447a..3a180885 100644
Binary files a/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/intersecting_geometries_files/figure-html/unnamed-chunk-7-1.png differ
diff --git a/docs/articles/statcan_attribute_files.html b/docs/articles/statcan_attribute_files.html
new file mode 100644
index 00000000..83a31f3d
--- /dev/null
+++ b/docs/articles/statcan_attribute_files.html
@@ -0,0 +1,245 @@
+
+
+
+
+
+
+
+StatCan attribute files • cancensus
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ StatCan attribute files
+
+
+ Source: vignettes/statcan_attribute_files.Rmd
+ statcan_attribute_files.Rmd
+
+
+
+
+
+
+
+Background
+
+Attribute files describe the detailed relationship of various
+Statistics Canada geographic levels and provide population, household
+and dwelling counts. This information can be useful for understanding
+the hierarchical relationships of different geographic levels. The CensusMapper API that
+cancensus uses for most of the data queries is
+ill-suited to get comprehensive data on the hierarchical relationships
+Canada-wide, so it can be helpful to have direct access to this data in
+comprehensive tabular form.
+
+
+Match between Census Tracts and Census Subdivisions
+
+If we are interested in understanding which Census Tracts respect
+municipal boundaries and which ones don’t in 2021 we can consult the
+geographic attributes file. It contains a row for each Census Block, the
+basic building block of census geographies, and tags other levels of
+geography the Census Block lies in.
+
+attributes <- get_statcan_geographic_attributes("2021")
+
+attributes %>% colnames()
+#> [1] "PRUID_PRIDU" "PRDGUID_PRIDUGD"
+#> [3] "PRNAME_PRNOM" "PRENAME_PRANOM"
+#> [5] "PRFNAME_PRFNOM" "PREABBR_PRAABBREV"
+#> [7] "PRFABBR_PRFABBREV" "CDUID_DRIDU"
+#> [9] "CDDGUID_DRIDUGD" "CDNAME_DRNOM"
+#> [11] "CDTYPE_DRGENRE" "FEDUID_CEFIDU"
+#> [13] "FEDDGUID_CEFIDUGD" "FEDNAME_CEFNOM"
+#> [15] "CSDUID_SDRIDU" "CSDDGUID_SDRIDUGD"
+#> [17] "CSDNAME_SDRNOM" "CSDTYPE_SDRGENRE"
+#> [19] "DPLUID_LDIDU" "DPLDGUID_LDIDUGD"
+#> [21] "DPLNAME_LDNOM" "DPLTYPE_LDGENRE"
+#> [23] "ERUID_REIDU" "ERDGUID_REIDUGD"
+#> [25] "ERNAME_RENOM" "CCSUID_SRUIDU"
+#> [27] "CCSDGUID_SRUIDUGD" "CCSNAME_SRUNOM"
+#> [29] "SACTYPE_CSSGENRE" "SACCODE_CSSCODE"
+#> [31] "CMAPUID_RMRPIDU" "CMAPDGUID_RMRPIDUGD"
+#> [33] "CMAUID_RMRIDU" "CMADGUID_RMRIDUGD"
+#> [35] "CMANAME_RMRNOM" "CMATYPE_RMRGENRE"
+#> [37] "CTUID_SRIDU" "CTDGUID_SRIDUGD"
+#> [39] "CTCODE_SRCODE" "CTNAME_SRNOM"
+#> [41] "POPCTRRAPUID_CTRPOPRRPIDU" "POPCTRRAPDGUID_CTRPOPRRPIDUGD"
+#> [43] "POPCTRRAUID_CTRPOPRRIDU" "POPCTRRADGUID_CTRPOPRRIDUGD"
+#> [45] "POPCTRRANAME_CTRPOPRRNOM" "POPCTRRATYPE_CTRPOPRRGENRE"
+#> [47] "POPCTRRACLASS_CTRPOPRRCLASSE" "DAUID_ADIDU"
+#> [49] "DADGUID_ADIDUGD" "DARPLAMX_ADLAMX"
+#> [51] "DARPLAMY_ADLAMY" "DARPLAT_ADLAT"
+#> [53] "DARPLONG_ADLONG" "DBUID_IDIDU"
+#> [55] "DBDGUID_IDIDUGD" "DBPOP2021_IDPOP2021"
+#> [57] "DBTDWELL2021_IDTLOG2021" "DBURDWELL2021_IDRHLOG2021"
+#> [59] "DBAREA2021_IDSUP2021" "DBIR2021_IDRI2021"
+#> [61] "ADAUID_ADAIDU" "ADADGUID_ADAIDUGD"
+#> [63] "ADACODE_ADACODE"
+To answer our question, we filter Census Block by the ones that lie
+within a Census Tract, and check for the collection of Census Blocks
+within each Census Tract how many municipalities they lie in.
+
+attributes %>%
+ filter(CMATYPE_RMRGENRE %in% c("B","K")) |> # filter areas not in CTs
+ group_by(CTCODE_SRCODE,CMATYPE_RMRGENRE) |>
+ summarise(`Number of municipalities`=length(unique(CSDUID_SDRIDU)),.groups="drop") |>
+ count(`Number of municipalities`,CMATYPE_RMRGENRE) |>
+ arrange(CMATYPE_RMRGENRE,`Number of municipalities`)
+#> # A tibble: 7 × 3
+#> `Number of municipalities` CMATYPE_RMRGENRE n
+#> <int> <chr> <int>
+#> 1 1 B 5997
+#> 2 2 B 33
+#> 3 3 B 13
+#> 4 4 B 5
+#> 5 6 B 4
+#> 6 1 K 194
+#> 7 2 K 1
+This shows that most census tracts respect municipal boundaries, for both
+Census Metropolitan Areas (CMATYPE_RMRGENRE=“B”) and tracted Census
+Agglomerations (CMATYPE_RMRGENRE=“K”), with some census tracts spanning
+several municipalities.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/articles/statcan_wds.html b/docs/articles/statcan_wds.html
new file mode 100644
index 00000000..12d642d5
--- /dev/null
+++ b/docs/articles/statcan_wds.html
@@ -0,0 +1,254 @@
+
+
+
+
+
+
+
+StatCan WDS • cancensus
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Some census datasets have not been imported into CensusMapper, and
+thus aren’t available via the get_census()
function. But
+the data can be queried directly from the Statistics
+Canada Web Data Service for the 2021 census, as well as geographic
+boundary files.
+
+Word of caution
+
+The StatCan census WDS is not a stable API: the meaning of the
+internal identifiers used to query it has changed in the past, and may
+change again in the future. Use with extreme caution.
+The same call may give different results at different points in time.
+Moreover, the API server is sometimes unavailable.
+
+
+Ukrainians by Federal Electoral Districts
+
+As an example we access the share of people with Ukrainian ethnic
+origin at the Federal Electoral District level. First we need to find
+the StatCan Characteristic IDs for our characteristic of interest. For
+this we download the metadata, extract the characteristics, locate the
+base characteristic for ethnic origin and the entry referencing
+“Ukrainian” that’s a descendant of the base characteristic.
+
+metadata <- get_statcan_wds_metadata("2021","FED")
+
+characteristics <- metadata |>
+ filter(`Codelist en`=="Characteristic") |>
+ mutate(across(matches("ID"),as.integer))
+
+ethnic_base <- characteristics |>
+ filter(grepl("Total - Ethnic",en))
+ukranian <- characteristics |>
+ filter(grepl("Ukrainian",en), `Parent ID`==ethnic_base$ID)
+
+selected_characteristics <- bind_rows(ethnic_base,ukranian)
+
+selected_characteristics |> select(ID,en)
+#> # A tibble: 2 × 2
+#> ID en
+#> <int> <chr>
+#> 1 1684 Total - Ethnic or cultural origin for the population in private househo…
+#> 2 1694 Ukrainian
+We can also get the geographic identifiers for the federal electoral
+districts from the metadata.
+
+
+data <- get_statcan_wds_data(dguids,members=selected_characteristics$ID,gender="Total")
+The data comes enriched with metadata to make working with it easier,
+in particular the CHARACTERISTIC_NAME
column contains plain
+language names. Now we can transform the data to compute
+percentages.
+
+plot_data <- data |>
+ select(DGUID=GEO_DESC,Name=GEO_NAME,name=CHARACTERISTIC_NAME,value=OBS_VALUE) |>
+ pivot_wider() |>
+ mutate(Share=Ukrainian/`Total - Ethnic or cultural origin for the population in private households - 25% sample data`)
+Now we can visualize the data, here are the top 20 Federal Electoral
+Districts by share of the population with Ukrainian heritage.
+
+plot_data |> slice_max(Share,n=20) |>
+ ggplot(aes(y=reorder(Name,Share),x=Share)) +
+ geom_bar(stat="identity",fill="steelblue") +
+ scale_x_continuous(labels=scales::percent) +
+ labs(title="Ukrainian ethnic origin",
+ y="Federal electoral district",
+ x="Share of population in private households",
+ caption="StatCan Census 2021")
+
+To map the data we have to get the geographies.
+
+fed_geos <- get_statcan_geographies("2021","FED")
+With these we can join on our census data and map it.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/articles/statcan_wds_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/statcan_wds_files/figure-html/unnamed-chunk-6-1.png
new file mode 100644
index 00000000..02c96fcc
Binary files /dev/null and b/docs/articles/statcan_wds_files/figure-html/unnamed-chunk-6-1.png differ
diff --git a/docs/articles/statcan_wds_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/statcan_wds_files/figure-html/unnamed-chunk-8-1.png
new file mode 100644
index 00000000..3296332d
Binary files /dev/null and b/docs/articles/statcan_wds_files/figure-html/unnamed-chunk-8-1.png differ
diff --git a/docs/authors.html b/docs/authors.html
index 90cf8502..02fd1809 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/index.html b/docs/index.html
index 3091e30f..f4868f71 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -58,7 +58,7 @@
@@ -86,6 +86,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -157,7 +163,7 @@ API key
Local Cache
For performance reasons, and to avoid unnecessarily drawing down API quotas, cancensus caches data queries under the hood. By default, cancensus caches in R’s temporary directory, but this cache is not persistent across sessions. In order to speed up performance, reduce quota usage, and reduce the need for unnecessary network calls, we recommend assigning a persistent local cache using set_cancensus_cache_path(<local cache path>, install = TRUE)
, this enables more efficient loading and reuse of downloaded data. Users will be prompted with a suggestion to change their default cache location when making API calls if one has not been set yet.
-Starting with version 0.5.2 cancensus will automatically check if for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in CensusMapper. Statistics Canada occasionally detects and corrects errors in their census data releases, and cancensus will download a list of recalled data at the first invocation of get_census()
in each session and emit a warning if it detected locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the remove_recalled_chached_data()
function. If data was cached with cancensus versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.
+Starting with version 0.5.2 cancensus will automatically check for data that has been recalled by Statistics Canada and is stored in the local cache via the new data recall API implemented in CensusMapper. Statistics Canada occasionally detects and corrects errors in their census data releases, and cancensus will download a list of recalled data at the first invocation of get_census()
in each session and emit a warning if it detected locally cached data that has been recalled. Removal of the cached recalled data has to be done explicitly by the user via the remove_recalled_chached_data()
function. If data was cached with cancensus versions prior to version 0.5.0 there is insufficient metadata to determine all instances of recalled cached data, but the package will check every time cached data is loaded and can identify recalled data at this point at the latest and issues a warning if recalled data is loaded.
Currently available datasets
@@ -170,13 +176,13 @@ Picking regions and variables# To view available Census datasets
-list_census_datasets()
+list_census_datasets()
# To view available named regions at different levels of Census hierarchy for the 2016 Census (for example)
-list_census_regions("CA16")
+list_census_regions("CA16")
# To view available Census variables for the 2016 Census
-list_census_vectors("CA16")
+list_census_vectors("CA16")
There is also an interactive tool that is available at the CensusMapper API to visually select regions and variables and generate code for the API call. Calling explore_census_vectors(dataset = "CA16")
or explore_census_regions(dataset = "CA16")
will open a new browser window to this interactive tool, preconfigured for whichever Census dataset is set as an argument.
@@ -184,11 +190,11 @@ Getting the data
cancensus can return census data with or without associated Census geographical information that can be used for mapping and GIS. By default, cancensus returns tidy tabular data only, but has options to return spatial data objects in either sf or sp formats.
# Return data only
-census_data <- get_census(dataset='CA16', regions=list(CMA="59933"),
+census_data <- get_census(dataset='CA16', regions=list(CMA="59933"),
vectors=c("v_CA16_408","v_CA16_409","v_CA16_410"), level='CSD')
# Return an sf-class data frame
-census_data <- get_census(dataset='CA16', regions=list(CMA="59933"),
+census_data <- get_census(dataset='CA16', regions=list(CMA="59933"),
vectors=c("v_CA16_408","v_CA16_409","v_CA16_410"), level='CSD', geo_format = "sf")
cancensus attempts to minimize bandwidth usage and download time by caching downloads. When attempting to download data that has previously been downloaded, cancensus will instead access the locally cached equivalent.
@@ -268,14 +274,14 @@ Cite cancensus
If you wish to cite cancensus:
-von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2022). cancensus: R package to access, retrieve, and work with Canadian Census data and geography. v0.5.3.
+von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2022). cancensus: R package to access, retrieve, and work with Canadian Census data and geography. v0.5.5.
A BibTeX entry for LaTeX users is
- @Manual{,
+ @Manual{cancensus,
author = {Jens {von Bergmann} and Dmitry Shkolnik and Aaron Jacobs},
title = {cancensus: R package to access, retrieve, and work with Canadian Census data and geography},
year = {2022},
- note = {R package version 0.5.3},
- url = {https://mountainmath.github.io/cancensus/},
+ note = {R package version 0.5.5},
+ url = {https://mountainmath.github.io/cancensus/}
}
@@ -289,7 +295,7 @@ Related packages
Statistics Canada Attribution
-Subject to the Statistics Canada Open License Agreement, licensed products using Statistics Canada data should employ the following aknowledgement of source:
+Subject to the Statistics Canada Open Data License Agreement, licensed products using Statistics Canada data should employ the following acknowledgement of source:
Acknowledgment of Source
(a) You shall include and maintain the following notice on all licensed rights of the Information:
diff --git a/docs/news/index.html b/docs/news/index.html
index dcdc5d60..05a00d6c 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -78,10 +84,16 @@ Changelog
Source: NEWS.md
+
+cancensus 0.5.5
+- add functionality for direct access to StatCan census WDS for 2021
+- add functionality to download original StatCan geographies for 2021
+- update CODES_TABLE for 2021 census
+
cancensus 0.5.42022-11-07
- added ability to query census datasets by year
-- added a convenience function for creating unique names within given selection of regions from
list_census_regions()
+ - added a convenience function for creating unique names within given selection of regions from
list_census_regions()
- added a check and context menu to install
sf
package when user requests spatial data but does not have the required package installed as opposed to erroring out.
- improved checking that correct spatial formats are requested
@@ -129,7 +141,7 @@ cancensus 0.4.2
Minor changes
- Fixed a minor problem where cache path wasn’t always picked up
-- Added optional argument
keep_parent
when calling child_census_vectors()
that retains the input parent variable in the list of result. We found that in many cases user would follow up a call to child_census_vectors()
with a bind_rows(...)
to do this, so this should save a step.
+- Added optional argument
keep_parent
when calling child_census_vectors()
that retains the input parent variable in the list of results. We found that in many cases users would follow up a call to child_census_vectors()
with a bind_rows(...)
to do this, so this should save a step.
Minor changes
-- Fixes bug in
find_census_vectors()
+- Fixes bug in
find_census_vectors()
@@ -176,17 +188,17 @@ Minor changescancensus 0.3.0
Major changes
-- Fully redesigned variable search using
find_census_vectors()
and deprecation of `search_census_vectors(). See the Data discovery: resources for finding available and relevant data vignette for additional information.
-- Census Agglomerations with defined geographies and Census tracts are separated from CMAs when calling
list_census_regions()
+- Fully redesigned variable search using
find_census_vectors()
and deprecation of search_census_vectors(). See the Data discovery: resources for finding available and relevant data vignette for additional information.
+- Census Agglomerations with defined geographies and Census tracts are separated from CMAs when calling
list_census_regions()
-- Additional metadata for catalogue information and attribution is returned when calling
list_census_datasets()
+ - Additional metadata for catalogue information and attribution is returned when calling
list_census_datasets()
- New functions
explore_census_regions
and explore_census_vectors
which open a browser page towards the interactive discovery and selection tools on the Censusmapper website
- New function
attribution_for_dataset
which provides accurate attribution information for citation and visualizations for a given dataset.
- Additional datasets: T1FF taxfiler data and dwelling type crosstabs, made available by CMHC. For more info, see the new vignettes for these datasets: Additional datasets: Structural type of dwelling by document type, Additional datasets: Annual T1FF taxfiler data.
-
-
get_census_geometry()
is now hard-deprecated and will stop the program flow. Use get_census()
instead.
+get_census_geometry()
is now hard-deprecated and will stop the program flow. Use get_census()
instead.
Minor changes
-- Default behaviour for
list_census_vectors()
changed to have quiet = TRUE
+- Default behaviour for
list_census_vectors()
changed to have quiet = TRUE
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 208bbbf0..7508534c 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -8,7 +8,9 @@ articles:
cancensus: cancensus.html
data_discovery: data_discovery.html
intersecting_geometries: intersecting_geometries.html
-last_built: 2022-11-06T20:54Z
+ statcan_attribute_files: statcan_attribute_files.html
+ statcan_wds: statcan_wds.html
+last_built: 2023-01-21T22:13Z
urls:
reference: https://mountainmath.github.io/cancensus/reference
article: https://mountainmath.github.io/cancensus/articles
diff --git a/docs/reference/CODES_TABLE.html b/docs/reference/CODES_TABLE.html
index 72134d09..9603ac10 100644
--- a/docs/reference/CODES_TABLE.html
+++ b/docs/reference/CODES_TABLE.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -86,7 +92,7 @@ A dataset with code table summaries for census data
References
- https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo012-eng.cfm
+ https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CSDtype, https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CDtype
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/add_unique_names_to_region_list.html b/docs/reference/add_unique_names_to_region_list.html
index 64dd80d0..467a52a3 100644
--- a/docs/reference/add_unique_names_to_region_list.html
+++ b/docs/reference/add_unique_names_to_region_list.html
@@ -26,7 +26,7 @@
@@ -52,6 +52,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/as_census_region_list.html b/docs/reference/as_census_region_list.html
index 40778d1c..cd2b5d66 100644
--- a/docs/reference/as_census_region_list.html
+++ b/docs/reference/as_census_region_list.html
@@ -29,7 +29,7 @@
@@ -55,6 +55,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/census_vectors.html b/docs/reference/census_vectors.html
index ffda1e32..917e9077 100644
--- a/docs/reference/census_vectors.html
+++ b/docs/reference/census_vectors.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/child_census_vectors.html b/docs/reference/child_census_vectors.html
index 377faccf..6982bc5c 100644
--- a/docs/reference/child_census_vectors.html
+++ b/docs/reference/child_census_vectors.html
@@ -29,7 +29,7 @@
@@ -55,6 +55,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -128,14 +134,6 @@ Arguments
Examples
# Query parent vectors directly using vector identifier
child_census_vectors("v_CA16_2510")
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
#> # A tibble: 4 × 7
#> vector type label units parent_vector aggregation details
#> <chr> <fct> <chr> <fct> <chr> <chr> <chr>
diff --git a/docs/reference/dataset_attribution.html b/docs/reference/dataset_attribution.html
index 2599727c..e03bbac3 100644
--- a/docs/reference/dataset_attribution.html
+++ b/docs/reference/dataset_attribution.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -105,10 +111,6 @@ Examples
# Attribution string for the 2006 and 2016 census datasets
dataset_attribution(c('CA06','CA16'))
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
#> [1] "StatCan 2006, 2016 Census"
diff --git a/docs/reference/explore_census_regions.html b/docs/reference/explore_census_regions.html
index e05a4b8a..26cb9418 100644
--- a/docs/reference/explore_census_regions.html
+++ b/docs/reference/explore_census_regions.html
@@ -27,7 +27,7 @@
@@ -53,6 +53,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/explore_census_vectors.html b/docs/reference/explore_census_vectors.html
index 086639fd..1633436d 100644
--- a/docs/reference/explore_census_vectors.html
+++ b/docs/reference/explore_census_vectors.html
@@ -27,7 +27,7 @@
@@ -53,6 +53,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/find_census_vectors.html b/docs/reference/find_census_vectors.html
index 3d3c8fcc..3673a80d 100644
--- a/docs/reference/find_census_vectors.html
+++ b/docs/reference/find_census_vectors.html
@@ -36,7 +36,7 @@
@@ -62,6 +62,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -145,8 +151,6 @@ Arguments
Examples
find_census_vectors('Oji-cree', dataset = 'CA16', type = 'total', query_type = 'exact')
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
#> # A tibble: 4 × 4
#> vector type label details
#> <chr> <fct> <chr> <chr>
@@ -156,8 +160,6 @@ Examples
#> 4 v_CA16_5930 Total Oji-Cree 25% Data; Work; Total - Language used most often a…
find_census_vectors('commuting duration', dataset = 'CA11', type = 'female', query_type = 'keyword')
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
#> # A tibble: 2 × 4
#> vector type label details
#> <chr> <fct> <chr> <chr>
@@ -165,8 +167,6 @@ Examples
#> 2 v_CA11N_2217 Female Median commuting duration CA 201…
find_census_vectors('after tax income', dataset = 'CA16', type = 'total', query_type = 'semantic')
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
#> Multiple possible matches. Results ordered by closeness.
#> # A tibble: 56 × 4
#> vector type label details
diff --git a/docs/reference/get_census.html b/docs/reference/get_census.html
index 6646a393..9a284105 100644
--- a/docs/reference/get_census.html
+++ b/docs/reference/get_census.html
@@ -24,7 +24,7 @@
@@ -50,6 +50,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/get_census_geometry.html b/docs/reference/get_census_geometry.html
index cd1c77ae..f9c3829b 100644
--- a/docs/reference/get_census_geometry.html
+++ b/docs/reference/get_census_geometry.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/get_intersecting_geometries.html b/docs/reference/get_intersecting_geometries.html
index 9d0babdd..76437046 100644
--- a/docs/reference/get_intersecting_geometries.html
+++ b/docs/reference/get_intersecting_geometries.html
@@ -28,7 +28,7 @@
@@ -54,6 +54,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/get_recalled_database.html b/docs/reference/get_recalled_database.html
index b107cc68..228ac756 100644
--- a/docs/reference/get_recalled_database.html
+++ b/docs/reference/get_recalled_database.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/get_statcan_geo_suite.html b/docs/reference/get_statcan_geo_suite.html
new file mode 100644
index 00000000..f1897127
--- /dev/null
+++ b/docs/reference/get_statcan_geo_suite.html
@@ -0,0 +1,153 @@
+
+Read the geosuite data — get_statcan_geo_suite • cancensus
+
+
+
+
+
+
+
+
+
+
+
+
+ Reads the geosuite data for the given level and census year. Data gets cached after first download if the
+cancensus cache path has been set. For older
+years `get_statcan_geographic_attributes()` can fill in most of the information
+
+
+
+ get_statcan_geo_suite(level, census_year = "2021", refresh = FALSE)
+
+
+
+ Arguments
+ - level
+geographic level to return the data for, valid choices are
+"DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR", "FED", "DPL", "ER", "PN", "POPCTR"
+
+
+- census_year
+census year to get the data for, right now only 2021 is supported
+
+
+- refresh
+(logical) refresh the cache if true
+
+
+
+ Value
+
+
+tibble with the geosuite data
+
+
+
+ Examples
+ # read the 2021 GeoSuite data for dissemination areas
+if (FALSE) {
+get_statcan_geo_suite("DA","2021")
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/get_statcan_geographic_attributes.html b/docs/reference/get_statcan_geographic_attributes.html
new file mode 100644
index 00000000..3b076588
--- /dev/null
+++ b/docs/reference/get_statcan_geographic_attributes.html
@@ -0,0 +1,152 @@
+
+Read the Geographic Attributes File — get_statcan_geographic_attributes • cancensus
+
+
+
+
+
+
+
+
+
+
+ Read the Geographic Attributes File
+ Source: R/geo_suite.R
+ get_statcan_geographic_attributes.Rd
+
+
+
+ Reads the Geographic Attributes File for the given census year. The table contains
+the information on how all the geographic levels are related for each area, and population, dwelling and household counts.
+Data gets cached after first download if the
+cancensus cache path has been set. A reference guide is available
+at https://www150.statcan.gc.ca/n1/en/catalogue/92-151-G2021001
+
+
+
+ get_statcan_geographic_attributes(census_year = "2021", refresh = FALSE)
+
+
+
+ Arguments
+ - census_year
+census year to get the data for, right now only 2006, 2011, 2016, 2021 are supported
+
+
+- refresh
+(logical) refresh the cache if true
+
+
+
+ Value
+
+
+tibble with the relationship data
+
+
+
+ Examples
+ # read the 2021 Geographic Attributes File
+if (FALSE) {
+get_statcan_geographic_attributes("2021")
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/get_statcan_geographies.html b/docs/reference/get_statcan_geographies.html
new file mode 100644
index 00000000..eb10cd0a
--- /dev/null
+++ b/docs/reference/get_statcan_geographies.html
@@ -0,0 +1,174 @@
+
+Read the geographic boundary files — get_statcan_geographies • cancensus
+
+
+
+
+
+
+
+
+
+
+
+
+ Reads the original unprocessed geographic boundary files from Statistics Canada
+
+
+
+ get_statcan_geographies(
+ census_year,
+ level,
+ type = "cartographic",
+ cache_path = NULL,
+ timeout = 1000,
+ refresh = FALSE,
+ quiet = FALSE
+)
+
+
+
+ Arguments
+ - census_year
+census year to get the data for, right now only 2021 is supported
+
+
+- level
+geographic level to return the data for, valid choices are
+"PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"
+
+
+- type
+type of geographic data, valid choices are "cartographic" or "digital"
+
+
+- cache_path
+optional path to cache the data. If the cancensus cache path is set the geographic data gets
+cached in the "geographies" subdirectory of the cancensus cache path.
+
+
+- timeout
+optional timeout parameter, adjust as needed if the data download times out when using slow connections
+
+
+- refresh
+(logical) refresh the cache if true
+
+
+- quiet
+(logical) suppress messages if `TRUE`
+
+
+
+ Value
+
+
+a spatial dataframe with the geographic data
+
+
+
+ Examples
+ # get the digital geographic boundaries for provinces and territories
+if (FALSE) {
+get_statcan_geographies(census_year="2021",level="PR",type="digital")
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/get_statcan_geography_relationships.html b/docs/reference/get_statcan_geography_relationships.html
new file mode 100644
index 00000000..2fc6e8ab
--- /dev/null
+++ b/docs/reference/get_statcan_geography_relationships.html
@@ -0,0 +1,151 @@
+
+Read the Dissemination Geographies Relationship File — get_statcan_geography_relationships • cancensus
+
+
+
+
+
+
+
+
+
+
+ Read the Dissemination Geographies Relationship File
+ Source: R/geo_suite.R
+ get_statcan_geography_relationships.Rd
+
+
+
+ Reads the Dissemination Geographies Relationship File for the given census year. The table contains
+the information on how all the geographic levels are related for each area. Data gets cached after first download if the
+cancensus cache path has been set. A reference guide is available
+at https://www150.statcan.gc.ca/n1/en/catalogue/982600032021001
+
+
+
+ get_statcan_geography_relationships(census_year = "2021", refresh = FALSE)
+
+
+
+ Arguments
+ - census_year
+census year to get the data for, right now only 2021 is supported, for older
+years `get_statcan_geographic_attributes()` can fill in most of the information
+
+
+- refresh
+(logical) refresh the cache if true
+
+
+
+ Value
+
+
+tibble with the relationship data
+
+
+
+ Examples
+ # read the 2021 Dissemination Geographies Relationship File
+if (FALSE) {
+get_statcan_geography_relationships("2021")
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/get_statcan_wds_data.html b/docs/reference/get_statcan_wds_data.html
new file mode 100644
index 00000000..5e837196
--- /dev/null
+++ b/docs/reference/get_statcan_wds_data.html
@@ -0,0 +1,169 @@
+
+Query the StatCan WDS for data — get_statcan_wds_data • cancensus
+
+
+
+
+
+
+
+
+
+
+
+
+ Get official census data from Statistics Canada for a given set of DGUIDs. Only available for the 2021 census. The
+downloaded data gets enriched by geographic and characteristic names based on metadata obtained via `get_statcan_wds_metadata()`.
+Data is cached for the duration of the R session.
+
+
+
+ get_statcan_wds_data(
+ DGUIDs,
+ members = NULL,
+ gender = "All",
+ language = "en",
+ refresh = FALSE
+)
+
+
+
+ Arguments
+ - DGUIDs
+set of DGUIDs to get the data for, right now only the 2021 census is supported. Valid DGUIDs for a given geographic
+level can be queried via `get_statcan_wds_metadata()`.
+
+
+- members
+list of Member IDs to download data for. By default all characteristics are downloaded. Valid
+Member IDs and their descriptions can be queried via the `get_statcan_wds_metadata()` call.
+
+
+- gender
+optionally query data for only one gender. By default this queries data for all genders, possible
+values are "Total", "Male", "Female" to only query total data, or for males only or for females only.
+
+
+- language
+specify language for geography and characteristic names that get added, valid choices are "en" and "fr"
+
+
+- refresh
+default is `FALSE`; refreshes the temporary cache if `TRUE`
+
+
+
+ Value
+
+
+tibble with the enriched census data
+
+
+
+ Examples
+ # get data for federal electoral district 2013A000459021
+if (FALSE) {
+get_statcan_wds_data(DGUIDs="2013A000459021")
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/get_statcan_wds_metadata.html b/docs/reference/get_statcan_wds_metadata.html
new file mode 100644
index 00000000..e649e5cd
--- /dev/null
+++ b/docs/reference/get_statcan_wds_metadata.html
@@ -0,0 +1,151 @@
+
+Query the StatCan WDS for metadata — get_statcan_wds_metadata • cancensus
+
+
+
+
+
+
+
+
+
+
+
+
+ Get official metadata information from Statistics Canada for a given geographic level. Only available for the 2021 census.
+Data is cached for the duration of the R session.
+
+
+
+ get_statcan_wds_metadata(census_year, level, refresh = FALSE)
+
+
+
+ Arguments
+ - census_year
+census year to get the data for, right now only 2021 is supported
+
+
+- level
+geographic level to return the data for, valid choices are
+"PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"
+
+
+- refresh
+default is `FALSE`; refreshes the temporary cache if `TRUE`
+
+
+
+ Value
+
+
+tibble with the metadata
+
+
+
+ Examples
+ # get metadata for federal electoral districts
+if (FALSE) {
+get_statcan_wds_metadata(census_year="2021",level="FED")
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 3e546d41..6a49299f 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -101,8 +107,8 @@ Data discovery as_census_region_list()
Convert a (suitably filtered) data frame from
-list_census_regions
to a list suitable for passing to
-get_census
.
+list_census_regions
to a list suitable for passing to
+get_census
.
@@ -203,6 +209,34 @@ User settings show_cancensus_cache_path()
View saved cache directory path
+
+ Getting data directly from StatCan
+
+
+
+
+
+ Query the StatCan WDS for data
+
+
+
+ Query the StatCan WDS for metadata
+
+
+
+ Read the geosuite data
+
+
+
+ Read the Geographic Attributes File
+
+
+
+ Read the Dissemination Geographies Relationship File
+
+
+
+ Read the geographic boundary files
Data
diff --git a/docs/reference/label_vectors.html b/docs/reference/label_vectors.html
index a3001c3f..77845aff 100644
--- a/docs/reference/label_vectors.html
+++ b/docs/reference/label_vectors.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/list_cancensus_cache.html b/docs/reference/list_cancensus_cache.html
index dd28bc90..1fd9286b 100644
--- a/docs/reference/list_cancensus_cache.html
+++ b/docs/reference/list_cancensus_cache.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -98,20 +104,20 @@ Value
Examples
# list add the cached census data
list_cancensus_cache()
-#> # A tibble: 1,242 × 11
+#> # A tibble: 1,453 × 11
#> path dataset regions level vectors created_at version size
#> <chr> <chr> <chr> <chr> <chr> <dttm> <chr> <dbl>
#> 1 CM_data_009… CA06 "[\"{\… DA "[]" 2022-05-21 15:28:16 d.1 1663
#> 2 CM_data_00a… CA16 "[\"{\… DA "[\"Po… 2022-05-14 16:24:07 d.1 16663
#> 3 CM_data_014… CA06 "{\"CS… CT "[\"v_… 2022-11-03 04:34:33 d.3 13801
-#> 4 CM_data_016… CA16 "[\"{\… DA "[]" 2022-05-21 15:28:24 d.1 1658
-#> 5 CM_data_01c… CA16 "[\"{\… CT "[\"v_… 2022-07-13 15:23:08 d.1 5258
-#> 6 CM_data_01f… CA1996 "{\"CS… CSD "[\"v_… 2022-08-18 19:13:18 d.2 8898
-#> 7 CM_data_024… CA16 "[\"{\… Regi… "[\"v_… 2022-04-27 23:26:47 d.1 3603
-#> 8 CM_data_02b… CA16 "{\"C\… CT "[\"v_… 2022-10-06 10:39:55 d.2 143505
-#> 9 CM_data_02b… CA11 "[\"{\… Regi… "[\"v_… 2022-04-29 11:48:58 d.1 656
-#> 10 CM_data_02b… CA16 "[\"{\… CT "[\"v_… 2022-05-02 16:44:21 d.1 2764
-#> # … with 1,232 more rows, and 3 more variables: last_accessed <dttm>,
+#> 4 CM_data_016… CA01 "{\"CS… DB "[\"Po… 2022-12-12 12:54:14 d.3 179740
+#> 5 CM_data_016… CA16 "[\"{\… DA "[]" 2022-05-21 15:28:24 d.1 1658
+#> 6 CM_data_01c… CA16 "[\"{\… CT "[\"v_… 2022-07-13 15:23:08 d.1 5258
+#> 7 CM_data_01f… CA1996 "{\"CS… CSD "[\"v_… 2022-08-18 19:13:18 d.2 8898
+#> 8 CM_data_024… CA16 "[\"{\… Regi… "[\"v_… 2022-04-27 23:26:47 d.1 3603
+#> 9 CM_data_02b… CA21 "{\"CD… CT "[\"v_… 2022-12-31 09:37:21 d.4 361
+#> 10 CM_data_02b… CA16 "{\"C\… CT "[\"v_… 2022-10-06 10:39:55 d.2 143505
+#> # … with 1,443 more rows, and 3 more variables: last_accessed <dttm>,
#> # access_count <dbl>, resolution <chr>
diff --git a/docs/reference/list_census_datasets.html b/docs/reference/list_census_datasets.html
index b5efc580..7075ed41 100644
--- a/docs/reference/list_census_datasets.html
+++ b/docs/reference/list_census_datasets.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/list_census_regions.html b/docs/reference/list_census_regions.html
index fc32d93b..d2fda857 100644
--- a/docs/reference/list_census_regions.html
+++ b/docs/reference/list_census_regions.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
@@ -150,9 +156,7 @@ Value
Examples
list_census_regions('CA16')
-#> Called from: eval(expr, p)
-#> debug at /Users/jens/Documents/R/cancensus/R/helpers.R#51: dataset
-#> Querying CensusMapper API for regions data...
+#> Reading regions list from local cache.
#> # A tibble: 5,518 × 8
#> region name level pop munic…¹ CMA_UID CD_UID PR_UID
#> <chr> <chr> <chr> <int> <chr> <chr> <chr> <chr>
diff --git a/docs/reference/list_census_vectors.html b/docs/reference/list_census_vectors.html
index ddfd5073..d057b115 100644
--- a/docs/reference/list_census_vectors.html
+++ b/docs/reference/list_census_vectors.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/list_recalled_cached_data.html b/docs/reference/list_recalled_cached_data.html
index 7063c69e..b7e5e8a5 100644
--- a/docs/reference/list_recalled_cached_data.html
+++ b/docs/reference/list_recalled_cached_data.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/parent_census_vectors.html b/docs/reference/parent_census_vectors.html
index d67e2784..002f2132 100644
--- a/docs/reference/parent_census_vectors.html
+++ b/docs/reference/parent_census_vectors.html
@@ -29,7 +29,7 @@
@@ -55,6 +55,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/remove_from_cancensus_cache.html b/docs/reference/remove_from_cancensus_cache.html
index f0f7879b..a2c0b78f 100644
--- a/docs/reference/remove_from_cancensus_cache.html
+++ b/docs/reference/remove_from_cancensus_cache.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/remove_recalled_cached_data.html b/docs/reference/remove_recalled_cached_data.html
index 528931e1..937014c7 100644
--- a/docs/reference/remove_recalled_cached_data.html
+++ b/docs/reference/remove_recalled_cached_data.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/search_census_regions.html b/docs/reference/search_census_regions.html
index dabd3ddc..4bc26268 100644
--- a/docs/reference/search_census_regions.html
+++ b/docs/reference/search_census_regions.html
@@ -27,7 +27,7 @@
@@ -53,6 +53,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/search_census_vectors.html b/docs/reference/search_census_vectors.html
index 5f2bf21d..fa843220 100644
--- a/docs/reference/search_census_vectors.html
+++ b/docs/reference/search_census_vectors.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/set_cancensus_api_key.html b/docs/reference/set_cancensus_api_key.html
index ba51e12a..7716eafc 100644
--- a/docs/reference/set_cancensus_api_key.html
+++ b/docs/reference/set_cancensus_api_key.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/set_cancensus_cache_path.html b/docs/reference/set_cancensus_cache_path.html
index 3e319a2d..b9d1c092 100644
--- a/docs/reference/set_cancensus_cache_path.html
+++ b/docs/reference/set_cancensus_cache_path.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/show_cancensus_api_key.html b/docs/reference/show_cancensus_api_key.html
index 22917511..ffe240d2 100644
--- a/docs/reference/show_cancensus_api_key.html
+++ b/docs/reference/show_cancensus_api_key.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/reference/show_cancensus_cache_path.html b/docs/reference/show_cancensus_cache_path.html
index 45e58818..ef36f064 100644
--- a/docs/reference/show_cancensus_cache_path.html
+++ b/docs/reference/show_cancensus_cache_path.html
@@ -23,7 +23,7 @@
@@ -49,6 +49,12 @@
Finding intersecting geometries from custom data
+
+ StatCan attribute files
+
+
+ StatCan WDS
+
Additional datasets: Structural type of dwelling by document type
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 4f918b26..bb3b6fa2 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -27,6 +27,12 @@
https://mountainmath.github.io/cancensus/articles/intersecting_geometries.html
+
+ https://mountainmath.github.io/cancensus/articles/statcan_attribute_files.html
+
+
+ https://mountainmath.github.io/cancensus/articles/statcan_wds.html
+
https://mountainmath.github.io/cancensus/authors.html
@@ -78,6 +84,24 @@
https://mountainmath.github.io/cancensus/reference/get_recalled_database.html
+
+ https://mountainmath.github.io/cancensus/reference/get_statcan_geo_suite.html
+
+
+ https://mountainmath.github.io/cancensus/reference/get_statcan_geographic_attributes.html
+
+
+ https://mountainmath.github.io/cancensus/reference/get_statcan_geographies.html
+
+
+ https://mountainmath.github.io/cancensus/reference/get_statcan_geography_relationships.html
+
+
+ https://mountainmath.github.io/cancensus/reference/get_statcan_wds_data.html
+
+
+ https://mountainmath.github.io/cancensus/reference/get_statcan_wds_metadata.html
+
https://mountainmath.github.io/cancensus/reference/index.html
diff --git a/man/CODES_TABLE.Rd b/man/CODES_TABLE.Rd
index e4a2b04b..80090510 100644
--- a/man/CODES_TABLE.Rd
+++ b/man/CODES_TABLE.Rd
@@ -8,7 +8,7 @@
A dataset with code table summaries for census data
}
\references{
-\url{https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo012-eng.cfm}
+\url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CSDtype}, \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CDtype}
}
\author{
derived from StatCan definitions
diff --git a/man/get_statcan_geo_suite.Rd b/man/get_statcan_geo_suite.Rd
new file mode 100644
index 00000000..4c1aba79
--- /dev/null
+++ b/man/get_statcan_geo_suite.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/geo_suite.R
+\name{get_statcan_geo_suite}
+\alias{get_statcan_geo_suite}
+\title{Read the geosuite data}
+\usage{
+get_statcan_geo_suite(level, census_year = "2021", refresh = FALSE)
+}
+\arguments{
+\item{level}{geographic level to return the data for, valid choices are
+"DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR", "FED", "DPL", "ER", "PN", "POPCTR"}
+
+\item{census_year}{census year to get the data for, right now only 2021 is supported}
+
+\item{refresh}{(logical) refresh the cache if true}
+}
+\value{
+tibble with the geosuite data
+}
+\description{
+Reads the geosuite data for the given level and census year. Data gets cached after first download if the
+cancensus cache path has been set. For older
+years `get_statcan_geographic_attributes()` can fill in most of the information
+}
+\examples{
+# read the 2021 GeoSuite data for dissemination areas
+\dontrun{
+get_statcan_geo_suite("DA","2021")
+}
+}
diff --git a/man/get_statcan_geographic_attributes.Rd b/man/get_statcan_geographic_attributes.Rd
new file mode 100644
index 00000000..67316654
--- /dev/null
+++ b/man/get_statcan_geographic_attributes.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/geo_suite.R
+\name{get_statcan_geographic_attributes}
+\alias{get_statcan_geographic_attributes}
+\title{Read the Geographic Attributes File}
+\usage{
+get_statcan_geographic_attributes(census_year = "2021", refresh = FALSE)
+}
+\arguments{
+\item{census_year}{census year to get the data for, right now only 2006, 2011, 2016, 2021 are supported}
+
+\item{refresh}{(logical) refresh the cache if true}
+}
+\value{
+tibble with the relationship data
+}
+\description{
+Reads the Geographic Attributes File for the given census year. The table contains
+the information on how all the geographic levels are related for each area, and population, dwelling and household counts.
+Data gets cached after first download if the
+cancensus cache path has been set. A reference guide is available
+at https://www150.statcan.gc.ca/n1/en/catalogue/92-151-G2021001
+}
+\examples{
+# get the geographic attributes file for the 2021 census
+\dontrun{
+get_statcan_geographic_attributes("2021")
+}
+}
diff --git a/man/get_statcan_geographies.Rd b/man/get_statcan_geographies.Rd
new file mode 100644
index 00000000..1a17d3bf
--- /dev/null
+++ b/man/get_statcan_geographies.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/geographies.R
+\name{get_statcan_geographies}
+\alias{get_statcan_geographies}
+\title{Read the StatCan geographic boundary files}
+\usage{
+get_statcan_geographies(
+ census_year,
+ level,
+ type = "cartographic",
+ cache_path = NULL,
+ timeout = 1000,
+ refresh = FALSE,
+ quiet = FALSE
+)
+}
+\arguments{
+\item{census_year}{census year to get the data for, right now only 2021 is supported}
+
+\item{level}{geographic level to return the data for, valid choices are
+"PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"}
+
+\item{type}{type of geographic data, valid choices are "cartographic" or "digital"}
+
+\item{cache_path}{optional path to cache the data. If the cancensus cache path is set the geographic data gets
+cached in the "geographies" subdirectory of the cancensus cache path.}
+
+\item{timeout}{optional timeout parameter, adjust as needed if the data download times out when using slow connections}
+
+\item{refresh}{(logical) refresh the cache if true}
+
+\item{quiet}{(logical) suppress messages if `TRUE`}
+}
+\value{
+a spatial dataframe with the geographic data
+}
+\description{
+Reads the original unprocessed geographic boundary files from Statistics Canada
+}
+\examples{
+# get the digital geographic boundaries for provinces and territories
+\dontrun{
+get_statcan_geographies(census_year="2021",level="PR",type="digital")
+}
+}
diff --git a/man/get_statcan_geography_relationships.Rd b/man/get_statcan_geography_relationships.Rd
new file mode 100644
index 00000000..45d41d1b
--- /dev/null
+++ b/man/get_statcan_geography_relationships.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/geo_suite.R
+\name{get_statcan_geography_relationships}
+\alias{get_statcan_geography_relationships}
+\title{Read the Dissemination Geographies Relationship File}
+\usage{
+get_statcan_geography_relationships(census_year = "2021", refresh = FALSE)
+}
+\arguments{
+\item{census_year}{census year to get the data for, right now only 2021 is supported, for older
+years `get_statcan_geographic_attributes()` can fill in most of the information}
+
+\item{refresh}{(logical) refresh the cache if true}
+}
+\value{
+tibble with the relationship data
+}
+\description{
+Reads the Dissemination Geographies Relationship File for the given census year. The table contains
+the information on how all the geographic levels are related for each area. Data gets cached after first download if the
+cancensus cache path has been set. A reference guide is available
+at https://www150.statcan.gc.ca/n1/en/catalogue/982600032021001
+}
+\examples{
+# get the dissemination geographies relationship file for the 2021 census
+\dontrun{
+get_statcan_geography_relationships("2021")
+}
+}
+\keyword{internal}
diff --git a/man/get_statcan_wds_data.Rd b/man/get_statcan_wds_data.Rd
new file mode 100644
index 00000000..6788a21d
--- /dev/null
+++ b/man/get_statcan_wds_data.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wds.R
+\name{get_statcan_wds_data}
+\alias{get_statcan_wds_data}
+\title{Query the StatCan WDS for data}
+\usage{
+get_statcan_wds_data(
+ DGUIDs,
+ members = NULL,
+ gender = "All",
+ language = "en",
+ refresh = FALSE
+)
+}
+\arguments{
+\item{DGUIDs}{vector of DGUIDs to get the data for, right now only the 2021 census is supported. Valid DGUIDs for a given geographic
+level can be queried via `get_statcan_wds_metadata()`.}
+
+\item{members}{list of Member IDs to download data for. By default all characteristics are downloaded. Valid
+Member IDs and their descriptions can be queried via the `get_statcan_wds_metadata()` call.}
+
+\item{gender}{optionally query data for only one gender. By default this queries data for all genders, possible
+values are "Total", "Male", "Female" to only query total data, or for males only or for females only.}
+
+\item{language}{specify language for geography and characteristic names that get added, valid choices are "en" and "fr"}
+
+\item{refresh}{default is `FALSE` will refresh the temporary cache if `TRUE`}
+}
+\value{
+tibble with the enriched census data
+}
+\description{
+Get official census data from Statistics Canada for a given set of DGUIDs. Only available for the 2021 census. The
+downloaded data gets enriched by geographic and characteristic names based on metadata obtained via `get_statcan_wds_metadata()`.
+Data is cached for the duration of the R session.
+}
+\examples{
+# get data for federal electoral district 2013A000459021
+\dontrun{
+get_statcan_wds_data(DGUIDs="2013A000459021")
+}
+}
diff --git a/man/get_statcan_wds_metadata.Rd b/man/get_statcan_wds_metadata.Rd
new file mode 100644
index 00000000..9fec9992
--- /dev/null
+++ b/man/get_statcan_wds_metadata.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wds.R
+\name{get_statcan_wds_metadata}
+\alias{get_statcan_wds_metadata}
+\title{Query the StatCan WDS for metadata}
+\usage{
+get_statcan_wds_metadata(census_year, level, refresh = FALSE)
+}
+\arguments{
+\item{census_year}{census year to get the data for, right now only 2021 is supported}
+
+\item{level}{geographic level to return the data for, valid choices are
+"PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"}
+
+\item{refresh}{default is `FALSE` will refresh the temporary cache if `TRUE`}
+}
+\value{
+tibble with the metadata
+}
+\description{
+Get official metadata information from Statistics Canada for a given geographic level. Only available for the 2021 census.
+Data is cached for the duration of the R session.
+}
+\examples{
+# get metadata for federal electoral districts
+\dontrun{
+get_statcan_wds_metadata(census_year="2021",level="FED")
+}
+}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index d6c84ab6..549107c4 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -44,6 +44,14 @@ reference:
- set_cancensus_cache_path
- show_cancensus_api_key
- show_cancensus_cache_path
+ - title: Getting data directly from StatCan
+ - contents:
+ - get_statcan_wds_data
+ - get_statcan_wds_metadata
+ - get_statcan_geographies
+ - get_statcan_geographic_attributes
+ - get_statcan_geography_relationships
+ - get_statcan_geo_suite
- title: Data
- contents:
- CODES_TABLE
@@ -62,6 +70,14 @@ articles:
navbar: ~
contents:
- intersecting_geometries
+ - title: StatCan attribute files
+ navbar: ~
+ contents:
+ - statcan_attribute_files
+ - title: StatCan WDS
+ navbar: ~
+ contents:
+ - statcan_wds
- title: Additional datasets
navbar: ~
contents:
diff --git a/vignettes/data_discovery.Rmd b/vignettes/data_discovery.Rmd
index 80b0fb79..b5d6c621 100644
--- a/vignettes/data_discovery.Rmd
+++ b/vignettes/data_discovery.Rmd
@@ -16,7 +16,7 @@ library(cancensus)
# Census datasets
-`cancensus` can access Statistics Canada Census data for the 1996, 2001, 2006 Censuses, the 2011 Census and National Household Survey, as well as the 2016 Census. You can run `list_census_datasets` to check what datasets are currently available for access through the CensusMapper API. Additional data for the 2016 Census will be included in CensusMapper within a day or two after public release by Statistics Canada. Statistics Canada maintains a release schedule for the Census 2016 Program which can be viewed on their [website](http://www12.statcan.gc.ca/census-recensement/2016/ref/release-dates-diffusion-eng.cfm).
+`cancensus` can access Statistics Canada Census data for the 1996, 2001, 2006 Censuses, the 2011 Census and National Household Survey, the 2016 Census, as well as the 2021 Census. You can run `list_census_datasets` to check what datasets are currently available for access through the CensusMapper API.
Thanks to contributions by the Canada Mortgage and Housing Corporation (CMHC), `cancensus` now includes additional Census-linked datasets as open-data releases. These include annual tax-filer data at the census tract level for tax years 2000 through 2017, which includes data on incomes and demographics, as well as specialized crosstabs for Structural type of dwelling by Document type, which details occupancy status for residences. These crosstabs are available for the 2001, 2006, 2011, and 2016 Census years at all levels starting with census tract.
```{r}
@@ -101,7 +101,7 @@ Results are ordered by string proximity if there are multiple possible matches.
## Standard Geographical Classification
-Statistics Canada uses an official classification of geographic areas known as the [Standard Geographical Classification (SGC)](https://www.statcan.gc.ca/eng/subjects/standard/sgc/2016/introduction), which is updated periodically. The latest version is based on the 2016 Census. Geographic classification codes are standardized across Statistics Canada products, including the Census as well as any other Statistics Canada dataset. In practice, this means that the region ID for the Vancouver Census subdivision is 5915022 across all products. In `cancensus` the region ID code is used to identify the appropriate spatial vector data to retrieve alongside Census data. These region IDs have a predictable structure, where provinces are two digits, Census divisions are 4 digits (including 2 for the province), and Census subdivisions have 7 digits (including 2 for the province, and 2 for the Census division).
+Statistics Canada uses an official classification of geographic areas known as the [Standard Geographical Classification (SGC)](https://www.statcan.gc.ca/en/subjects/standard/sgc/2021/introduction), which is updated periodically. The latest version is based on the 2021 Census. Geographic classification codes are standardized across Statistics Canada products, including the Census as well as any other Statistics Canada dataset. In practice, this means that the region ID for the Vancouver Census subdivision is 5915022 across all products. In `cancensus` the region ID code is used to identify the appropriate spatial vector data to retrieve alongside Census data. These region IDs have a predictable structure, where provinces are two digits, Census divisions are 4 digits (including 2 for the province), and Census subdivisions have 7 digits (including 2 for the province, and 2 for the Census division).
```{r echo=FALSE}
tibble(PR = c(35,35,35), CD = c(NA, 18, 18), CSD = c(NA, NA, 013), name = c("Ontario","Durham (Regional municipality","Oshawa (City)"))
```
@@ -109,7 +109,7 @@ These levels are hierarchical and complete in that a province is split in Census
Geographies have standardized names for the province, Census division, and Census subdivision levels, as well as Census metropolitan areas and Census agglomerations. Lower geographic levels such as Census tracts or dissemination areas (DA, EA, and DB) are not named or listed but have unique identifying codes derived from their parent Census subdivision.
```{r echo=FALSE}
-list_census_regions('CA16') %>%
+list_census_regions('CA21') %>%
group_by(level) %>%
tally()
```
@@ -121,15 +121,15 @@ Data can also be extracted at the Census Metropolitan Area (CMA) or Census Agglo
A Census metropolitan area consists of adjacent municipalities with a defined core with a total population of at least 100,000 of which 50,000 or more must live in the core based on Census data. Adjacent municipalities must have a high degree of integration with the core, which Statistics Canada measures based on the commuting flows indicated in Census data. Census Agglomeration areas have to have a core population above 10,000.
-All CMAs and CAs consist of Census subdivisions but not all Census subdivisions are a subset of a CMA or a CA. For more details on CMAs and CAs, consult Statistics Canada's Census Dictionary article for [Census metropolitan areas (CMA) and census agglomerations (CA)](https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo009-eng.cfm). All CMAs and some CAs have data at the Census tract level, but most CAs do not. The 2016 Census has 35 CMAs and 14 CAs with Census tracts that have their own defined geography. There are a further 106 CAs without Census tracts that do not have their own distinctly defined geographies.
+All CMAs and CAs consist of Census subdivisions but not all Census subdivisions are a subset of a CMA or a CA. For more details on CMAs and CAs, consult Statistics Canada's Census Dictionary article for [Census metropolitan areas (CMA) and census agglomerations (CA)](https://www12.statcan.gc.ca/census-recensement/2021/ref/dict/az/Definition-eng.cfm?ID=geo009). All CMAs and some CAs have data at the Census tract level, but most CAs do not. The 2021 Census has 41 CMAs and 9 CAs with Census tracts that have their own defined geography. There are a further 102 CAs without Census tracts that do not have their own distinctly defined geographies.
### Aside: dissemination areas, blocks, and enumeration areas
-[Dissemination areas](https://www150.statcan.gc.ca/n1/pub/92-195-x/2011001/geo/da-ad/def-eng.htm) (DA) are the smallest atomic geographic unit at which all census data is captured. DAs cover the entirety of Canada and follow the boundaries of census subdivisions and census tracts. While inter-census geographic stability is not guaranteed, they generally tend to be as stable as the census tracts and census subdivisions that they make up. In addition to census boundaries, DAs will generally follow natural boundaries created by other spatial features like roads, railways, water features, and designed to be spatially compact and with a target population around 400-700 persons. The 2016 census data has 56,589 distinct DAs.
+[Dissemination areas](https://www12.statcan.gc.ca/census-recensement/2021/ref/dict/az/Definition-eng.cfm?ID=geo021) (DA) are the smallest atomic geographic unit at which all census data is captured. DAs cover the entirety of Canada and follow the boundaries of census subdivisions and census tracts. While inter-census geographic stability is not guaranteed, they generally tend to be as stable as the census tracts and census subdivisions that they make up. In addition to census boundaries, DAs will generally follow natural boundaries created by other spatial features like roads, railways, and water features, and are designed to be spatially compact with a target population of around 400-700 persons. The 2021 census data has 57,936 distinct DAs.
[Enumeration areas](https://www12.statcan.gc.ca/English/census01/products/reference/dict/geo024.htm) (EA) were the DA equivalent for censuses prior to 2001. Similar to DAs, EAs were used to as the basic level at which census data was collected. They do not necessarily correspond accurately to DAs in data from 2001 onwards.
-[Dissemination block](https://www150.statcan.gc.ca/n1/pub/92-195-x/2011001/geo/db-id/def-eng.htm) (DB) level data is available for the 2001-2016 datasets. DBs are essentially city blocks, bounded by intersecting streets and therefore are largely the product of road networks at the time of the census. The geographies and identification codes of DBs are not necessarily stable over time. DBs are split whenever they intersect with boundaries of higher geographic levels in such a way as to ensure that they can be aggregated upwards precisely. DBs only provide data for population, dwelling counts, and number of households (from 2006 onwards) without any additional characteristic data. DBs with population under 15 have their population counts adjusted for privacy. For the 2016 census, there are close to half a million DB distinct regions.
+[Dissemination block](https://www12.statcan.gc.ca/census-recensement/2021/ref/dict/az/Definition-eng.cfm?ID=geo014) (DB) level data is available for the 2001-2021 datasets. DBs are essentially city blocks, bounded by intersecting streets and therefore are largely the product of road networks at the time of the census. The geographies and identification codes of DBs are not necessarily stable over time. DBs are split whenever they intersect with boundaries of higher geographic levels in such a way as to ensure that they can be aggregated upwards precisely. DBs only provide data for population, dwelling counts, and number of households (from 2006 onwards) without any additional characteristic data. DBs with population under 15 have their population counts adjusted for privacy. For the 2021 census, there are close to half a million distinct DB regions.
## Viewing available Census regions
diff --git a/vignettes/statcan_attribute_files.Rmd b/vignettes/statcan_attribute_files.Rmd
new file mode 100644
index 00000000..2c2fb78f
--- /dev/null
+++ b/vignettes/statcan_attribute_files.Rmd
@@ -0,0 +1,49 @@
+---
+title: "StatCan attribute files"
+output: rmarkdown::html_vignette
+mainfont: Roboto
+vignette: >
+ %\VignetteIndexEntry{StatCan attribute files}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ message = FALSE,
+ warning = FALSE,
+ comment = "#>",
+ eval = nzchar(Sys.getenv("COMPILE_VIG"))
+)
+```
+
+```{r setup}
+library(cancensus)
+library(dplyr)
+```
+
+## Background
+Attribute files describe the detailed relationship of various Statistics Canada geographic levels and provide population, household and dwelling counts. This information can be useful for understanding the hierarchical relationships of different geographic levels. The [CensusMapper API](https://censusmapper.ca/api) that **cancensus** uses for most of the data queries is ill-suited to get comprehensive data on the hierarchical relationships Canada wide, so it can be helpful to have direct access to this data in comprehensive tabular form.
+
+## Match between Census Tracts and Census Subdivisions
+If we are interested in understanding which Census Tracts respect municipal boundaries and which ones don't in 2021 we can consult the geographic attributes file. It contains a row for each Census Block, the basic building block of census geographies, and tags other levels of geography the Census Block lies in.
+
+```{r}
+attributes <- get_statcan_geographic_attributes("2021")
+
+attributes %>% colnames()
+```
+
+To answer our question, we filter the Census Blocks down to the ones that lie within a Census Tract, and check for the collection of Census Blocks within each Census Tract how many municipalities they lie in.
+
+```{r}
+attributes %>%
+ filter(CMATYPE_RMRGENRE %in% c("B","K")) |> # filter areas not in CTs
+ group_by(CTCODE_SRCODE,CMATYPE_RMRGENRE) |>
+ summarise(`Number of municipalities`=length(unique(CSDUID_SDRIDU)),.groups="drop") |>
+ count(`Number of municipalities`,CMATYPE_RMRGENRE) |>
+ arrange(CMATYPE_RMRGENRE,`Number of municipalities`)
+```
+
+This shows that most census tracts for both Census Metropolitan Areas (CMATYPE_RMRGENRE="B") and tracted Census Agglomerations (CMATYPE_RMRGENRE="K") lie within a single municipality, with some census tracts spanning several municipalities.
diff --git a/vignettes/statcan_wds.Rmd b/vignettes/statcan_wds.Rmd
new file mode 100644
index 00000000..eb88053d
--- /dev/null
+++ b/vignettes/statcan_wds.Rmd
@@ -0,0 +1,107 @@
+---
+title: "StatCan WDS"
+output: rmarkdown::html_vignette
+mainfont: Roboto
+vignette: >
+ %\VignetteIndexEntry{StatCan WDS}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ message = FALSE,
+ warning = FALSE,
+ comment = "#>",
+ eval = nzchar(Sys.getenv("COMPILE_VIG"))
+)
+```
+
+```{r setup}
+library(cancensus)
+library(dplyr)
+library(tidyr)
+library(ggplot2)
+```
+
+
+Some census datasets have not been imported into CensusMapper, and thus aren't available via the `get_census()` function. But the data can be queried directly from the [Statistics Canada Web Data Service for the 2021 census](https://www12.statcan.gc.ca/wds-sdw/2021profile-profil2021-eng.cfm), as well as geographic boundary files.
+
+## Word of caution
+The StatCan census WDS is not a stable API. The meaning of the internal identifiers used to query it has changed in the past, and may change again in the future. **Use with extreme caution.** The same call may give different results at different points in time. Moreover, the API server is sometimes unavailable.
+
+## Ukrainians by Federal Electoral Districts
+As an example we access the share of people with Ukrainian ethnic origin at the Federal Electoral District level. First we need to find the StatCan Characteristic IDs for our characteristic of interest. For this we download the metadata, extract the characteristics, locate the base characteristic for ethnic origin and the entry referencing "Ukrainian" that's a descendant of the base characteristic.
+
+```{r}
+metadata <- get_statcan_wds_metadata("2021","FED")
+
+characteristics <- metadata |>
+ filter(`Codelist en`=="Characteristic") |>
+ mutate(across(matches("ID"),as.integer))
+
+ethnic_base <- characteristics |>
+ filter(grepl("Total - Ethnic",en))
+ukranian <- characteristics |>
+ filter(grepl("Ukrainian",en), `Parent ID`==ethnic_base$ID)
+
+selected_characteristics <- bind_rows(ethnic_base,ukranian)
+
+selected_characteristics |> select(ID,en)
+```
+
+We can also get the geographic identifiers for the federal electoral districts from the metadata.
+
+```{r}
+dguids <- metadata |>
+ filter(`Codelist ID`=="CL_GEO_FED") |>
+ pull(ID)
+```
+
+```{r}
+data <- get_statcan_wds_data(dguids,members=selected_characteristics$ID,gender="Total")
+```
+
+The data comes enriched with metadata to make working with it easier, in particular the `CHARACTERISTIC_NAME` column contains plain language names. Now we can transform the data to compute percentages.
+
+```{r}
+plot_data <- data |>
+ select(DGUID=GEO_DESC,Name=GEO_NAME,name=CHARACTERISTIC_NAME,value=OBS_VALUE) |>
+ pivot_wider() |>
+ mutate(Share=Ukrainian/`Total - Ethnic or cultural origin for the population in private households - 25% sample data`)
+```
+
+Now we can visualize the data, here are the top 20 Federal Electoral Districts by share of the population with Ukrainian heritage.
+
+```{r fig.height=4.5, fig.width=6}
+plot_data |> slice_max(Share,n=20) |>
+ ggplot(aes(y=reorder(Name,Share),x=Share)) +
+ geom_bar(stat="identity",fill="steelblue") +
+ scale_x_continuous(labels=scales::percent) +
+ labs(title="Ukrainian ethnic origin",
+ y="Federal electoral district",
+ x="Share of population in private households",
+ caption="StatCan Census 2021")
+```
+
+To map the data we have to get the geographies.
+
+```{r}
+fed_geos <- get_statcan_geographies("2021","FED")
+```
+
+With these we can join on our census data and map it.
+
+```{r fig.height=4.5, fig.width=6}
+fed_geos |>
+ left_join(plot_data,by="DGUID") |>
+ ggplot(aes(fill=Share)) +
+ geom_sf() +
+ scale_fill_viridis_c(labels=scales::percent) +
+ coord_sf(datum=NA) +
+ labs(title="Ukrainian ethnic origin by Federal Electoral District",
+ fill="Share of\npopulation",
+ caption="StatCan Census 2021")
+```
+