diff --git a/.Rbuildignore b/.Rbuildignore index 80d738df..503e01ab 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,5 +25,4 @@ lastMiKTeXException ^doc$ -^R/geo_suite.R ^CRAN-SUBMISSION$ diff --git a/DESCRIPTION b/DESCRIPTION index 656e5ab3..dae0b64b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cancensus Type: Package Title: Access, Retrieve, and Work with Canadian Census Data and Geography -Version: 0.5.4 +Version: 0.5.5 Authors@R: c( person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("aut"), comment = "API creator and maintainer"), person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut", "cre"), comment = "Package maintainer, responsible for correspondence"), @@ -38,7 +38,8 @@ Suggests: knitr, sf, geojsonsf, tidyr, - lwgeom + lwgeom, + xml2 VignetteBuilder: knitr URL: https://github.com/mountainMath/cancensus, https://mountainmath.github.io/cancensus/, https://censusmapper.ca/api BugReports: https://github.com/mountainMath/cancensus/issues diff --git a/NAMESPACE b/NAMESPACE index d5f26572..89fe0e88 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,12 @@ export(find_census_vectors) export(get_census) export(get_census_geometry) export(get_intersecting_geometries) +export(get_statcan_geo_suite) +export(get_statcan_geographic_attributes) +export(get_statcan_geographies) +export(get_statcan_geography_relationships) +export(get_statcan_wds_data) +export(get_statcan_wds_metadata) export(label_vectors) export(list_cancensus_cache) export(list_census_datasets) diff --git a/NEWS.md b/NEWS.md index 1e918aa8..4db26f3b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# cancensus 0.5.5 + +- add functionality for direct access to StatCan census WDS for 2021 +- add functionality to download original StatCan geographies for 2021 +- update CODES_TABLE for 2021 census + # cancensus 0.5.4 - added ability to query census datasets by year diff --git a/R/geo_suite.R b/R/geo_suite.R index ac198b13..2ac939e3 
100644 --- a/R/geo_suite.R +++ b/R/geo_suite.R @@ -1,28 +1,26 @@ - - - #' Read the geosuite data #' #' @description -#' Reads the geosuite data for the given level and census year. Data gets cached after first download. +#' Reads the geosuite data for the given level and census year. Data gets cached after first download if the +#' cancensus cache path has been set. For older +#' years `get_statcan_geographic_attributes()` can fill in most of the information #' #' @param level geographic level to return the data for, valid choices are -#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR" +#' "DB", "DA", "ADA", "CT", "CSD", "CMA", "CD", "PR", "FED", "DPL", "ER", "PN", "POPCTR" #' @param census_year census year to get the data for, right now only 2021 is supported #' @param refresh (logical) refresh the cache if true #' @return tibble with the geosuite data #' -#' @keywords internal #' #' @examples #' # list add the cached census data #' \dontrun{ -#' get_geo_suite("DA","2021") +#' get_statcan_geo_suite("DA","2021") #' } #' @export -get_geo_suite <- function(level,census_year="2021",refresh=FALSE){ +get_statcan_geo_suite <- function(level,census_year="2021",refresh=FALSE){ valid_years <- c("2021") #seq(2001,2021,5) %>% as.character() - valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR") + valid_levels <- c("DB", "DA", "CT", "ADA", "CSD", "CMA", "CD", "PR","FED","DPL","ER","PN","POPCTR") if (!(as.character(census_year) %in% valid_years)) { stop(paste0("Only census years ",paste0(valid_years,collapse = ", "), " are supported for GeoSuite")) @@ -77,10 +75,12 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){ #' #' @description #' Reads the Dissemination Geographies Relationship File for the given census year. The table contains -#' the information on how all the geographic levels are related for each area. A reference guide is available +#' the information on how all the geographic levels are related for each area. 
Data gets cached after first download if the +#' cancensus cache path has been set. A reference guide is available #' at https://www150.statcan.gc.ca/n1/en/catalogue/982600032021001 #' -#' @param census_year census year to get the data for, right now only 2021 is supported +#' @param census_year census year to get the data for, right now only 2021 is supported, for older +#' years `get_statcan_geographic_attributes()` can fill in most of the information #' @param refresh (logical) refresh the cache if true #' @return tibble with the relationship data #' @@ -89,10 +89,10 @@ get_geo_suite <- function(level,census_year="2021",refresh=FALSE){ #' @examples #' # list add the cached census data #' \dontrun{ -#' get_geography_relationship("2021") +#' get_statcan_geography_relationships("2021") #' } #' @export -get_geography_relationship <- function(census_year="2021", refresh=FALSE){ +get_statcan_geography_relationships <- function(census_year="2021", refresh=FALSE){ valid_years <- c("2021") if (!(as.character(census_year) %in% valid_years)) { stop(paste0("Only census years ",paste0(valid_years,collapse = ", "), @@ -113,28 +113,30 @@ get_geography_relationship <- function(census_year="2021", refresh=FALSE){ #' @description #' Reads the Geographies Attributes File for the given census year. The table contains #' the information on how all the geographic levels are related for each area, and population, dwelling and household counts. -#' A reference guide is available +#' Data gets cached after first download if the +#' cancensus cache path has been set. 
A reference guide is available #' at https://www150.statcan.gc.ca/n1/en/catalogue/92-151-G2021001 #' -#' @param census_year census year to get the data for, right now only 2006, 2011 and 2016 are supported +#' @param census_year census year to get the data for, right now only 2006, 2011, 2016, 2021 are supported #' @param refresh (logical) refresh the cache if true #' @return tibble with the relationship data #' #' @examples #' # list add the cached census data -#' get_geographic_attributes("2016") -#' +#' \dontrun{ +#' get_statcan_geographic_attributes("2021") +#' } #' @export -get_geographic_attributes <- function(census_year="2016",refresh=FALSE){ +get_statcan_geographic_attributes <- function(census_year="2021",refresh=FALSE){ census_year <- as.character(census_year)[1] - valid_years <- seq(2006,2016,5) %>% as.character + valid_years <- seq(2006,2021,5) %>% as.character if (!(as.character(census_year) %in% valid_years)) { stop(paste0("Only census years ",paste0(valid_years,collapse = ", "), " are supported for the geographic relationship file.")) } - urls <- c("2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip", + urls <- c("2021"="https://www12.statcan.gc.ca/census-recensement/2021/geo/aip-pia/attribute-attribs/files-fichiers/2021_92-151_X.zip", + "2016"="https://www12.statcan.gc.ca/census-recensement/2016/geo/ref/gaf/files-fichiers/2016_92-151_XBB_txt.zip", "2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_txt.zip", - #"2011"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2011_92-151_XBB_xlsx.zip", "2006"="https://www12.statcan.gc.ca/census-recensement/2011/geo/ref/files-fichiers/2006_92-151_XBB_txt.zip") base_path <- cache_path("attribute_files") @@ -147,8 +149,9 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){ utils::download.file(urls[[census_year]],tmp) utils::unzip(tmp,exdir = base_path_year) 
} - file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE) - if (census_year=="2016") { + if (census_year=="2021") file <- dir(base_path_year,pattern="\\.csv",full.names = TRUE) + else file <- dir(base_path_year,pattern="\\.txt",full.names = TRUE) + if (census_year %in% c("2016","2021")) { result <- readr::read_csv(file,col_types = readr::cols(.default="c"), locale = readr::locale(encoding ="Windows-1252")) } else { @@ -175,3 +178,7 @@ get_geographic_attributes <- function(census_year="2016",refresh=FALSE){ dplyr::mutate(dplyr::across(dplyr::matches("DBpop\\d{4}|DBtdwell\\d{4}|DBurdwell\\d{4}|DBarea"),as.numeric)) } + + + +
diff --git a/R/geographies.R b/R/geographies.R
new file mode 100644
index 00000000..fe5ce12f
--- /dev/null
+++ b/R/geographies.R
@@ -0,0 +1,82 @@
+#' Read the StatCan geographic boundary files
+#'
+#' @description
+#' Reads the original unprocessed geographic boundary files from Statistics Canada. The files are
+#' downloaded on first use and read from the local cache on subsequent calls.
+#'
+#' @param census_year census year to get the data for, right now only 2021 is supported
+#' @param level geographic level to return the data for, valid choices are
+#' "PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"
+#' @param type type of geographic data, valid choices are "cartographic" or "digital"
+#' @param cache_path optional path to cache the data. If `NULL` (the default) the geographic data gets
+#' cached in the "geographies" subdirectory of the cancensus cache path.
+#' @param timeout optional timeout parameter, adjust as needed if the data download times out when using slow connections
+#' @param refresh (logical) refresh the cache if true
+#' @param quiet (logical) suppress messages if `TRUE`
+#' @return a spatial dataframe with the geographic data
+#'
+#' @examples
+#' # get the digital geographic boundaries for provinces and territories
+#' \dontrun{
+#' get_statcan_geographies(census_year="2021",level="PR",type="digital")
+#' }
+#' @export
+get_statcan_geographies <- function(census_year,level,type="cartographic",
+                                    cache_path = NULL,timeout=1000,
+                                    refresh=FALSE,quiet=FALSE) {
+  valid_census_years <- c("2021")
+  valid_levels <- c("PR","CD","CMACA","CMA","CA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR")
+  valid_types <- c("cartographic","digital")
+  if (!(census_year %in% valid_census_years)) {
+    stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),"."))
+  }
+  if (!(type %in% valid_types)) {
+    stop(paste0("Type must be one of ",paste0(valid_types,collapse = ", "),"."))
+  }
+  if (!(level %in% valid_levels)) {
+    stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),"."))
+  }
+  # sf is only a suggested dependency, fail early before downloading anything
+  if (!requireNamespace("sf", quietly = TRUE)) {
+    stop("Package sf is required to read the geographic data, please install it.")
+  }
+  # translate level names to the abbreviations used in the StatCan file names
+  level_map <- c("CMACA"="CMA","CA"="CMA","POPCNTR"="PC")
+  if (level %in% names(level_map)) level <- level_map[[level]]
+  # a user supplied cache_path takes precedence; the call to cache_path() still
+  # resolves to the helper function because R skips non-function objects when
+  # looking up the function of a call
+  if (is.null(cache_path)) {
+    geo_base_path <- cache_path("geographies")
+  } else {
+    geo_base_path <- cache_path
+  }
+  geo_base_path <- file.path(geo_base_path,type)
+  if (!dir.exists(geo_base_path)) dir.create(geo_base_path,recursive = TRUE)
+  exdir <- file.path(geo_base_path,level)
+  if (refresh || !dir.exists(exdir) || length(dir(exdir,"\\.shp$"))==0) {
+    if (type=="cartographic") typeID <- "b" else typeID <- "a"
+    # the file name pads the level abbreviation to six characters
+    if (nchar(level)==2) {
+      filler <- "_000"
+    } else if (nchar(level)==3) {
+      filler <- "000"
+    } else {
+      stop(paste0("Problem, don't know how to get geographic data for level ",level,"."))
+    }
+    url <- paste0("https://www12.statcan.gc.ca/census-recensement/",census_year,
+                  "/geo/sip-pis/boundary-limites/files-fichiers/l",
+                  tolower(level),filler,typeID,"21a_e.zip")
+    tmp <- tempfile(fileext = ".zip")
+    # restore the timeout option and remove the temporary archive even if the download fails
+    old_options <- options(timeout = timeout)
+    on.exit(options(old_options), add = TRUE)
+    on.exit(unlink(tmp), add = TRUE)
+    utils::download.file(url,tmp,mode="wb",quiet=quiet)
+    utils::unzip(tmp,exdir = exdir)
+  } else {
+    if (!quiet) message("Reading geographic data from local cache.")
+  }
+  path <- dir(exdir,"\\.shp$",full.names = TRUE)
+  sf::read_sf(path)
+}
diff --git a/R/helpers.R b/R/helpers.R index 09b12f3f..a46ddd0f 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -101,7 +101,7 @@ check_recalled_data_and_warn <- function(meta_file,params){ cached_data<-generate_metadata(meta_file,params) recalled_data <- list_recalled_cached_data(cached_data,warn_only_once=TRUE) if (!is.null(recalled_data) && nrow(recalled_data)>0) { - warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data.") + warning("Currently loaded data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data.") } d<-NULL } @@ -109,7 +109,7 @@ check_recalled_data_and_warn <- function(meta_file,params){ check_for_recalled_data_and_warn <- function(){ recalled_data <- list_recalled_cached_data(warn_only_once=TRUE) if (!is.null(recalled_data) && nrow(recalled_data)>0) { - warning(paste0("Some locally cached data has been recalled. Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nnremove_recalled_cached_data()\nto remove recalled data.")) + warning(paste0("Some locally cached data has been recalled. 
Use\nlist_recalled_cached_data()\nto inspect recalled locally cached data and\nremove_recalled_cached_data()\nto remove recalled data.")) } d<-NULL } @@ -129,7 +129,7 @@ first_run_checks <- function(){ #' @name CODES_TABLE #' @docType data #' @author derived from StatCan definitions -#' @references \url{https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/geo012-eng.cfm} +#' @references \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CSDtype}, \url{https://www12.statcan.gc.ca/census-recensement/2021/geo/ref/domain-domaine/index2021-eng.cfm?lang=e&id=CDtype} #' @keywords data NULL diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 00000000..4921297d Binary files /dev/null and b/R/sysdata.rda differ
diff --git a/R/wds.R b/R/wds.R
new file mode 100644
index 00000000..7e595504
--- /dev/null
+++ b/R/wds.R
@@ -0,0 +1,183 @@
+#' Query the StatCan WDS for metadata
+#'
+#' @description
+#' Get official metadata information from Statistics Canada for a given geographic level. Only available for the 2021 census.
+#' Data is cached for the duration of the R session.
+#'
+#' @param census_year census year to get the data for, right now only 2021 is supported
+#' @param level geographic level to return the data for, valid choices are
+#' "PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR"
+#' @param refresh default is `FALSE` will refresh the temporary cache if `TRUE`
+#' @return tibble with the metadata, one row per code of each code list, with columns
+#' `Agency ID`, `Codelist ID`, `Codelist en`, `Codelist fr`, `ID`, `en`, `fr` and `Parent ID`
+#'
+#' @examples
+#' # get metadata for federal electoral districts
+#' \dontrun{
+#' get_statcan_wds_metadata(census_year="2021",level="FED")
+#' }
+#' @export
+get_statcan_wds_metadata <- function(census_year,level,refresh=FALSE){
+  valid_census_years <- c("2021")
+  valid_levels <- c("PR","CD","CMACA","CSD","CT","ADA","DA","ER","FED","DPL","POPCNTR")
+  if (!(census_year %in% valid_census_years)) {
+    stop(paste0("Census year must be one of ",paste0(valid_census_years,collapse = ", "),"."))
+  }
+  if (!(level %in% valid_levels)) {
+    stop(paste0("Level must be one of ",paste0(valid_levels,collapse = ", "),"."))
+  }
+  # xml2 is only a suggested dependency, fail early with a clear message
+  if (!requireNamespace("xml2", quietly = TRUE)) {
+    stop("Package xml2 is required to parse the WDS metadata, please install it.")
+  }
+  meta_url <- paste0("https://api.statcan.gc.ca/census-recensement/profile/sdmx/rest/dataflow/STC_CP/DF_",level,"?references=all")
+  # the cache file lives in the session temp directory and is keyed on the request url
+  metadata_tempfile <- file.path(tempdir(),paste0("census_wds_metadata_",digest::digest(meta_url),".sdmx"))
+  if (refresh || !file.exists(metadata_tempfile)) {
+    utils::download.file(meta_url,metadata_tempfile)
+  }
+  d <- xml2::read_xml(metadata_tempfile)
+  code_lists <- xml2::xml_find_all(d,"//structure:Codelist")
+
+  meta_data <- lapply(code_lists, \(cl){
+    codes <- xml2::xml_find_all(cl,"structure:Code")
+    # keep one nodeset per code so that codes without a parent reference get an NA
+    parents <- xml2::xml_find_all(codes,"structure:Parent/Ref",flatten=FALSE)
+    dplyr::tibble(`Agency ID`=xml2::xml_attr(cl,"agencyID"),
+                  `Codelist ID`=xml2::xml_attr(cl,"id"),
+                  `Codelist en`=xml2::xml_text(xml2::xml_find_all(cl,"common:Name[@xml:lang='en']")),
+                  `Codelist fr`=xml2::xml_text(xml2::xml_find_all(cl,"common:Name[@xml:lang='fr']")),
+                  ID=xml2::xml_attr(codes,"id"),
+                  en=xml2::xml_text(xml2::xml_find_all(codes,"common:Name[@xml:lang='en']")),
+                  fr=xml2::xml_text(xml2::xml_find_all(codes,"common:Name[@xml:lang='fr']")),
+                  `Parent ID`=vapply(parents,
+                                     \(p) if (length(p)==0) NA_character_ else xml2::xml_attr(p,"id")[[1]],
+                                     character(1)))
+  }) |>
+    dplyr::bind_rows()
+  meta_data
+}
+
+#' Query the StatCan WDS for data
+#'
+#' @description
+#' Get official census data from Statistics Canada for a given set of DGUIDs. Only available for the 2021 census. The
+#' downloaded data gets enriched by geographic and characteristic names based on metadata obtained via `get_statcan_wds_metadata()`.
+#' Data is cached for the duration of the R session.
+#'
+#' @param DGUIDs character vector of DGUIDs to get the data for, right now only the 2021 census is supported.
+#' The geographic level is derived from the first DGUID. Valid DGUIDs for a given geographic
+#' level can be queried via `get_statcan_wds_metadata()`.
+#' @param members list of Member IDs to download data for. By default all characteristics are downloaded. Valid
+#' Member IDs and their descriptions can be queried via the `get_statcan_wds_metadata()` call.
+#' @param gender optionally query data for only one gender. By default this queries data for all genders, possible
+#' values are "Total", "Male", "Female" to only query total data, or for males only or for females only.
+#' @param language specify language for geography and characteristic names that get added, valid choices are "en" and "fr"
+#' @param refresh default is `FALSE` will refresh the temporary cache if `TRUE`
+#' @return tibble with the enriched census data
+#'
+#' @examples
+#' # get data for federal electoral district 2013A000459021
+#' \dontrun{
+#' get_statcan_wds_data(DGUIDs="2013A000459021")
+#' }
+#' @export
+get_statcan_wds_data <- function(DGUIDs,
+                                 members = NULL,
+                                 gender="All",
+                                 language="en",
+                                 refresh=FALSE) {
+  if (length(DGUIDs)==0) {
+    stop("At least one DGUID is required.")
+  }
+  DGUIDs <- sort(DGUIDs)
+  if (!is.null(members)) members <- sort(members)
+  level <- geo_level_from_DGUID(DGUIDs[1])
+  # population centre DGUIDs resolve to "PC" while the level name that
+  # get_statcan_wds_metadata() validates against is "POPCNTR"
+  if (level=="PC") level <- "POPCNTR"
+  url <- "https://api.statcan.gc.ca/census-recensement/profile/sdmx/rest/data/STC_CP"
+  gender <- tolower(gender)
+  gender <- paste0(toupper(substr(gender,1,1)),substr(gender,2,100))
+  valid_genders <- c("All","Total","Male","Female")
+  if (!(gender %in% valid_genders)) {
+    stop(paste0("Gender must be one of ",paste0(valid_genders,collapse = ", "),"."))
+  }
+  language <- tolower(language)
+  valid_languages <- c("en","fr")
+  if (!(language %in% valid_languages)) {
+    stop(paste0("Language must be one of ",paste0(valid_languages,collapse = ", "),"."))
+  }
+  gender <- c("All"="","Total"="1","Male"="2","Female"="3")[[gender]]
+  dguid_string <- paste0(DGUIDs,collapse="+")
+  member_string <- paste0(members,collapse = "+")
+  add <- paste0("DF_",level,"/A5.",dguid_string,".",gender,".",member_string,".1")
+  wds_data_tempfile <- file.path(tempdir(),paste0("wds_data_",digest::digest(add),".csv"))
+  if (refresh || !file.exists(wds_data_tempfile)) {
+    response <- httr::GET(paste0(url,",",add),
+                          httr::accept("text/csv"),
+                          httr::add_headers("Accept-Encoding"="deflate, gzip, br"),
+                          httr::write_disk(wds_data_tempfile,overwrite = TRUE))
+    if (httr::status_code(response)!=200) {
+      # the error body was written to the cache file, read it and remove the file
+      # so that a failed request does not get served from the cache later on
+      error_message <- httr::content(response,as="text",encoding="UTF-8")
+      unlink(wds_data_tempfile)
+      stop(paste0("Invalid request.\n",error_message))
+    }
+  }
+  census_year <- "2021"
+  meta_data <- get_statcan_wds_metadata(census_year,level,refresh = refresh)
+
+  meta_geos <- meta_data |>
+    dplyr::filter(.data$`Codelist ID`==paste0("CL_GEO_",level)) |>
+    dplyr::select(GEO_DESC="ID",GEO_NAME=dplyr::all_of(language))
+  meta_characteristics <- meta_data |>
+    dplyr::filter(.data$`Codelist ID`=="CL_CHARACTERISTIC") |>
+    dplyr::select(CHARACTERISTIC="ID",CHARACTERISTIC_NAME=dplyr::all_of(language))
+
+  readr::read_csv(wds_data_tempfile,col_types = readr::cols(.default="c")) |>
+    dplyr::mutate(dplyr::across(dplyr::matches("OBS_VALUE|TNR_CI_"),as.numeric)) |>
+    dplyr::left_join(meta_geos,by="GEO_DESC") |>
+    dplyr::left_join(meta_characteristics,by="CHARACTERISTIC")
+}
+
+# Derive the geographic level abbreviation from a DGUID.
+#
+# The level is looked up from the four character schema code at positions 6 to 9 of the DGUID.
+# DGUID: a single DGUID string
+# simple: if TRUE, "LHR" is collapsed to "HR" and "CMA", "CA", "CMAP" are collapsed to "CMACA"
+# Returns the level abbreviation, stops with an error if the schema code is not known.
+geo_level_from_DGUID <- function(DGUID,simple=TRUE){
+  schema <- substr(DGUID,6L,9L)
+  schema_to_level <- c("0000"="C","0001"="C",
+                       "0002"="PR",
+                       "0003"="CD",
+                       "0004"="FED",
+                       "0005"="CSD",
+                       "0006"="DPL",
+                       "0007"="HR",
+                       "0008"="LHR",
+                       "0011"="FSA",
+                       "0500"="ER",
+                       "0501"="CAR",
+                       "0502"="CCSD",
+                       "0503"="CMA",
+                       "0504"="CA",
+                       "0505"="CMAP",
+                       "0507"="CT",
+                       "0510"="PC",
+                       "0511"="PCP",
+                       "0512"="DA",
+                       "0513"="DB",
+                       "0516"="ADA")
+  if (length(schema)!=1 || is.na(schema) || !(schema %in% names(schema_to_level))) {
+    stop(paste0("Unable to determine the geographic level for DGUID ",paste0(DGUID,collapse=", "),"."))
+  }
+  level <- schema_to_level[[schema]]
+  if (simple){
+    simple_translation <- c("LHR"="HR","CMA"="CMACA","CA"="CMACA","CMAP"="CMACA")
+    if (level %in% names(simple_translation)) level <- simple_translation[[level]]
+  }
+  level
+}
diff --git a/README.md b/README.md index b50dca47..0187dd36 100644 --- a/README.md +++ b/README.md @@ -139,17 +139,17 @@ There are several other jurisdiction where census data is available via R packag If you wish to cite cancensus: von Bergmann, J., Aaron Jacobs, Dmitry Shkolnik (2022). cancensus: R package to - access, retrieve, and work with Canadian Census data and geography. v0.5.3. + access, retrieve, and work with Canadian Census data and geography. v0.5.5. 
A BibTeX entry for LaTeX users is ``` - @Manual{, + @Manual{cancensus, author = {Jens {von Bergmann} and Dmitry Shkolnik and Aaron Jacobs}, title = {cancensus: R package to access, retrieve, and work with Canadian Census data and geography}, year = {2022}, - note = {R package version 0.5.3}, - url = {https://mountainmath.github.io/cancensus/}, + note = {R package version 0.5.5}, + url = {https://mountainmath.github.io/cancensus/} } ``` ### Related packages @@ -164,7 +164,7 @@ The [tongfen package](https://mountainmath.github.io/tongfen/index.html) automat ### Statistics Canada Attribution -Subject to the Statistics Canada Open License Agreement, licensed products using Statistics Canada data should employ the following aknowledgement of source: +Subject to the Statistics Canada Open Data License Agreement, licensed products using Statistics Canada data should employ the following acknowledgement of source: ``` Acknowledgment of Source diff --git a/cran-comments.md b/cran-comments.md index d9ad2c5f..003bf85a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,8 @@ +# Update 0.5.5 +- add functionality for direct access to StatCan census WDS for 2021 +- add functionality to download original StatCan geographies for 2021 +- update CODES_TABLE for 2021 census + # Update - 0.5.4 - added ability to query census datasets by year - add a convenience function for creating unique names within given selection of regions from `list_census_regions()` diff --git a/docs/404.html b/docs/404.html index 7ca15fee..af0f1d6f 100644 --- a/docs/404.html +++ b/docs/404.html @@ -50,7 +50,7 @@ cancensus - 0.5.4 + 0.5.5 @@ -78,6 +78,12 @@
  • Finding intersecting geometries from custom data
  • +
  • + StatCan attribute files +
  • +
  • + StatCan WDS +
  • Additional datasets: Structural type of dwelling by document type
  • diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index aaf3348b..edbf629d 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -23,7 +23,7 @@ cancensus - 0.5.4 + 0.5.5 @@ -49,6 +49,12 @@
  • Finding intersecting geometries from custom data
  • +
  • + StatCan attribute files +
  • +
  • + StatCan WDS +
  • Additional datasets: Structural type of dwelling by document type
  • diff --git a/docs/articles/Dwellings_by_document_type_cross_tabulation.html b/docs/articles/Dwellings_by_document_type_cross_tabulation.html index 440289b9..21ac4288 100644 --- a/docs/articles/Dwellings_by_document_type_cross_tabulation.html +++ b/docs/articles/Dwellings_by_document_type_cross_tabulation.html @@ -51,7 +51,7 @@ cancensus - 0.5.4 + 0.5.5 @@ -79,6 +79,12 @@
  • Finding intersecting geometries from custom data
  • +
  • + StatCan attribute files +
  • +
  • + StatCan WDS +
  • Additional datasets: Structural type of dwelling by document type
  • @@ -174,10 +180,10 @@

    E started with cancensus vignette.

     # Attribution for the dataset to be used in graphs
    -attribution <- dataset_attribution("CA16xSD")
    +attribution <- dataset_attribution("CA16xSD")
     
     # Select all variables base variables, this gives us total counts by structural type of dwelling
    -vars <- list_census_vectors("CA16xSD") %>% 
    +vars <- list_census_vectors("CA16xSD") %>% 
       filter(is.na(parent_vector))
     variables <- setNames(vars$vector,vars$label)
     
    @@ -207,7 +213,7 @@ 

    E dwelling_types <- setdiff(names(variables),"Total dwellings") # Grab the census data and compute shares for each dwelling type -census_data <- get_census("CA16xSD",regions=list(CSD="3520005"), vectors = variables, quiet = TRUE) %>% +census_data <- get_census("CA16xSD",regions=list(CSD="3520005"), vectors = variables, quiet = TRUE) %>% pivot_longer(cols = all_of(dwelling_types)) %>% mutate(share=value/`Total dwellings`)

    To visualize what this looks like on a bar chart:

    @@ -222,7 +228,7 @@

    E

    As with regular Census data, all data can be retrieved as spatial data. Sometimes it’s easier to use the CensusMapper API interface to search for and select the variables we are interested in. The -explore_census_vectors() function opens a browser with the +explore_census_vectors() function opens a browser with the variable selection tool, we determine that “v_CA16xSD_1” and “v_CA16xSD_28” are the variables enumerating all dwellings and all unoccupied dwellings, respectively.

    @@ -231,7 +237,7 @@

    E vars <- c(Total="v_CA16xSD_1", Unoccupied="v_CA16xSD_28") # Retrieve data with attached geography -census_data <- get_census("CA16xSD",regions=list(CSD="3520005"), level="CT", quiet = TRUE, geo_format = "sf", +census_data <- get_census("CA16xSD",regions=list(CSD="3520005"), level="CT", quiet = TRUE, geo_format = "sf", vectors = vars,use_cache = FALSE) %>% mutate(share=Unoccupied/Total) diff --git a/docs/articles/Dwellings_by_document_type_cross_tabulation_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/Dwellings_by_document_type_cross_tabulation_files/figure-html/unnamed-chunk-5-1.png index d6e8d2fb..361a4599 100644 Binary files a/docs/articles/Dwellings_by_document_type_cross_tabulation_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/Dwellings_by_document_type_cross_tabulation_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/Making_maps_with_cancensus.html b/docs/articles/Making_maps_with_cancensus.html index 8cc47bbc..50aa4c6e 100644 --- a/docs/articles/Making_maps_with_cancensus.html +++ b/docs/articles/Making_maps_with_cancensus.html @@ -51,7 +51,7 @@ cancensus - 0.5.4 + 0.5.5 @@ -79,6 +79,12 @@
  • Finding intersecting geometries from custom data
  • +
  • + StatCan attribute files +
  • +
  • + StatCan WDS +
  • Additional datasets: Structural type of dwelling by document type
  • @@ -150,7 +156,7 @@

    Spatial data in cancensuslibrary(cancensus) library(sf) # retrieve sf dataframe -toronto <- get_census(dataset='CA21', regions=list(CMA="35535"), +toronto <- get_census(dataset='CA21', regions=list(CMA="35535"), vectors=c("median_hh_income"="v_CA21_906"), level='CSD', quiet = TRUE, geo_format = 'sf', labels = 'short') @@ -222,8 +228,8 @@

    Interactive maps with leafletleaflet(toronto) %>% addProviderTiles(providers$CartoDB.Positron) %>% addPolygons() -
    -

    Adding colour ramps and additional interactivity takes a little bit +

    +

    Adding colour ramps and additional interactivity takes a little bit more work but is still pretty easy to implement. Following this example we can specify the colour ramp to match our needs.

    @@ -236,8 +242,8 @@ 

    Interactive maps with leaflet= 1, opacity = 1, fillOpacity = 0.65)

    -
    - +
    + diff --git a/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-4-1.png index 96780f75..c9afb25a 100644 Binary files a/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-5-1.png index b97dbf1f..c03b2966 100644 Binary files a/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/Making_maps_with_cancensus_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/Taxfiler_Data.html b/docs/articles/Taxfiler_Data.html index 3a121c93..5bf003a6 100644 --- a/docs/articles/Taxfiler_Data.html +++ b/docs/articles/Taxfiler_Data.html @@ -51,7 +51,7 @@
    cancensus - 0.5.4 + 0.5.5 @@ -79,6 +79,12 @@
  • Finding intersecting geometries from custom data
  • +
  • + StatCan attribute files +
  • +
  • + StatCan WDS +
  • Additional datasets: Structural type of dwelling by document type
  • @@ -168,9 +174,9 @@

    library(ggplot2) library(sf)

    To see all available T1FF datasets and their reference codes we can -use list_census_datasets().

    +use list_census_datasets().

    -list_census_datasets() %>% 
    +list_census_datasets() %>% 
       filter(grepl("taxfiler",description))
     #> # A tibble: 19 × 6
     #>    dataset description             geo_dataset attribution       refer…¹ refer…²
    @@ -198,7 +204,7 @@ 

    -list_census_vectors('TX2017')
    +list_census_vectors('TX2017')
     #> # A tibble: 818 × 7
     #>    vector      type  label           units              parent…¹ aggre…² details
     #>    <chr>       <fct> <chr>           <fct>              <chr>    <chr>   <chr>  
    @@ -219,7 +225,7 @@ 

    + labs(title="Change in share of census families in low income 2006-2011",fill="Percentage\npoint change",caption=dataset_attribution(paste0("TX",c(2006,2011))))

    Analyzing change over longer timelines that span changes in Census geometries involves more work, the tongfen diff --git a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png index c24b06d8..91ff0caa 100644 Binary files a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png index c8eeeae1..1712453d 100644 Binary files a/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/Taxfiler_Data_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/cancensus.html b/docs/articles/cancensus.html index fba7a17a..fd647ce0 100644 --- a/docs/articles/cancensus.html +++ b/docs/articles/cancensus.html @@ -51,7 +51,7 @@ cancensus - 0.5.4 + 0.5.5 @@ -79,6 +79,12 @@

  • Finding intersecting geometries from custom data
  • +
  • + StatCan attribute files +
  • +
  • + StatCan WDS +
  • Additional datasets: Structural type of dwelling by document type
  • @@ -209,17 +215,17 @@

    Accessing Census Data
     # Returns a data frame with data only
    -census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
    +census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
                               vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
                               level='CSD', use_cache = FALSE, geo_format = NA, quiet = TRUE)
     
     # Returns data and geography as an sf-class data frame
    -census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
    +census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
                               vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
                               level='CSD', use_cache = FALSE, geo_format = 'sf', quiet = TRUE)
     
     # Returns a SpatialPolygonsDataFrame object with data and geography
    -census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
    +census_data <- get_census(dataset='CA21', regions=list(CMA="59933"),
                               vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"),
                               level='CSD', use_cache = FALSE, geo_format = 'sp', quiet = TRUE)

    cancensus utilizes caching to increase speed, @@ -230,7 +236,7 @@

    Accessing Census Datause_cache = FALSE as a parameter for get_census.

    Additional parameters for advanced options can be viewed by running -?get_census.

    +?get_census.

    Census Datasets

    @@ -251,10 +257,10 @@

    Census Datasets -

    The function list_census_datasets() will show all +

    The function list_census_datasets() will show all available datasets alongside their metadata.

    -list_census_datasets()
    +list_census_datasets()
     #> # A tibble: 29 × 6
     #>    dataset description                           geo_d…¹ attri…² refer…³ refer…⁴
     #>    <chr>   <chr>                                 <chr>   <chr>   <chr>   <chr>  
    @@ -272,7 +278,7 @@ 

    Census Datasets#> # ²​attribution, ³​reference, ⁴​reference_url

    As other Census datasets become available via the CensusMapper API, they will be listed as output when calling -list_census_datasets().

    +list_census_datasets().

    Census Regions @@ -287,7 +293,7 @@

    Census Regionslist_census_regions(dataset), to display all named census regions and their corresponding id for a given census dataset.

    -list_census_regions("CA21")
    +list_census_regions("CA21")
     #> # A tibble: 5,518 × 8
     #>    region name                      level      pop munic…¹ CMA_UID CD_UID PR_UID
     #>    <chr>  <chr>                     <chr>    <int> <chr>   <chr>   <chr>  <chr> 
    @@ -308,7 +314,7 @@ 

    Census Regions
     # Retrieves Vancouver and Toronto
    -list_census_regions('CA21') %>% 
    +list_census_regions('CA21') %>% 
       filter(level == "CMA", name %in% c("Vancouver","Toronto"))
     #> # A tibble: 2 × 8
     #>   region name      level     pop municipal_status CMA_UID CD_UID PR_UID
    @@ -316,7 +322,7 @@ 

    Census Regions#> 1 35535 Toronto CMA 6202225 B NA NA 35 #> 2 59933 Vancouver CMA 2642825 B NA NA 59 -census_data <- get_census(dataset='CA21', regions=list(CMA=c("59933","35535")), +census_data <- get_census(dataset='CA21', regions=list(CMA=c("59933","35535")), vectors=c("v_CA21_434","v_CA21_435","v_CA21_440"), level='CSD', use_cache = FALSE, quiet = TRUE)

    @@ -417,8 +423,8 @@

    Displaying available Census varia

    Run list_census_vectors(dataset) to view all available Census variables for a given dataset.

    -list_census_vectors("CA21")
    -#> # A tibble: 5,756 × 7
    +list_census_vectors("CA21")
    +#> # A tibble: 7,709 × 7
     #>    vector    type   label                          units paren…¹ aggre…² details
     #>    <chr>     <fct>  <chr>                          <fct> <chr>   <chr>   <chr>  
     #>  1 v_CA21_1  Total  Population, 2021               Numb… NA      Additi… CA 202…
    @@ -431,7 +437,7 @@ 

    Displaying available Census varia #> 8 v_CA21_8 Total Total - Age Numb… NA Additi… CA 202… #> 9 v_CA21_9 Male Total - Age Numb… NA Additi… CA 202… #> 10 v_CA21_10 Female Total - Age Numb… NA Additi… CA 202… -#> # … with 5,746 more rows, and abbreviated variable names ¹​parent_vector, +#> # … with 7,699 more rows, and abbreviated variable names ¹​parent_vector, #> # ²​aggregation

    @@ -462,7 +468,7 @@
    ## # A tibble: 29 × 6
     ##    dataset description                           geo_d…¹ attri…² refer…³ refer…⁴
     ##    <chr>   <chr>                                 <chr>   <chr>   <chr>   <chr>  
    @@ -162,7 +164,7 @@ 

    Census datasets## 10 CA16xSD 2016 Canada Census xtab - Structural… CA16 StatCa… 98-301… https:… ## # … with 19 more rows, and abbreviated variable names ¹​geo_dataset, ## # ²​attribution, ³​reference, ⁴​reference_url

    -

    The list_census_datasets() function also provides +

    The list_census_datasets() function also provides additional background like series reference code, catalogue reference, and attribution details.

    @@ -181,8 +183,8 @@

    Variable vectorsView available Census variable vectors

    -
    ## # A tibble: 5,756 × 7
    +list_census_vectors('CA21')
    +
    ## # A tibble: 7,709 × 7
     ##    vector    type   label                          units paren…¹ aggre…² details
     ##    <chr>     <fct>  <chr>                          <fct> <chr>   <chr>   <chr>  
     ##  1 v_CA21_1  Total  Population, 2021               Numb… NA      Additi… CA 202…
    @@ -195,7 +197,7 @@ 

    View available Census variable v ## 8 v_CA21_8 Total Total - Age Numb… NA Additi… CA 202… ## 9 v_CA21_9 Male Total - Age Numb… NA Additi… CA 202… ## 10 v_CA21_10 Female Total - Age Numb… NA Additi… CA 202… -## # … with 5,746 more rows, and abbreviated variable names ¹​parent_vector, +## # … with 7,699 more rows, and abbreviated variable names ¹​parent_vector, ## # ²​aggregation

    list_census_vectors(dataset) retrieves an index of all available vectors for a given dataset from the CensusMapper API or local @@ -221,7 +223,7 @@

    Searching for Census variable vec matches; and, semantic search which works better with search phrases and has tolerance for inexact searches. Switching between search modes is done using the query_type argument when calling -find_census_vectors() function.

    +find_census_vectors() function.

    Note that variable search is optimized for the Census variables in the main Census datasets. While searches generally work for variables in additional datasets such as cross-tabs and taxfiler data, they have not @@ -233,7 +235,7 @@