improved datasets, added examples tm_facets

r-tmap · Nov 29, 2024 · c797aaf · c797aaf
1 parent 3bfdfd5
commit c797aaf
Show file tree

Hide file tree

Showing 16 changed files with 179 additions and 80 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,11 +11,13 @@ S3method(tmapGetShapeMeta1,Raster)
 S3method(tmapGetShapeMeta1,SpatRaster)
 S3method(tmapGetShapeMeta1,SpatVector)
 S3method(tmapGetShapeMeta1,Spatial)
+S3method(tmapGetShapeMeta1,default)
 S3method(tmapGetShapeMeta1,sf)
 S3method(tmapGetShapeMeta1,sfc)
 S3method(tmapGetShapeMeta1,stars)
 S3method(tmapGetShapeMeta2,SpatRaster)
 S3method(tmapGetShapeMeta2,SpatVector)
+S3method(tmapGetShapeMeta2,default)
 S3method(tmapGetShapeMeta2,sf)
 S3method(tmapGetShapeMeta2,sfc)
 S3method(tmapGetShapeMeta2,stars)
@@ -111,6 +113,7 @@ S3method(tmapShape,Raster)
 S3method(tmapShape,SpatRaster)
 S3method(tmapShape,SpatVector)
 S3method(tmapShape,Spatial)
+S3method(tmapShape,default)
 S3method(tmapShape,sf)
 S3method(tmapShape,stars)
 S3method(tmapSplitShp,default)
@@ -119,6 +122,7 @@ S3method(tmapSubsetShp,Raster)
 S3method(tmapSubsetShp,SpatRaster)
 S3method(tmapSubsetShp,SpatVector)
 S3method(tmapSubsetShp,Spatial)
+S3method(tmapSubsetShp,default)
 S3method(tmapSubsetShp,sf)
 S3method(tmapSubsetShp,sfc)
 S3method(tmapSubsetShp,stars)

diff --git a/R/data.R b/R/data.R
@@ -3,7 +3,7 @@
 #' World dataset, class [`sf`][`sf::sf`]
 #'
 #' @details
-#' | variable        | Source 	| description |
+#' | **Variable**        | **Source**	| **Description** |
 #' | ------         	| -----------    	| ----------- |
 #' | `iso_a3`		    | NED | ISO 3166-1 alpha-3 three-letter country code (see below) |
 #' | `name`		    	| NED | Country name |
@@ -23,7 +23,7 @@
 #' | `gender`			| UNDP/OWiD | Gender Inequality Index (GII) Composite metric using reproductive health, empowerment and the labour market. A low value indicates low inequality between women and men, and vice-versa |
 #' | `press`			| RSF | World Press Freedom Index. Degree of freedom that journalists, news organizations and netizens have |
 #'
-#' See sources for more in-depth information of the variables.
+#' See sources for more detailed information about the variables.
 #'
 #' This dataset, created Noveber 2024, is an update from the old version, which has been created around 2016. All variables from the old version are included, but updated. Furthermore, gender ineuqlity and press freedom have been added.
 #'
@@ -45,43 +45,43 @@
 
 #' Netherlands datasets
 #'
-#' Datasets of the Netherlands at three levels: `NLD_prov` (12) provinces, `NLD_muni` (345) municipalities and `NLD_dist` districts (3340), all class [`sf`][`sf::sf`]
+#' Datasets of the Netherlands for 2022 at three levels: `NLD_prov` (12) provinces, `NLD_muni` (345) municipalities and `NLD_dist` (3340) districts , all class [`sf`][`sf::sf`]
 #'
 #' @details
-#' The variables for `NLD_muni` and `NLD_dist` are identical. The data variables are:
+#' The data variables for `NLD_muni` and `NLD_dist` are identical:
 #'
-#' | variable        | description |
+#' | **Variable**        | **Description** |
 #' | ------          | ----------- |
-#' | `code`		     | Code |
-#' | `name`		     | Name |
-#' | `prov_code`	 | Province code (corresponds to `code` column of `NLD_prov`) |
-#' | `area`		     | Total area in km2 |
+#' | `code`		     | Code. Format is "GMaaaa" (municipality/'**g**e**m**eente') and "WKaaaabb" (district/**w**ij**k**). Here, "aaaa" represents the municipality id number, and "bb" the additional district number.
+#' | `name`		     | Name. |
+#' | `province`	 | Province name. |
+#' | `area`		     | Total area in km2. This area corresponds to the area of the polygons (including inland waters, excluding coastal waters), but is more precise because it is based on non-simplified geometries. |
 #' | `urbanity`		 | Level of urbanity. Five classes, determined by the number of addresses per km2 (break values are 2500, 1500, 1000, and 500). |
-#' | `population`	 | The total population count at 2022-01-01 |
-#' | `pop_0_14`		 | Percentage (rounded) of people between 0 and 15 |
-#' | `pop_15_24`	 | Percentage (rounded) of people between 15 and 25  |
-#' | `pop_25_44`	 | Percentage (rounded) of people between 25 and 45  |
-#' | `pop_45_64`	 | Percentage (rounded) of people between 45 and 65  |
-#' | `pop_65plus`	 | Percentage (rounded) of people of 65 and older |
-#' | `dwelling_total`	 | Number of dwellings |
-#' | `dwelling_value`	 | Average dwelling value (Dutch: WOZ-value) |
-#' | `dwelling_ownership`| Percentage of dwellings owned by the residents |
-#' | `employment_rate`|  Share of the employed population within the total population from 15-75 years old (working and non-working) |
+#' | `population`	 | The total population count at 2022-01-01. |
+#' | `pop_0_14`		 | Percentage (rounded) of people between 0 and 15. |
+#' | `pop_15_24`	 | Percentage (rounded) of people between 15 and 25.  |
+#' | `pop_25_44`	 | Percentage (rounded) of people between 25 and 45.  |
+#' | `pop_45_64`	 | Percentage (rounded) of people between 45 and 65.  |
+#' | `pop_65plus`	 | Percentage (rounded) of people of 65 and older. |
+#' | `dwelling_total`	 | Number of dwellings. |
+#' | `dwelling_value`	 | Average dwelling value (Dutch: WOZ-value). |
+#' | `dwelling_ownership`| Percentage of dwellings owned by the residents. |
+#' | `employment_rate`|  Share of the employed population within the total population from 15 to 75 years old. |
 #' | `income_low`	 | Percentage of individuals in private households belonging to the lowest 40% of personal income nationwide. |
 #' | `income_high`	 | Percentage of individuals in private households belonging to the highest 20% of personal income nationwide. |
 #' | `edu_appl_sci`	 | Percentage of people aged 15 to 75 with a university of applied sciences (Dutch: HBO) or university (Dutch: WO) degree. |
 #'
-#' See source for more in-depth information of the variables.
+#' See source for detailed information about the variables.
 #'
 #' This dataset, created Noveber 2024, is an update from the datasets `NLD_muni` and `NLD_prov` used in tmap <= 3, which has been created around 2016. Note that the number of municipalities have been reduced (due to mergings). All old variables are included, except for variables related to ethnicity. Many new variable have been added, and moreover, district (Dutch: wijk) level data have added: `NLD_dist`.
 #'
-#' The CRS (coordinate reference system) used is the Rijksdriekhoekstelsel New, EPSG 28992. Coordinates have been rounded to meters to reduce filesize.
+#' The CRS (coordinate reference system) used is the Rijksdriekhoekstelsel New, EPSG 28992. Coordinates have been rounded to meters to reduce file size.
 #'
 #' @rdname Netherlands
 #' @docType data
 #' @format NULL
 #' @source https://www.cbs.nl/nl-nl/maatwerk/2024/11/kerncijfers-wijken-en-buurten-2022
-#' @references Statistics Netherlands (2022), The Hague/Heerlen, Netherlands, <https://www.cbs.nl/>.
+#' @references Statistics Netherlands (2024), The Hague/Heerlen, Netherlands, <https://www.cbs.nl/>.
 "NLD_prov"
 
 #' @rdname Netherlands

diff --git a/R/spatial_non_supported.R b/R/spatial_non_supported.R
@@ -0,0 +1,19 @@
+#' @export
+tmapGetShapeMeta1.default = function(shp, o) {
+	stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
+}
+
+#' @export
+tmapGetShapeMeta2.default = function(shp, smeta, o) {
+	stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
+}
+
+#' @export
+tmapSubsetShp.default = function(shp, vars) {
+	stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
+}
+
+#' @export
+tmapShape.default = function(shp, is.main, crs, bbox, unit, filter, shp_name, smeta, o, tmf) {
+	stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
+}
diff --git a/R/step1_helper_facets.R b/R/step1_helper_facets.R
@@ -136,7 +136,7 @@ step1_rearrange_facets = function(tmo, o) {
 					split_stars_dim = ""
 
 					#value_orig = value # just for the case of L156
-					if (length(value) && is.na(value[[1]][1]) && !inherits(value, c("tmapOption", "tmapVars", "tmapAsIs", "tmapSpecial"))) {
+					if (!inherits(value, c("tmapOption", "tmapVars", "tmapAsIs", "tmapSpecial")) && length(value) && is.na(value[[1]][1])) {
 						# NA -> value.blank
 						value = tmapVV(getAesOption("value.blank", o, aes = aes, layer = layer))
 					}

diff --git a/R/tm_facets.R b/R/tm_facets.R
@@ -47,6 +47,7 @@
 #' @param along deprecated Please use `tm_facets_pagewisse()`
 #' @param free.scales deprecated. Please use the `.free` arguments in the layer functions, e.g. `fill.free` in `tm_polygons`.
 #' @param ... used to catch deprecated arguments
+#' @example ./examples/tm_facets.R
 #' @export
 tm_facets = function(by = NULL,
 					 rows = NULL,

diff --git a/R/tmap_palettes.R b/R/tmap_palettes.R
@@ -28,6 +28,7 @@ getPalBiv = function(name, m = NA, n = NA, rep = TRUE) {
 }
 
 getPalMeta = function(name, no.match = "null") {
+	if (!is.character(name)) return(NULL)
 	if (name %in% c("cat", "seq", "div")) {
 		name = cols4all::c4a_options("defaults")$defaults[[name]]
 	}

diff --git a/data-raw/NLD.R b/data-raw/NLD.R
@@ -2,35 +2,39 @@ library(cbsodataR)
 library(sf)
 library(tidyverse)
 
-wb24 = cbs_get_data(id = "85984NED")
-wb23 = cbs_get_data(id = "85618NED")
-wb22 = cbs_get_data(id = "85318NED")
-
-#https://geodata.cbs.nl/files/Wijkenbuurtkaart/WijkBuurtkaart_2023_v2.zip
-dir = tempdir()
-
-sort(sapply(wb22, function(x)sum(is.na(x))))
 
+## Demographic data on municipality (gemeente), district, (wijk) and neighbourhood (buurt) level
+##
+## Source CBS
+## https://www.cbs.nl/nl-nl/maatwerk/2024/11/kerncijfers-wijken-en-buurten-2022
+## https://www.cbs.nl/nl-nl/cijfers/detail/85318NED
+##
+## Note: numbers from 2022 taken (rather than 2023 or 2024) because of availability: as of sept-2024 these are final figures
 
+wb22 = cbs_get_data(id = "85318NED")
 
+# number of missings per variable:
+if (FALSE) {
+	sort(sapply(wb22, function(x)sum(is.na(x))))
+}
 
-gemeentegrenzen <- st_read("https://service.pdok.nl/cbs/gebiedsindelingen/2017/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=gemeente_gegeneraliseerd&outputFormat=json")
 
+## Download geospatial data from PDOK (a platform for Dutch geospatial data)
 gm22 = st_read("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=gemeente_gegeneraliseerd&outputFormat=json")
-
 pv22 = st_read("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=provincie_gegeneraliseerd&outputFormat=json")
-
-
-#number of wijken in 2022: sum(substr(wb22$WijkenEnBuurten, 1, 2) == "WK")
-
-wks = list()
-for (start in seq(0, 3000, by = 1000)) {
-	wks = c(wks, list(st_read(paste0("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=wijk_gegeneraliseerd&outputFormat=json&startindex=", start))))
-}
-wk22 = do.call(rbind, wks)
-
-sel = c(#statcode = "code",
-	#statnaam = "name",
+wk22 = local({
+	# PDOK has a restriction of 1000 features per download
+	# number of districts in 2022: sum(substr(wb22$WijkenEnBuurten, 1, 2) == "WK")
+	wks = list()
+	for (start in seq(0, 3000, by = 1000)) {
+		wks = c(wks, list(st_read(paste0("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=wijk_gegeneraliseerd&outputFormat=json&startindex=", start))))
+	}
+    do.call(rbind, wks)
+})
+
+
+# selection of variables
+sel = c(
 	WijkenEnBuurten = "code",
 	OppervlakteTotaal_111 = "area",
 	MateVanStedelijkheid_116 = "urbanity",
@@ -50,6 +54,7 @@ sel = c(#statcode = "code",
 	k_40PersonenMetLaagsteInkomen_73 = "income_low",
 	k_20PersonenMetHoogsteInkomen_74 = "income_high")
 
+# some data manipulation
 wb22b = wb22 |>
 	select(all_of(names(sel))) |>
 	rename_at(vars(names(sel)), ~sel) |>
@@ -60,6 +65,7 @@ wb22b = wb22 |>
 		   pop_45_64 = round(population_45_64 / population * 100),
 		   pop_65plus = round(population_65_plus / population * 100),
 		   edu_appl_sci = round(edu_high / (edu_low + edu_middle + edu_high) * 100),
+		   area = units::as_units(area / 100, "km2"),
 		   edu_high = NULL,
 		   edu_middle = NULL,
 		   edu_low = NULL,
@@ -71,6 +77,7 @@ wb22b = wb22 |>
 		   urbanity = factor(urbanity, levels = 1:5, labels = c("extremely urbanised", "strongly urbanised", "moderately urbanised", "hardly urbanised", "not urbanised"))) |>
 	relocate(starts_with("pop_"), .after = population)
 
+# Create datasets
 NLD_prov = pv22 |>
 	rename(code = statcode,
 		   name = statnaam) |>
@@ -83,19 +90,39 @@ NLD_dist = wk22 |>
 	select(code, name) |>
 	mutate(name = stringi::stri_trans_general(name, "Latin-ASCII")) |>
 	left_join(wb22b, by = "code") |>
-	mutate(prov_code = NLD_prov$code[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
-	relocate(prov_code, .after = name)
+	mutate(province = NLD_prov$name[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
+	relocate(province, .after = name)
 
 NLD_muni = gm22 |>
 	rename(code = statcode,
 		   name = statnaam) |>
 	select(code, name) |>
 	mutate(name = stringi::stri_trans_general(name, "Latin-ASCII")) |>
 	left_join(wb22b, by = "code") |>
-	mutate(prov_code = NLD_prov$code[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
-	relocate(prov_code, .after = name)
+	mutate(province = NLD_prov$name[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
+	relocate(province, .after = name)
+
 
-# set precision to 1: round coordinates by meters
+# recalculate total area per muni, because it includes water (other than rivers):
+if (FALSE) {
+	sum(NLD_muni$area)
+	sum(NLD_dist$area)
+	sum(st_area(NLD_dist))
+	sum(st_area(NLD_muni))
+}
+NLD_muni$area = local({
+	aggr_area_from_dist = NLD_dist |>
+		st_drop_geometry() |>
+		mutate(code_muni = substr(code, 3, 6)) |>
+		group_by(code_muni) |>
+		summarize(area_sum = sum(area)) |>
+		ungroup() |>
+		mutate(code = paste0("GM", code_muni))
+	aggr_area_from_dist$area_sum[match(NLD_muni$code, aggr_area_from_dist$code)]
+})
+
+
+# Set precision to 1: round coordinates by meters (to reduce file size)
 NLD_prov$geometry = NLD_prov |> st_geometry() |> st_sfc(precision = 1) %>% st_as_binary %>% st_as_sfc
 st_crs(NLD_prov) = st_crs(wk22)
 

diff --git a/data-raw/World.R b/data-raw/World.R
@@ -42,8 +42,6 @@ co2 = co[setdiff(2:80, 48), ]
 co2 = rbind(co2, co2[1,])
 W$geometry[[15]][[1]][[1]] = co2[,1:2]
 
-all(st_is_valid(W))
-
 # also problems with Fiji (1) and Russia (19) because they cross the 180 meridian
 split_multipolyon_at_180 = function(mp) {
 	co = st_coordinates(mp)
@@ -59,7 +57,7 @@ split_multipolyon_at_180 = function(mp) {
 		st_polygon(list(c))
 	}), list(crs = 4326)))
 
-	m180 = st_sfc(st_linestring(cbind(180, c(80,0,-80))), crs = 4326)
+	m180 = st_sfc(st_linestring(cbind(180, c(88,0,-88))), crs = 4326)
 	sfc1b = st_split(sfc1, m180)
 
 	colist2 = unlist(lapply(sfc1b, function(x) {
@@ -80,11 +78,30 @@ split_multipolyon_at_180 = function(mp) {
 
 	st_multipolygon(colist2)
 }
-W1 = split_multipolyon_at_180(W$geometry[[1]])
 
 W$geometry[[1]] = split_multipolyon_at_180(W$geometry[[1]])
 W$geometry[[19]] = split_multipolyon_at_180(W$geometry[[19]])
 
+# Manually fix Antarctica
+mp = W$geometry[[160]]
+co = st_coordinates(mp)
+colist = unname(split.data.frame(co[,1:2], co[,4]))
+col = colist[[8]]
+
+mn = which.min(col[,1])
+x1 = unname(col[mn, 1])
+x2 = unname(col[mn+1, 1])
+
+col = rbind(cbind(X = 180, Y = -86), col[1:mn, ], cbind(X = c(-180, 180), Y = -86))
+colist[[8]] = col
+
+colist2 = lapply(colist, function(x) {
+	list(x)
+})
+
+res = st_multipolygon(colist2)
+W$geometry[[160]] = st_multipolygon(colist2)
+
 
 ##################
 # area

diff --git a/data/NLD_dist.rda b/data/NLD_dist.rda
diff --git a/data/NLD_muni.rda b/data/NLD_muni.rda
diff --git a/data/World.rda b/data/World.rda
diff --git a/examples/tm_facets.R b/examples/tm_facets.R
@@ -0,0 +1,15 @@
+tm_shape(NLD_dist) +
+  tm_polygons("edu_appl_sci",
+    fill.scale = tm_scale_intervals(values = "pu_gn", style = "kmeans", n = 7)) +
+  tm_facets(by = "province") +
+tm_shape(NLD_muni) +
+  tm_borders(lwd = 3) +
+  tm_facets(by = "province") +
+tm_title("Population with a univeristy degree (incl appl. sciences), percentages")
+
+tm_shape(World) +
+  tm_polygons(c("gender", "press"),
+    fill.scale = list(tm_scale_intervals(values = "bu_br_div", midpoint = 0.5),
+      tm_scale_intervals(values = "pu_gn_div", midpoint = 50)),
+    fill.legend = tm_legend("")) +
+tm_layout(panel.labels = c("Gender Inequality Index (GII)", "World Press Freedom Index"))