Skip to content

Commit

Permalink
improved datasets, added examples tm_facets
Browse files Browse the repository at this point in the history
  • Loading branch information
mtennekes committed Nov 29, 2024
1 parent 3bfdfd5 commit c797aaf
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 80 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ S3method(tmapGetShapeMeta1,Raster)
S3method(tmapGetShapeMeta1,SpatRaster)
S3method(tmapGetShapeMeta1,SpatVector)
S3method(tmapGetShapeMeta1,Spatial)
S3method(tmapGetShapeMeta1,default)
S3method(tmapGetShapeMeta1,sf)
S3method(tmapGetShapeMeta1,sfc)
S3method(tmapGetShapeMeta1,stars)
S3method(tmapGetShapeMeta2,SpatRaster)
S3method(tmapGetShapeMeta2,SpatVector)
S3method(tmapGetShapeMeta2,default)
S3method(tmapGetShapeMeta2,sf)
S3method(tmapGetShapeMeta2,sfc)
S3method(tmapGetShapeMeta2,stars)
Expand Down Expand Up @@ -111,6 +113,7 @@ S3method(tmapShape,Raster)
S3method(tmapShape,SpatRaster)
S3method(tmapShape,SpatVector)
S3method(tmapShape,Spatial)
S3method(tmapShape,default)
S3method(tmapShape,sf)
S3method(tmapShape,stars)
S3method(tmapSplitShp,default)
Expand All @@ -119,6 +122,7 @@ S3method(tmapSubsetShp,Raster)
S3method(tmapSubsetShp,SpatRaster)
S3method(tmapSubsetShp,SpatVector)
S3method(tmapSubsetShp,Spatial)
S3method(tmapSubsetShp,default)
S3method(tmapSubsetShp,sf)
S3method(tmapSubsetShp,sfc)
S3method(tmapSubsetShp,stars)
Expand Down
44 changes: 22 additions & 22 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' World dataset, class [`sf`][`sf::sf`]
#'
#' @details
#' | variable | Source | description |
#' | **Variable** | **Source** | **Description** |
#' | ------ | ----------- | ----------- |
#' | `iso_a3` | NED | ISO 3166-1 alpha-3 three-letter country code (see below) |
#' | `name` | NED | Country name |
Expand All @@ -23,7 +23,7 @@
#' | `gender` | UNDP/OWiD | Gender Inequality Index (GII) Composite metric using reproductive health, empowerment and the labour market. A low value indicates low inequality between women and men, and vice-versa |
#' | `press` | RSF | World Press Freedom Index. Degree of freedom that journalists, news organizations and netizens have |
#'
#' See sources for more in-depth information of the variables.
#' See sources for more detailed information about the variables.
#'
#' This dataset, created Noveber 2024, is an update from the old version, which has been created around 2016. All variables from the old version are included, but updated. Furthermore, gender ineuqlity and press freedom have been added.
#'
Expand All @@ -45,43 +45,43 @@

#' Netherlands datasets
#'
#' Datasets of the Netherlands at three levels: `NLD_prov` (12) provinces, `NLD_muni` (345) municipalities and `NLD_dist` districts (3340), all class [`sf`][`sf::sf`]
#' Datasets of the Netherlands for 2022 at three levels: `NLD_prov` (12) provinces, `NLD_muni` (345) municipalities and `NLD_dist` (3340) districts , all class [`sf`][`sf::sf`]
#'
#' @details
#' The variables for `NLD_muni` and `NLD_dist` are identical. The data variables are:
#' The data variables for `NLD_muni` and `NLD_dist` are identical:
#'
#' | variable | description |
#' | **Variable** | **Description** |
#' | ------ | ----------- |
#' | `code` | Code |
#' | `name` | Name |
#' | `prov_code` | Province code (corresponds to `code` column of `NLD_prov`) |
#' | `area` | Total area in km2 |
#' | `code` | Code. Format is "GMaaaa" (municipality/'**g**e**m**eente') and "WKaaaabb" (district/**w**ij**k**). Here, "aaaa" represents the municipality id number, and "bb" the additional district number.
#' | `name` | Name. |
#' | `province` | Province name. |
#' | `area` | Total area in km2. This area corresponds to the area of the polygons (including inland waters, excluding coastal waters), but is more precise because it is based on non-simplified geometries. |
#' | `urbanity` | Level of urbanity. Five classes, determined by the number of addresses per km2 (break values are 2500, 1500, 1000, and 500). |
#' | `population` | The total population count at 2022-01-01 |
#' | `pop_0_14` | Percentage (rounded) of people between 0 and 15 |
#' | `pop_15_24` | Percentage (rounded) of people between 15 and 25 |
#' | `pop_25_44` | Percentage (rounded) of people between 25 and 45 |
#' | `pop_45_64` | Percentage (rounded) of people between 45 and 65 |
#' | `pop_65plus` | Percentage (rounded) of people of 65 and older |
#' | `dwelling_total` | Number of dwellings |
#' | `dwelling_value` | Average dwelling value (Dutch: WOZ-value) |
#' | `dwelling_ownership`| Percentage of dwellings owned by the residents |
#' | `employment_rate`| Share of the employed population within the total population from 15-75 years old (working and non-working) |
#' | `population` | The total population count at 2022-01-01. |
#' | `pop_0_14` | Percentage (rounded) of people between 0 and 15. |
#' | `pop_15_24` | Percentage (rounded) of people between 15 and 25. |
#' | `pop_25_44` | Percentage (rounded) of people between 25 and 45. |
#' | `pop_45_64` | Percentage (rounded) of people between 45 and 65. |
#' | `pop_65plus` | Percentage (rounded) of people of 65 and older. |
#' | `dwelling_total` | Number of dwellings. |
#' | `dwelling_value` | Average dwelling value (Dutch: WOZ-value). |
#' | `dwelling_ownership`| Percentage of dwellings owned by the residents. |
#' | `employment_rate`| Share of the employed population within the total population from 15 to 75 years old. |
#' | `income_low` | Percentage of individuals in private households belonging to the lowest 40% of personal income nationwide. |
#' | `income_high` | Percentage of individuals in private households belonging to the highest 20% of personal income nationwide. |
#' | `edu_appl_sci` | Percentage of people aged 15 to 75 with a university of applied sciences (Dutch: HBO) or university (Dutch: WO) degree. |
#'
#' See source for more in-depth information of the variables.
#' See source for detailed information about the variables.
#'
#' This dataset, created Noveber 2024, is an update from the datasets `NLD_muni` and `NLD_prov` used in tmap <= 3, which has been created around 2016. Note that the number of municipalities have been reduced (due to mergings). All old variables are included, except for variables related to ethnicity. Many new variable have been added, and moreover, district (Dutch: wijk) level data have added: `NLD_dist`.
#'
#' The CRS (coordinate reference system) used is the Rijksdriekhoekstelsel New, EPSG 28992. Coordinates have been rounded to meters to reduce filesize.
#' The CRS (coordinate reference system) used is the Rijksdriekhoekstelsel New, EPSG 28992. Coordinates have been rounded to meters to reduce file size.
#'
#' @rdname Netherlands
#' @docType data
#' @format NULL
#' @source https://www.cbs.nl/nl-nl/maatwerk/2024/11/kerncijfers-wijken-en-buurten-2022
#' @references Statistics Netherlands (2022), The Hague/Heerlen, Netherlands, <https://www.cbs.nl/>.
#' @references Statistics Netherlands (2024), The Hague/Heerlen, Netherlands, <https://www.cbs.nl/>.
"NLD_prov"

#' @rdname Netherlands
Expand Down
19 changes: 19 additions & 0 deletions R/spatial_non_supported.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' @export
tmapGetShapeMeta1.default = function(shp, o) {
stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
}

#' @export
tmapGetShapeMeta2.default = function(shp, smeta, o) {
stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
}

#' @export
tmapSubsetShp.default = function(shp, vars) {
stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
}

#' @export
tmapShape.default = function(shp, is.main, crs, bbox, unit, filter, shp_name, smeta, o, tmf) {
stop("Specified shp argument of tm_shape is a ", class(shp)[1], ", which is a recognized/supported spatial data class.", call. = FALSE)
}
2 changes: 1 addition & 1 deletion R/step1_helper_facets.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ step1_rearrange_facets = function(tmo, o) {
split_stars_dim = ""

#value_orig = value # just for the case of L156
if (length(value) && is.na(value[[1]][1]) && !inherits(value, c("tmapOption", "tmapVars", "tmapAsIs", "tmapSpecial"))) {
if (!inherits(value, c("tmapOption", "tmapVars", "tmapAsIs", "tmapSpecial")) && length(value) && is.na(value[[1]][1])) {
# NA -> value.blank
value = tmapVV(getAesOption("value.blank", o, aes = aes, layer = layer))
}
Expand Down
1 change: 1 addition & 0 deletions R/tm_facets.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#' @param along deprecated Please use `tm_facets_pagewisse()`
#' @param free.scales deprecated. Please use the `.free` arguments in the layer functions, e.g. `fill.free` in `tm_polygons`.
#' @param ... used to catch deprecated arguments
#' @example ./examples/tm_facets.R
#' @export
tm_facets = function(by = NULL,
rows = NULL,
Expand Down
1 change: 1 addition & 0 deletions R/tmap_palettes.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ getPalBiv = function(name, m = NA, n = NA, rep = TRUE) {
}

getPalMeta = function(name, no.match = "null") {
if (!is.character(name)) return(NULL)
if (name %in% c("cat", "seq", "div")) {
name = cols4all::c4a_options("defaults")$defaults[[name]]
}
Expand Down
81 changes: 54 additions & 27 deletions data-raw/NLD.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,39 @@ library(cbsodataR)
library(sf)
library(tidyverse)

wb24 = cbs_get_data(id = "85984NED")
wb23 = cbs_get_data(id = "85618NED")
wb22 = cbs_get_data(id = "85318NED")

#https://geodata.cbs.nl/files/Wijkenbuurtkaart/WijkBuurtkaart_2023_v2.zip
dir = tempdir()

sort(sapply(wb22, function(x)sum(is.na(x))))

## Demographic data on municipality (gemeente), district, (wijk) and neighbourhood (buurt) level
##
## Source CBS
## https://www.cbs.nl/nl-nl/maatwerk/2024/11/kerncijfers-wijken-en-buurten-2022
## https://www.cbs.nl/nl-nl/cijfers/detail/85318NED
##
## Note: numbers from 2022 taken (rather than 2023 or 2024) because of availability: as of sept-2024 these are final figures

wb22 = cbs_get_data(id = "85318NED")

# number of missings per variable:
if (FALSE) {
sort(sapply(wb22, function(x)sum(is.na(x))))
}

gemeentegrenzen <- st_read("https://service.pdok.nl/cbs/gebiedsindelingen/2017/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=gemeente_gegeneraliseerd&outputFormat=json")

## Download geospatial data from PDOK (a platform for Dutch geospatial data)
gm22 = st_read("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=gemeente_gegeneraliseerd&outputFormat=json")

pv22 = st_read("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=provincie_gegeneraliseerd&outputFormat=json")


#number of wijken in 2022: sum(substr(wb22$WijkenEnBuurten, 1, 2) == "WK")

wks = list()
for (start in seq(0, 3000, by = 1000)) {
wks = c(wks, list(st_read(paste0("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=wijk_gegeneraliseerd&outputFormat=json&startindex=", start))))
}
wk22 = do.call(rbind, wks)

sel = c(#statcode = "code",
#statnaam = "name",
wk22 = local({
# PDOK has a restriction of 1000 features per download
# number of districts in 2022: sum(substr(wb22$WijkenEnBuurten, 1, 2) == "WK")
wks = list()
for (start in seq(0, 3000, by = 1000)) {
wks = c(wks, list(st_read(paste0("https://service.pdok.nl/cbs/gebiedsindelingen/2022/wfs/v1_0?request=GetFeature&service=WFS&version=2.0.0&typeName=wijk_gegeneraliseerd&outputFormat=json&startindex=", start))))
}
do.call(rbind, wks)
})


# selection of variables
sel = c(
WijkenEnBuurten = "code",
OppervlakteTotaal_111 = "area",
MateVanStedelijkheid_116 = "urbanity",
Expand All @@ -50,6 +54,7 @@ sel = c(#statcode = "code",
k_40PersonenMetLaagsteInkomen_73 = "income_low",
k_20PersonenMetHoogsteInkomen_74 = "income_high")

# some data manipulation
wb22b = wb22 |>
select(all_of(names(sel))) |>
rename_at(vars(names(sel)), ~sel) |>
Expand All @@ -60,6 +65,7 @@ wb22b = wb22 |>
pop_45_64 = round(population_45_64 / population * 100),
pop_65plus = round(population_65_plus / population * 100),
edu_appl_sci = round(edu_high / (edu_low + edu_middle + edu_high) * 100),
area = units::as_units(area / 100, "km2"),
edu_high = NULL,
edu_middle = NULL,
edu_low = NULL,
Expand All @@ -71,6 +77,7 @@ wb22b = wb22 |>
urbanity = factor(urbanity, levels = 1:5, labels = c("extremely urbanised", "strongly urbanised", "moderately urbanised", "hardly urbanised", "not urbanised"))) |>
relocate(starts_with("pop_"), .after = population)

# Create datasets
NLD_prov = pv22 |>
rename(code = statcode,
name = statnaam) |>
Expand All @@ -83,19 +90,39 @@ NLD_dist = wk22 |>
select(code, name) |>
mutate(name = stringi::stri_trans_general(name, "Latin-ASCII")) |>
left_join(wb22b, by = "code") |>
mutate(prov_code = NLD_prov$code[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
relocate(prov_code, .after = name)
mutate(province = NLD_prov$name[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
relocate(province, .after = name)

NLD_muni = gm22 |>
rename(code = statcode,
name = statnaam) |>
select(code, name) |>
mutate(name = stringi::stri_trans_general(name, "Latin-ASCII")) |>
left_join(wb22b, by = "code") |>
mutate(prov_code = NLD_prov$code[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
relocate(prov_code, .after = name)
mutate(province = NLD_prov$name[unlist(st_intersects(st_point_on_surface(geometry), NLD_prov))]) |>
relocate(province, .after = name)


# set precision to 1: round coordinates by meters
# recalculate total area per muni, because it includes water (other than rivers):
if (FALSE) {
sum(NLD_muni$area)
sum(NLD_dist$area)
sum(st_area(NLD_dist))
sum(st_area(NLD_muni))
}
NLD_muni$area = local({
aggr_area_from_dist = NLD_dist |>
st_drop_geometry() |>
mutate(code_muni = substr(code, 3, 6)) |>
group_by(code_muni) |>
summarize(area_sum = sum(area)) |>
ungroup() |>
mutate(code = paste0("GM", code_muni))
aggr_area_from_dist$area_sum[match(NLD_muni$code, aggr_area_from_dist$code)]
})


# Set precision to 1: round coordinates by meters (to reduce file size)
NLD_prov$geometry = NLD_prov |> st_geometry() |> st_sfc(precision = 1) %>% st_as_binary %>% st_as_sfc
st_crs(NLD_prov) = st_crs(wk22)

Expand Down
25 changes: 21 additions & 4 deletions data-raw/World.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ co2 = co[setdiff(2:80, 48), ]
co2 = rbind(co2, co2[1,])
W$geometry[[15]][[1]][[1]] = co2[,1:2]

all(st_is_valid(W))

# also problems with Fiji (1) and Russia (19) because they cross the 180 meridian
split_multipolyon_at_180 = function(mp) {
co = st_coordinates(mp)
Expand All @@ -59,7 +57,7 @@ split_multipolyon_at_180 = function(mp) {
st_polygon(list(c))
}), list(crs = 4326)))

m180 = st_sfc(st_linestring(cbind(180, c(80,0,-80))), crs = 4326)
m180 = st_sfc(st_linestring(cbind(180, c(88,0,-88))), crs = 4326)
sfc1b = st_split(sfc1, m180)

colist2 = unlist(lapply(sfc1b, function(x) {
Expand All @@ -80,11 +78,30 @@ split_multipolyon_at_180 = function(mp) {

st_multipolygon(colist2)
}
W1 = split_multipolyon_at_180(W$geometry[[1]])

W$geometry[[1]] = split_multipolyon_at_180(W$geometry[[1]])
W$geometry[[19]] = split_multipolyon_at_180(W$geometry[[19]])

# Manually fix Antarctica
mp = W$geometry[[160]]
co = st_coordinates(mp)
colist = unname(split.data.frame(co[,1:2], co[,4]))
col = colist[[8]]

mn = which.min(col[,1])
x1 = unname(col[mn, 1])
x2 = unname(col[mn+1, 1])

col = rbind(cbind(X = 180, Y = -86), col[1:mn, ], cbind(X = c(-180, 180), Y = -86))
colist[[8]] = col

colist2 = lapply(colist, function(x) {
list(x)
})

res = st_multipolygon(colist2)
W$geometry[[160]] = st_multipolygon(colist2)


##################
# area
Expand Down
Binary file modified data/NLD_dist.rda
Binary file not shown.
Binary file modified data/NLD_muni.rda
Binary file not shown.
Binary file modified data/World.rda
Binary file not shown.
15 changes: 15 additions & 0 deletions examples/tm_facets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
tm_shape(NLD_dist) +
tm_polygons("edu_appl_sci",
fill.scale = tm_scale_intervals(values = "pu_gn", style = "kmeans", n = 7)) +
tm_facets(by = "province") +
tm_shape(NLD_muni) +
tm_borders(lwd = 3) +
tm_facets(by = "province") +
tm_title("Population with a univeristy degree (incl appl. sciences), percentages")

tm_shape(World) +
tm_polygons(c("gender", "press"),
fill.scale = list(tm_scale_intervals(values = "bu_br_div", midpoint = 0.5),
tm_scale_intervals(values = "pu_gn_div", midpoint = 50)),
fill.legend = tm_legend("")) +
tm_layout(panel.labels = c("Gender Inequality Index (GII)", "World Press Freedom Index"))
Loading

0 comments on commit c797aaf

Please sign in to comment.