README.qmd

---
execute: 
  cache: true
  echo: false
  message: false
  warning: false
# format: gfm
# For sharing interactive results:
format:
  html:
    embed-resources: true
---

```{bash}
#| eval: false
# Manual publishing step (eval: false, so it is not run when rendering).
# Copy the rendered README.html to severance.html (README.html is kept):
cp README.html severance.html
# Attach the copy to the v1.0 GitHub release (--clobber: presumably overwrites an existing asset of the same name):
gh release upload v1.0 severance.html --clobber
# see https://github.com/acteng/severance/releases/download/v1.0/severance.html
```

# Calculating active travel potential and severance around major transport infrastructure

Note: an interactive version of this report is available [here](https://acteng.github.io/severance/severance.html).

```{r}
#| include: false
library(tidyverse)
library(terra)
library(tmap)
```

```{r}
#| eval: false
#| echo: false
# One-off development setup; never evaluated on render (eval: false).
# styler.equals is presumably the formatter behind this file's `=` assignment style - TODO confirm.
remotes::install_github("robinlovelace/styler.equals")
# Development version of tmap (later chunks call tm_scale() and tm_title()).
remotes::install_dev("tmap")
```

This repo builds on the following work:

- The [srn](https://github.com/acteng/srn) repo, demonstrating how to calculate active travel potential within land owned by National Highways
- A demonstration of severance calculation based on a small case study area, in the [robinlovelace/infra-active](https://github.com/robinlovelace/infra-active?tab=readme-ov-file#a-case-study-of-lewes-uckfield) repo

# Input datasets

## Road network data

The Strategic Road Network (SRN) and Major Road Network (MRN) are illustrated below (source: [browse tool of ATIP](https://acteng.github.io/atip/browse.html?style=dataviz#6.2/52.917/-1.327)).

![](images/paste-1.png)


```{r}
#| label: dl-mrn
#| include: false
# Load the Major Road Network (MRN).
# If the cached mrn.gpkg is missing, rebuild it from the DfT shapefile:
# download (unless the zip is already present), unzip, keep a harmonised set
# of columns and write the result to mrn.gpkg. The file is then read back so
# that `mrn` is identical whether or not the cache had to be rebuilt.
if (!file.exists("mrn.gpkg")) {
  message("Missing the MRN dataset locally, download it from the DfT or releases")
  u = "https://maps.dft.gov.uk/major-road-network-shapefile/Major_Road_Network_2018_Open_Roads.zip"
  f = basename(u)
  if (!file.exists(f)) download.file(u, f)
  unzip(f)
  mrn = sf::read_sf("Major_Road_Network_2018_Open_Roads.shp")
  names(mrn)
  #  [1] "fid_"       "id"         "endNode"    "startNode"  "roadNumber"
  #  [6] "roadNameTO" "fictitious" "roadClassi" "roadFuncti" "formOfWay"
  # [11] "length"     "length_uom" "loop"       "primaryRou" "trunkRoad"
  # [16] "roadClas_1" "name1"      "name1_lang" "name2"      "name2_lang"
  # [21] "roadStruct" "geom_Lengt" "identifier" "geom"
  # Keep only the attributes shared with the SRN layer, under the same names,
  # and tag every row as MRN so the two layers can be stacked later.
  mrn = mrn |>
    transmute(
      name = name1,
      road_function = roadFuncti,
      form_of_way = formOfWay,
      road_classification = roadClassi,
      infrastructure_type = "MRN"
    )
  sf::st_write(mrn, "mrn.gpkg", delete_dsn = TRUE)
  # Release data (manual step, deliberately disabled).
  # The guard was previously misspelt `FASLE`, an undefined symbol that made
  # this whole branch fail whenever mrn.gpkg had to be rebuilt.
  if (FALSE) {
    # system("gh release create v1.0 mrn.gpkg")
    # Re-upload with clobber
    system("gh release upload v1.0 mrn.gpkg --clobber")
  }
}
mrn = sf::read_sf("mrn.gpkg")
```

```{r}
#| label: dl-srn
#| include: false
# Load the Strategic Road Network (SRN).
# If the cached srn.gpkg is missing, rebuild it from the national OS Open Roads
# GeoPackage (trunk roads only). The same branch also writes
# open_roads_sample.gpkg, a 1000-row sample of all roads that a later chunk reads.
if (!file.exists("srn.gpkg")) {
  message("Missing the SRN dataset locally, download it from the DfT or releases")
  u = "https://api.os.uk/downloads/v1/products/OpenRoads/downloads?area=GB&format=GeoPackage&redirect"
  f = "oproad_gpkg_gb.zip"
  # Reuse the zip if it has already been downloaded
  if (!file.exists(f)) download.file(u, f)
  unzip(f)
  list.files("Data")
  # See https://github.com/acteng/atip-data-prep/blob/d2a5d0058932a00e0130048cf407448f4b75a477/layers/srn.py#L6
  # for filter:             "SELECT name_1 as name, geometry FROM road_link WHERE trunk_road"
  # q = "SELECT name_1 as name, geometry FROM road_link WHERE trunk_road"
  # srn = sf::read_sf("Data/oproad_gb.gpkg", query = q) # failed:
  # The SQL query above failed, so the whole road_link layer is read and filtered in R instead.
  open_roads_national = sf::read_sf("Data/oproad_gb.gpkg", layer = "road_link")
  names(open_roads_national)
  head(table(open_roads_national$road_classification_number))
  #  [1] "id"                         "fictitious"
  #  [3] "road_classification"        "road_function"
  #  [5] "form_of_way"                "road_classification_number"
  #  [7] "name_1"                     "name_1_lang"
  #  [9] "name_2"                     "name_2_lang"
  # [11] "road_structure"             "length"
  # [13] "length_uom"                 "loop"
  # [15] "primary_route"              "trunk_road"
  # [17] "start_node"                 "end_node"
  # [19] "road_number_toid"           "road_name_toid"
  # [21] "geometry"
  table(open_roads_national$trunk_road)
  # SRN = rows flagged trunk_road; keep id plus the attributes shared with the MRN layer.
  srn = open_roads_national |>
    filter(trunk_road) |>
    transmute(
      id,
      name = name_1,
      road_function = road_function,
      form_of_way = form_of_way,
      road_classification = road_classification,
      road_classification_number = road_classification_number,
      infrastructure_type = "SRN"
    )
  sf::write_sf(srn, "srn.gpkg", delete_dsn = TRUE)
  # Release data (manual step, disabled):
  if (FALSE) {
    system("gh release upload v1.0 srn.gpkg --clobber")
  }
  # Let's also take a random sample of roads from the open roads dataset:
  # NOTE(review): no set.seed() is visible before sample_n(), so the sample
  # presumably differs each time this branch runs - confirm whether that matters.
  open_roads_sample = open_roads_national |>
    sample_n(1000)
  plot(sf::st_geometry(open_roads_sample))
  sf::write_sf(open_roads_sample, "open_roads_sample.gpkg", delete_dsn = TRUE)
  # Release data (manual step, disabled):
  if (FALSE) {
    system("gh release upload v1.0 open_roads_sample.gpkg --clobber")
  }
}
# Always read back from disk so `srn` is the same whether or not it was rebuilt.
srn = sf::read_sf("srn.gpkg")
# names(srn)
```

<!-- After downloading and combining the datasets, we can plot them as follows with R: -->

```{r}
#| label: combine-srn-mrn
#| include: false
# Stack the MRN and the SRN into a single layer.
# The SRN is first reduced to the MRN's attribute set (dropping id and
# road_classification_number) so that both inputs to rbind() share columns.
srn_mrn_columns = transmute(
  srn,
  name,
  road_function,
  form_of_way,
  road_classification,
  infrastructure_type = "SRN"
)
mrn_srn = rbind(mrn, srn_mrn_columns)
# Alternative ggplot2 rendering, kept for reference:
# mrn_srn |>
#   ggplot() +
#   geom_sf(aes(color = infrastructure_type)) +
#   theme_void()
names(mrn_srn)
# MRN drawn first in grey, SRN on top in the default line colour.
tm_shape(mrn) +
  tm_lines(col = "grey") +
  tm_shape(srn) +
  tm_lines() +
  tm_title("SRN (black) and MRN (grey)", just = c("center", "top"))
```

The main focus of the analysis presented in this report is the SRN, the road network controlled by National Highways (NH).

## Propensity to cycle tool

Data on active travel potential was taken from the Propensity to Cycle Tool (PCT), a Department for Transport funded project that builds on official data to model cycling potential nationwide.
In future we would like to add data representing walking potential, possibly based on travel to school data.

# Active travel potential

As a first approximation of active travel potential we used Propensity to Cycle Tool (PCT) data.
To ensure consistency across areas, we defined 'cycling potential' in this case as the distance that could be cycled *within* each grid cell, reducing the influence of variable road lengths on the results (ten 100 m segments with cycling potential of 20 trips per day have the same impact as a single 1 km segment with cycling potential of 20 trips per day).
Aggregating to 5 km resolution ensures the operations run quickly on national data and removes noise from the results.
Higher levels of spatial resolution could be used in future work.

```{r}
#| label: dl-pct
# Load the national PCT route network with per-segment cycling distance.
# If the cached pct.gpkg is missing, download the route network, project it to
# EPSG:27700 and derive km cycled per segment (trips x segment length in km)
# for the baseline (`bicycle`) and Go Dutch (`dutch_slc`) columns.
if (!file.exists("pct.gpkg")) {
  remotes::install_cran("pct")
  pct_rnet = pct::get_pct(layer = "rnet", national = TRUE)
  pct_projected = pct_rnet |>
    sf::st_transform(27700)
  # Convert the lengths to plain numbers first, then scale to km.
  # The previous form `st_length(x) / 1000 |> as.numeric()` parsed as
  # `st_length(x) / as.numeric(1000)` because `|>` binds tighter than `/`,
  # so the column (and the products below) kept their units class.
  pct_projected$segment_length_km = as.numeric(sf::st_length(pct_projected)) / 1000
  pct_projected = pct_projected |>
    mutate(
      cycling_km_baseline = bicycle * segment_length_km,
      cycling_km_go_dutch = dutch_slc * segment_length_km
    )
  sf::write_sf(pct_projected, "pct.gpkg", delete_dsn = TRUE)
}
# Always read back from disk so `pct` is the same whether or not it was rebuilt.
pct = sf::read_sf("pct.gpkg")
```

<!-- We'll convert the pct linestring data to a 5 km raster grid with the {terra} package. -->

```{r}
#| label: pct-raster
#| layout-ncol: 2
# Aggregate segment-level cycling distance onto a regular grid and map both scenarios.
# Empty template raster over the PCT layer with a resolution of 5000
# (pct was written in EPSG:27700, so presumably metres, i.e. 5 km cells).
pct_raster = terra::rast(pct, res = 5000)
# ?rasterize
# Per cell: sum of the field over the segments rasterised into that cell.
pct_raster_baseline = terra::rasterize(pct, pct_raster, field = "cycling_km_baseline", fun = sum)
pct_raster_go_dutch = terra::rasterize(pct, pct_raster, field = "cycling_km_go_dutch", fun = sum)
# Base-graphics alternative, kept for reference:
# par(mfrow = c(1, 2))
# plot(pct_raster_baseline, type = "interval", breaks = c(0, 1, 2, 5, 10, 20, 100)*1000)
# plot(pct_raster_go_dutch, type = "interval", breaks = c(0, 1, 2, 5, 10, 20, 100)*1000)
# par(mfrow = c(1, 1))
# Class breaks shared by both maps here and reused by later chunks.
brks = c(0, 1, 5, 10, 20, 100) * 1000
# NOTE(review): `palette`, `breaks` and `title` are passed straight to tm_raster(),
# whereas later chunks use `col.scale = tm_scale(...)` - confirm which tmap API is intended.
m1 = tm_shape(pct_raster_baseline) +
  tm_raster("cycling_km_baseline", palette = "viridis", breaks = brks, title = "Baseline (km/day)")
m2 = tm_shape(pct_raster_go_dutch) +
  tm_raster("cycling_km_go_dutch", palette = "viridis", breaks = brks, title = "Go Dutch (km/day)")
# Show the two scenario maps together.
tmap_arrange(m1, m2)
```

<!-- We can subset all grids with high cycling potential (e.g. 5000 km cycling potential) and extract nearby roads to calculate severance as follows: -->
This approach allows identification of sections of the SRN and MRN in areas with high (more than 5000 km cycled within each 5km grid cell under the Go Dutch scenario) active travel potential nationwide, as illustrated below.

```{r}
#| cache: false
# Point version of the two PCT rasters, one row per cell, holding both the
# baseline and Go Dutch values. Cached in pct_raster_sf.gpkg and rebuilt only
# when that file is absent.
if (!file.exists("pct_raster_sf.gpkg")) {
  pct_raster_sf_baseline = sf::st_as_sf(terra::as.points(pct_raster_baseline))
  pct_raster_sf_go_dutch = sf::st_as_sf(terra::as.points(pct_raster_go_dutch))
  # Both scenarios side by side, on the geometry of the baseline points.
  both_scenarios = data.frame(
    cycling_km_baseline = pct_raster_sf_baseline$cycling_km_baseline,
    cycling_km_go_dutch = pct_raster_sf_go_dutch$cycling_km_go_dutch,
    geometry = pct_raster_sf_baseline$geometry
  )
  pct_raster_sf = sf::st_sf(both_scenarios)
  sf::st_write(pct_raster_sf, "pct_raster_sf.gpkg", delete_dsn = TRUE)
}
# Always read back from disk so the object is the same with or without a rebuild.
pct_raster_sf = sf::read_sf("pct_raster_sf.gpkg")
```

```{r}
#| label: severance
# Grid points with more than 5000 km cycled under Go Dutch, buffered by 5000
# and dissolved into one geometry marking the high-potential areas.
pct_high_potential_points = filter(pct_raster_sf, cycling_km_go_dutch > 5000)
pct_high_buffer = sf::st_union(sf::st_buffer(pct_high_potential_points, 5000))
# Roads selected by that area are cached on disk; rebuild only when absent.
if (!file.exists("mrn_srn_high.gpkg")) {
  sf::write_sf(mrn_srn[pct_high_buffer, ], "mrn_srn_high.gpkg", delete_dsn = TRUE)
}
mrn_srn_high = sf::read_sf("mrn_srn_high.gpkg")
# All roads in grey, with the high-potential subset on top coloured by network type.
tm_shape(mrn_srn) +
  tm_lines(col = "grey") +
  tm_shape(mrn_srn_high) +
  tm_lines(col = "infrastructure_type")
```

<!-- Building on the simplistic approach, we'll assign active travel potential to each road segment based on the values of the 'active travel potential cells' that they intersect with. -->
<!-- This approach is illustrated below (a logical extension of this approach would be to use PCT section centroids as the basis for road active travel potential values): -->
The approach also allows estimation of current and potential future levels of cycling *for every road segment*.
The results of joining the current estimated and potential cycling values to road segments, taking the mean values of cell centroids that are within 5km of each road segment centroid, are shown below.

```{r}
# Assign PCT grid values to every SRN segment: for each segment centroid, take
# the mean of the grid points lying within 5000 of it.

# Restrict the grid points to those inside a 5000 buffer around the SRN.
srn_buffer = sf::st_union(sf::st_buffer(srn, 5000))
pct_centroids_near = pct_raster_sf[srn_buffer, ]
# pct_centroids_near |>
#   tm_shape() +
#   tm_dots("cycling_km_go_dutch", palette = "viridis", breaks = brks, title = "Go Dutch (km/day)")

# Segment length as a plain number; later chunks sum this column.
srn$length_m = as.numeric(sf::st_length(srn))
# summary(srn$length_m)
#     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
#     0.02    45.42   142.10   388.39   405.19 19085.20

# Centroids rather than full lines, for speed of calculation.
srn_centroids = sf::st_centroid(srn)
# One row per (segment centroid, nearby grid point) pair.
srn_distances = sf::st_join(
  srn_centroids,
  pct_centroids_near,
  join = sf::st_is_within_distance,
  dist = 5000
)
# summary(duplicated(srn_distances$name))

# Collapse back to one row per segment id.
srn_distances_aggregated = summarise(
  group_by(sf::st_drop_geometry(srn_distances), id),
  Baseline = mean(cycling_km_baseline, na.rm = TRUE),
  Potential = mean(cycling_km_go_dutch, na.rm = TRUE),
  .groups = "drop"
)
srn_joined = left_join(srn, srn_distances_aggregated, by = "id")

# Map both columns with the shared class breaks.
tm_shape(srn_joined) +
  tm_lines(
    col = c(
      "Baseline",
      "Potential"
    ),
    title = "Go Dutch (km/day)",
    col.scale = tm_scale(breaks = brks, values = "viridis")
  )
```

```{r}
# Same calculation for the random sample of roads from the open roads dataset:
# mean PCT grid value within 5000 of each sampled road's centroid.
open_roads_sample = sf::read_sf("open_roads_sample.gpkg")
open_roads_centroids = sf::st_centroid(open_roads_sample)
# Join against the full national grid (pct_raster_sf), not pct_centroids_near.
# The latter only holds points within 5 km of the SRN, which would give sampled
# roads away from the SRN no values and bias the nationwide comparison.
open_roads_distances = sf::st_join(
  open_roads_centroids,
  pct_raster_sf,
  join = sf::st_is_within_distance,
  dist = 5000
)
# Collapse the (road, grid point) pairs back to one row per road id.
open_roads_distances_aggregated = open_roads_distances |>
  sf::st_drop_geometry() |>
  group_by(id) |>
  summarise(
    Baseline = mean(cycling_km_baseline, na.rm = TRUE),
    Potential = mean(cycling_km_go_dutch, na.rm = TRUE),
    .groups = "drop"
  )
open_roads_sample_joined = left_join(open_roads_sample, open_roads_distances_aggregated, by = "id")
```

Note: the units of the values presented above are in estimated km cycling per day per 5 km grid cell, derived from the route network layer in the PCT, representing active travel potential for utility cycling in the vicinity of roads (estimated commuter cycling potential is a function of number of cyclable trips to work and is a good proxy for utility cycling potential and to a lesser extent leisure cycling potential).
We assign more meaningful units of estimated number of potential trips in the next section.

After classifying the roads into named groups and keeping only road sections with a level of cycling potential above a certain threshold (2000 km cycled within each 5 km grid cell in the first instance), we selected the top n (e.g. 500) road sections ranked by cycling potential.
Roads were grouped in the following ways:

- by road classification number and name (if available)
- by active travel potential quantile (currently set to 20 quantiles although this can be varied to increase or decrease the number of groups and average length of road sections in each group)

In cases where the same road (e.g. M56) has multiple sections with high cycling potential, the sections are given an id, e.g. M56 1, M56 2 etc.

```{r}
#| include: false
# Build named road sections from the SRN segments:
#   1. keep segments with Potential > 2000 and bin them into Potential quantiles;
#   2. group segments with stplanr::rnet_group();
#   3. merge segments sharing road number, name, group and quantile into sections;
#   4. number sections of the same road (1 = highest Potential), drop short ones;
#   5. take the top 500 sections and renumber them.
summary(sf::st_length(srn_joined))
# Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
# 0.02    45.42   142.10   388.39   405.19 19085.20
summary(srn_joined$Baseline)
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#   0.0   715.7  2445.1  5009.3  7171.3 50691.6   11102
srn_filtered = srn_joined |>
  # filter(length_m > 200) |>
  filter(Potential > 2000) |>
  # filter(!is.na(name))
  # Integer bin (labels = FALSE) of cycling potential, with breaks at every
  # 5th percentile (seq(0, 1, 0.05)), i.e. 20 bins:
  mutate(Potential_quantile = cut(Potential, breaks = quantile(Potential, probs = seq(0, 1, 0.05), na.rm = TRUE), include.lowest = TRUE, labels = FALSE))
summary(srn_filtered$Potential_quantile)
# Group id per segment from stplanr::rnet_group() with its default method
# (presumably groups connected segments - confirm against stplanr docs).
srn_filtered$group = stplanr::rnet_group(srn_filtered)
# Alternative clustering methods tried:
# srn_filtered$group_fast = stplanr::rnet_group(srn_filtered, igraph::cluster_fast_greedy)
# walktrap grouping:
# srn_filtered$group_walktrap = stplanr::rnet_group(srn_filtered, igraph::cluster_walktrap)
# summary(srn_filtered$group_walktrap)
srn_grouped = srn_filtered |>
  # mutate(Name = paste(road_classification_number, name, group_walktrap, sep = " ")) |>
  # Temporary grouping key: road number + name (blank if NA) + group + quantile.
  mutate(Name = paste(road_classification_number, ifelse(is.na(name), "", name), group, Potential_quantile, sep = " ")) |>
  # group_by(name, road_classification_number, road_function, form_of_way, road_classification) |>
  group_by(Name) |>
  # One row per section: mean values, total length, first name and number.
  summarise(
    Baseline = mean(Baseline, na.rm = TRUE),
    Potential = mean(Potential, na.rm = TRUE),
    segment_length = round(sum(length_m)),
    name = first(name),
    road_classification_number = first(road_classification_number),
    .groups = "drop"
  ) |>
  # Number the sections of each road in order of decreasing Potential:
  group_by(name, road_classification_number) |>
  arrange(desc(Potential)) |>
  mutate(id = as.character(row_number())) |>
  # Update id so that it's "" if there's only 1 in that group:
  mutate(
    id = case_when(
      n() == 1 ~ "",
      TRUE ~ id
    )
  ) |>
  # Replace the temporary key with the display name: road number, name, id.
  mutate(Name = paste(road_classification_number, ifelse(is.na(name), "", name), id, sep = " ")) |>
  # Remove whitespace before and after Name:
  mutate(Name = trimws(Name)) |>
  # Keep sections longer than 1000 m. This runs after numbering, so the ids of
  # a road may have gaps here.
  filter(segment_length > 1000) |>
  # replace double space with single space:
  mutate(Name = gsub("  ", " ", Name)) |>
  ungroup() |>
  arrange(desc(Potential)) |>
  # Round all numeric cols:
  mutate(across(where(is.numeric), round))
head(srn_grouped$Name)


# Top 500 sections by Potential (srn_grouped is already sorted descending).
srn_grouped_top = srn_grouped |>
  head(500) |>
  # Renumber within the top 500, and again remove id if there's only 1 in that group:
  group_by(name, road_classification_number) |>
  mutate(id = as.character(row_number())) |>
  mutate(
    id = case_when(
      n() == 1 ~ "",
      TRUE ~ id
    )
  ) |>
  mutate(Name = paste(road_classification_number, ifelse(is.na(name), "", name), id, sep = " ")) |>
  # Remove whitespace before and after Name:
  mutate(Name = trimws(Name)) |>
  # replace double space with single space:
  mutate(Name = gsub("  ", " ", Name)) |>
  ungroup() |>
  arrange(desc(Potential))
head(srn_grouped_top$Name)
# |>
# sf::st_cast("LINESTRING")
```

In total, `r nrow(srn_grouped)` road sections at least 1km in length (another parameter that can be adjusted) were identified with high cycling potential, with the top 500 shown in the interactive table below.


```{r}
#| label: top-roads
# Interactive table and map of the top road sections; the map is saved as a
# standalone HTML file and uploaded to the v1.0 release.

# look at roads with greatest length
# srn_grouped |>
#   arrange(desc(segment_length)) |>
#   select(name, segment_length) |>
#   head(10)

# Static (kable) version of the table, kept for reference:
# srn_grouped_top |>
#   select(Name, Potential, segment_length) |>
#   sf::st_drop_geometry() |>
#   knitr::kable()

# interactive version of table (length shown in km, to 1 decimal place):
library(DT)
datatable(srn_grouped_top |>
  transmute(Name, Baseline, Potential, `Length km` = round(segment_length / 1000, 1)) |>
  sf::st_drop_geometry())

# Split the SRN:

# summary(duplicated(srn_top$name))
# Attributes listed in the map popups:
pvars = c("Name", "Potential", "segment_length")
# Filtered SRN in dark grey as context, top sections on top coloured by Potential.
m_top_srn = tm_shape(srn_filtered) +
  tm_lines(col = "darkgrey", lwd = 4) +
  tm_shape(srn_grouped_top) +
  tm_lines(col = "Potential", title = "Go Dutch (km/day)", lwd = 8, popup.vars = pvars, col.scale = tm_scale(breaks = c(10, 15, 25, 50) * 1000, values = "viridis"))
# m_top_srn
tmap_save(m_top_srn, "m_top_srn.html")
# NOTE(review): unlike the `if (FALSE)` upload guards in the download chunks,
# this upload runs whenever the chunk is evaluated - confirm that is intended.
system("gh release upload v1.0 m_top_srn.html --clobber")
```

The interactive map can be viewed by downloading the resulting [file](https://github.com/acteng/severance/releases/download/v1.0/m_top_srn.html).

<!-- and below: -->

```{r}
#| label: m_top_srn
#| include: false
# Take a screenshot of the saved interactive map with webshot2. No output file
# is given, so the image presumably goes to webshot2's default file name - confirm.
# The result is not shown in the report (include: false).
webshot2::webshot("m_top_srn.html")
# Inline interactive alternative, disabled:
# tmap_mode("view")
# m_top_srn
```


```{r}
#| eval: false
# Convenience for interactive sessions (never evaluated on render):
# open the saved map in the default browser.
browseURL("m_top_srn.html")
```

The baseline and future potential cycling levels of the SRN, relative to a random sample of roads nationwide, are illustrated in the table below:

```{r}
# Long table of per-road Baseline and Potential values, labelled by road_type,
# for the SRN and for the Open Roads sample (geometry dropped).
srn_values = sf::st_drop_geometry(
  transmute(srn_joined, id, Baseline, Potential, road_type = "SRN")
)
open_roads_values = sf::st_drop_geometry(
  transmute(open_roads_sample_joined, id, Baseline, Potential, road_type = "Open Roads")
)
srn_v_open_roads = bind_rows(srn_values, open_roads_values)
```

```{r}
# Table of mean Baseline and Potential for the SRN and the Open Roads sample,
# plus a row giving the SRN means as a percentage of the Open Roads means.
srn_v_open_roads_summary = srn_v_open_roads |>
  group_by(road_type) |>
  summarise(
    Baseline_mean = mean(Baseline, na.rm = TRUE),
    Potential_mean = mean(Potential, na.rm = TRUE)
  )
# Percentage row. The Open Roads row is looked up by road_type rather than by
# position, so the result does not depend on the order summarise() returns.
# mutate(across()) replaces the superseded summarise_at() call, which returned
# two rows per column.
srn_v_open_roads_summary_multiplier = srn_v_open_roads_summary |>
  mutate(across(contains("mean"), ~ .x / .x[road_type == "Open Roads"] * 100)) |>
  filter(road_type == "SRN") |>
  mutate(road_type = "SRN / Open Roads (%)")
srn_v_open_roads_summary = bind_rows(
  srn_v_open_roads_summary,
  srn_v_open_roads_summary_multiplier
)
srn_v_open_roads_summary |>
  knitr::kable(digits = 0)
```

A comparison between the SRN values and a representative sample of roads nationwide shows that while cycling potential around the SRN is lower than around the sample of roads (50% of the average value nationwide), the baseline level of cycling in the vicinity of the SRN is proportionally lower (46% of the average value for all roads nationwide).
This suggests that hostile cycling conditions around the SRN, including high motor traffic volumes and speeds and poor provision for cycling, are suppressing cycling.

```{r}
#| include: false
# SRN segments whose Potential / Baseline exceeds the corresponding mean of the
# Open Roads sample; the inline R in the following paragraph sums their lengths.
open_roads_mean_potential = mean(open_roads_sample_joined$Potential, na.rm = TRUE)
open_roads_mean_baseline = mean(open_roads_sample_joined$Baseline, na.rm = TRUE)
srn_above_mean_potential = filter(srn_joined, Potential > open_roads_mean_potential)
srn_above_mean_baseline = filter(srn_joined, Baseline > open_roads_mean_baseline)
```

There are, however, many sections of the SRN with high cycling potential.
Of the `r round(sum(srn_joined$length_m) / 1000) |> format(big.mark = ",")` km of the SRN, `r round(sum(srn_above_mean_potential$length_m) / 1000)` km (around `r round(sum(srn_above_mean_potential$length_m) / sum(srn_joined$length_m) * 100)`%) have higher than average cycling potential and `r round(sum(srn_above_mean_baseline$length_m) / 1000)` km have higher than average cycling levels in the vicinity of the road network (around `r round(sum(srn_above_mean_baseline$length_m) / sum(srn_joined$length_m) * 100)`% of the total length of the SRN).

# Desire lines approach

```{r}
#| include: false
# Load the national PCT desire lines.
# (The option line above was previously written `# include: false`, without
# the `|`, so it was a plain comment and had no effect.)
# If the cached pct_desire_lines.gpkg is missing, download the national "l"
# layer with the pct package, set its CRS to EPSG:4326, cache it locally and
# upload the file to the v1.0 release.
if (!file.exists("pct_desire_lines.gpkg")) {
  pct_desire_lines = pct::get_pct(layer = "l", national = TRUE)
  sf::st_crs(pct_desire_lines) = 4326
  sf::write_sf(pct_desire_lines, "pct_desire_lines.gpkg", delete_dsn = TRUE)
  system("gh release upload v1.0 pct_desire_lines.gpkg --clobber")
  # NOTE(review): the file has just been written, so this check can never be
  # TRUE here. It looks like a fallback for when pct::get_pct() is unavailable;
  # confirm the intent before restructuring.
  if (!file.exists("pct_desire_lines.gpkg")) {
    message("Download the desire lines data from the releases")
    system("gh release download v1.0 --pattern pct_desire_lines.gpkg")
  }
}
# Always read back from disk so the object is the same with or without a rebuild.
pct_desire_lines = sf::read_sf("pct_desire_lines.gpkg")
# dim(pct_desire_lines)
# [1] 1408275     146
# names(pct_desire_lines)
#   [1] "id"                      "geo_code1"
#   [3] "geo_code2"               "geo_name1"
#   [5] "geo_name2"               "lad11cd1"
#   [7] "lad11cd2"                "lad_name1"
#   [9] "lad_name2"               "all"
#  [11] "bicycle"                 "foot"
#  [13] "car_driver"              "car_passenger"
#  [15] "motorbike"               "train_tube"
#  [17] "bus"                     "taxi_other"
#  [19] "govtarget_slc"           "govtarget_sic"
#  [21] "govtarget_slw"           "govtarget_siw"
#  [23] "govtarget_sld"           "govtarget_sid"
#  [25] "govtarget_slp"           "govtarget_sip"
#  [27] "govtarget_slm"           "govtarget_sim"
#  [29] "govtarget_slpt"          "govtarget_sipt"
#  [31] "govnearmkt_slc"          "govnearmkt_sic"
#  ...
#  [127] "gendereq_sicarkm"        "gendereq_sico2"
# [129] "dutch_sicarkm"           "dutch_sico2"
# [131] "ebike_sicarkm"           "ebike_sico2"
# [133] "e_dist_km"               "rf_dist_km"
# [135] "rq_dist_km"              "dist_rf_e"
# [137] "dist_rq_rf"              "rf_avslope_perc"
# [139] "rq_avslope_perc"         "rf_time_min"
# [141] "rq_time_min"             "geom"
```

The results presented in the previous section are based on estimates of active travel (commuter cycling) potential at the road network level in the vicinity (around 5 km) of each road segment.
This approach is good for a 'first pass' national analysis but has limitations:

- There is no guarantee that active travel on the segments in the vicinity of the roads needs to interact with the roads, e.g. it could just run parallel to them.
- The approach leads to estimates of cycling potential in units that are not conducive to benefits estimation.
- The approach does not take into account the desire lines of cyclists, which may not follow the road network.

Using origin-destination data, which can be represented as 'desire lines' between origin and destination points or zones, can address these limitations.

The desire lines approach scales nationally, although it is more computationally intensive than the road network approach.
There are `r nrow(pct_desire_lines) |> format(big.mark = ",")`  desire lines in the national dataset from the PCT (compared with `r nrow(pct) |> format(big.mark = ",")` road segments).
An overview of the desire lines, based on the 10,000 desire lines longer than 5 km with the most trips (all modes) and coloured by mode share, is shown below.

```{r}
#| label: national-desire-lines-map
# Add mode-share percentages to every desire line, then map a national subset.
# Shares are each mode's trips divided by `all`; "% active" is cycling + walking.
pct_desire_lines = pct_desire_lines |>
  mutate(
    `% cycling` = bicycle / all,
    `% walking` = foot / all,
    `% car` = (car_driver + car_passenger) / all,
    `% active` = (`% cycling` + `% walking`)
  ) |>
  # multiply all cols starting with % by 100 and round to 1 decimal place:
  mutate(across(starts_with("%"), ~ round(. * 100, 1)))
# summary(pct_desire_lines$`% active`)
# Class breaks (in %) shared by the desire-line maps in this and later chunks.
brks_active = c(0, 2, 5, 10, 100)
pct_desire_lines |>
  # Only those of more than 5 km in length:
  filter(e_dist_km > 5) |>
  # The 10000 lines with the largest `all` (slice_max() may return more on ties):
  slice_max(all, n = 10000) |>
  # Sort ascending by active share (presumably so the highest shares are drawn last, on top - confirm).
  arrange(`% active`) |>
  tm_shape() +
  tm_lines(
    col = c("% walking", "% cycling", "% active", "% car"),
    title = "% Active Travel",
    # Passed positionally, unlike the named `col.scale =` used in other chunks.
    tm_scale(breaks = brks_active, values = "viridis"),
    col.legend = tm_legend(position = tm_pos_out())
  ) +
  tm_facets_wrap(ncols = 4)
```

Because of the computational resources needed to process the national desire lines dataset, we'll focus on case study areas in this section.
A sample of the ~20k desire lines in the Manchester area is shown below.

```{r}
# Case-study subset: desire lines whose `lad_name1` contains "Manchester",
# sorted ascending by active-travel share.
dl_mcr = arrange(
  filter(pct_desire_lines, str_detect(lad_name1, "Manchester")),
  `% active`
)
```

```{r}
# SRN segments in the Manchester case-study area.
# Put both layers in EPSG:4326 so they can be subset against each other:
# the SRN is reprojected, while the desire lines only have their CRS label set.
srn_wgs = sf::st_transform(srn, 4326)
sf::st_crs(dl_mcr) = 4326
# Spatial subset: SRN segments selected by the Manchester desire lines.
srn_mcr = srn_wgs[dl_mcr, ]
# Share of SRN segments (by count, not length) in the case-study area.
proportion_srn_in_mcr = nrow(srn_mcr) / nrow(srn_wgs)
```

```{r}
# Map of the Manchester desire lines with more than 10 trips (`all`), coloured
# by active-travel share, with the local SRN segments drawn on top.
dl_mcr |>
  filter(all > 10) |>
  tm_shape() +
  tm_lines(
    col = c("% active"),
    title = "% Active Travel",
    # Passed positionally, unlike the named `col.scale =` used in other chunks.
    tm_scale(breaks = brks_active, values = "viridis"),
    col.legend = tm_legend(position = tm_pos_out())
  ) +
  tm_shape(srn_mcr) +
  tm_lines(col = "road_classification_number", lwd = 5) +
  # NOTE(review): other chunks use tmap v4 functions; there the scale bar
  # function is presumably tm_scalebar() - confirm.
  tm_scale_bar()
```

Each road segment was joined to intersecting desire lines, allowing summary statistics from the desire lines to be assigned to each road segment.
We calculated the angle (azimuth, relative to North) of each desire line and each road segment, allowing us to select only those desire lines that run roughly perpendicular to the road segments (with a minimum angle difference of 20 degrees in the results shown).

The resulting summary data allows identification of roads with high levels of current active travel (indicating a crossing that could be in need of maintenance), active travel potential (with the difference between current levels and potential indicating severance and the need for crossings), and current number of trips by mode.
Static maps for current walking levels and other modes are illustrated below. See the [GitHub release](https://github.com/acteng/severance/releases/download/v1.0/srn_mcr_joined_foot.html) for an interactive version of the map.

```{r}
# Assign desire-line trips to the SRN segments they cross.
# Each segment is joined to every intersecting desire line; pairs in which the
# desire line runs roughly parallel to the road are dropped, and the remaining
# trips are summed per segment.

# Bidirectional bearings: a line and its reverse get the same value.
dl_mcr$angle_d = stplanr::line_bearing(dl_mcr, bidirectional = TRUE)
srn_mcr$angle_m = stplanr::line_bearing(srn_mcr, bidirectional = TRUE)
tic = Sys.time()
# One row per (road segment, intersecting desire line) pair.
dl_mcr_joined = sf::st_join(
  srn_mcr,
  dl_mcr |>
    select(all, foot, bicycle, dutch_slc, angle_d),
  join = sf::st_intersects
)
toc = Sys.time()
# toc - tic
# Filter out pairs in which the angular difference is 20 degrees or less.
# Undirected bearings repeat every 180 degrees, so the separation between two
# of them is min(d, 180 - d) with d = |angle_d - angle_m|. Using d alone kept
# near-parallel pairs that straddle the wrap-around (e.g. 89 and -89 degrees).
dl_mcr_joined = dl_mcr_joined |>
  filter(pmin(abs(angle_d - angle_m), 180 - abs(angle_d - angle_m)) > 20)
# Group by road segment and calculate the sum of trips per mode/scenario:
dl_mcr_joined_aggregated = dl_mcr_joined |>
  sf::st_drop_geometry() |>
  group_by(id) |>
  summarise(
    all = sum(all, na.rm = TRUE),
    foot = sum(foot, na.rm = TRUE),
    bicycle = sum(bicycle, na.rm = TRUE),
    dutch_slc = sum(dutch_slc, na.rm = TRUE),
    .groups = "drop"
  )
# Segments with no remaining desire lines get NA totals.
srn_mcr_joined = left_join(srn_mcr, dl_mcr_joined_aggregated, by = "id")
# srn_mcr_joined |>
#   select(foot, bicycle, dutch_slc) |>
#   sf::st_drop_geometry() |>
#   summary()
```


```{r}
# Static maps of the per-segment desire-line totals, plus a saved HTML version
# of the walking map that is uploaded to the v1.0 release.
tmap_mode("plot")
# m1: current walking trips (`foot`) per segment.
# NOTE(review): m1 and m2 overwrite the objects of the same name created in the
# pct-raster chunk, and m1 is later reused for a model.
m1 = srn_mcr_joined |>
  tm_shape() +
  tm_lines(
    col = c("foot"),
    lwd = 9,
    title = "Active Travel Potential",
    tm_scale(breaks = c(0, 10, 100, 1000, 10000), values = "viridis"),
    col.legend = tm_legend(position = tm_pos_out())
  )
m1
# m2: all four totals as facets in two rows, legend hidden.
m2 = srn_mcr_joined |>
  tm_shape() +
  tm_lines(
    col = c("all", "foot", "bicycle", "dutch_slc"),
    lwd = 9,
    title = "Active Travel Potential",
    tm_scale(breaks = c(0, 10, 100, 1000, 10000), values = "viridis"),
    col.legend = tm_legend(show = FALSE),
    popup.vars = c("all", "foot", "bicycle", "dutch_slc", "id")
  ) +
  tm_facets(nrows = 2)
m2
tmap_save(m1, "srn_mcr_joined_foot.html")
# upload (runs whenever the chunk is evaluated)
system("gh release upload v1.0 srn_mcr_joined_foot.html --clobber")
# URL: https://github.com/acteng/severance/releases/download/v1.0/srn_mcr_joined_foot.html
```

The following table summarises the results in tabular form, with National Highways ID identifying the road segment ordered by cycling potential (an interactive version in the HTML version of the document allows flexible ranking and filtering).

```{r}
#| label: severence-mode-table
# Interactive table of per-segment totals, sorted by walking trips
# (largest first), with numeric columns rounded to whole numbers.
datatable(
  srn_mcr_joined |>
    sf::st_drop_geometry() |>
    select(road_classification_number, all, foot, bicycle, dutch_slc, id) |>
    arrange(desc(foot)) |>
    mutate(across(where(is.numeric), round))
)
```

Another way of tackling the question of severance is to look at the subset of desire lines below threshold 'walkable' and 'cyclable' distances (e.g. 1.5 km straight line distance for walking and 5 km for cycling).
The equivalent table for this approach is shown below.

```{r}
#| include: false
# Alternative severance measure: trips on crossing desire lines that are short
# enough to walk (< 1500 m straight-line) or cycle (< 5000 m), per SRN segment.
dl_mcr$length_euclidean = sf::st_length(dl_mcr) |> as.numeric()
# One row per (road segment, intersecting desire line) pair.
dl_mcr_joined2 = sf::st_join(
  srn_mcr,
  dl_mcr |>
    select(all, foot, bicycle, dutch_slc, angle_d, length_euclidean),
  join = sf::st_intersects
)
summary(dl_mcr_joined2$length_euclidean)
# Keep pairs whose bearings differ by more than 20 degrees.
# Undirected bearings repeat every 180 degrees, so the separation between two
# of them is min(d, 180 - d) with d = |angle_d - angle_m|. Using d alone kept
# near-parallel pairs that straddle the wrap-around (e.g. 89 and -89 degrees).
dl_mcr_joined2_walk = dl_mcr_joined2 |>
  filter(pmin(abs(angle_d - angle_m), 180 - abs(angle_d - angle_m)) > 20) |>
  filter(length_euclidean < 1500)
dl_mcr_joined2_cycle = dl_mcr_joined2 |>
  filter(pmin(abs(angle_d - angle_m), 180 - abs(angle_d - angle_m)) > 20) |>
  filter(length_euclidean < 5000)
# Total trips (all modes) per segment under each distance threshold.
dl_mcr_joined2_walk_aggregated = dl_mcr_joined2_walk |>
  sf::st_drop_geometry() |>
  group_by(id) |>
  summarise(
    all_walkable = sum(all, na.rm = TRUE),
    .groups = "drop"
  )
dl_mcr_joined2_cycle_aggregated = dl_mcr_joined2_cycle |>
  sf::st_drop_geometry() |>
  group_by(id) |>
  summarise(
    all_cyclable = sum(all, na.rm = TRUE),
    .groups = "drop"
  )
# Segments with no qualifying desire lines get NA in the respective column.
srn_mcr_joined2 = left_join(srn_mcr, dl_mcr_joined2_walk_aggregated, by = "id") |>
  left_join(dl_mcr_joined2_cycle_aggregated, by = "id")
```

```{r}
#| label: severence-mode-table-2
# Interactive table of walkable and cyclable trip totals per segment, sorted by
# walkable trips (largest first).
datatable(
  srn_mcr_joined2 |>
    sf::st_drop_geometry() |>
    select(road_classification_number, all_walkable, all_cyclable, id) |>
    arrange(desc(all_walkable))
)
```

To summarise the results, there are `r nrow(dl_mcr_joined2_walk) |> format(big.mark = ",")` desire lines in the Manchester area that are below the walkable threshold and `r nrow(dl_mcr_joined2_cycle) |> format(big.mark = ",")` desire lines that are below the cyclable threshold.
In terms of number of trips, there are `r sum(dl_mcr_joined2_walk$all) |> format(big.mark = ",")` trips below the walkable threshold and `r sum(dl_mcr_joined2_cycle$all) |> format(big.mark = ",")` trips below the cyclable threshold.
In terms of observed walking and cycling levels and cycling potential, there are `r sum(dl_mcr_joined$foot) |> format(big.mark = ",")` walking trips, `r sum(dl_mcr_joined$bicycle) |> format(big.mark = ",")` cycling trips, and `r round(sum(dl_mcr_joined$dutch_slc)) |> format(big.mark = ",")` trips under the Go Dutch scenario.


```{r}
# Let's get the PCT values for each segment in Manchester to observe the
# correlation between the PCT (grid) approach and the desire lines approach.
# First combine the per-segment desire-line totals with the walkable/cyclable totals:
srn_mcr_joined3 = left_join(
  srn_mcr_joined,
  srn_mcr_joined2 |>
    select(id, all_walkable, all_cyclable) |>
    sf::st_drop_geometry(),
  by = "id"
)
# The Manchester segments derive from srn_wgs (EPSG:4326), whereas the PCT grid
# points derive from data projected to EPSG:27700. sf rejects a spatial join
# across different CRS, so the segments are put in the grid points' CRS before
# the centroids are taken and the 5000 distance join is run.
srn_mcr_joined3_centroids = srn_mcr_joined3 |>
  sf::st_transform(sf::st_crs(pct_centroids_near)) |>
  sf::st_centroid()
# One row per (segment centroid, nearby grid point) pair.
srn_mcr_joined3_distances = sf::st_join(
  srn_mcr_joined3_centroids,
  pct_centroids_near,
  join = sf::st_is_within_distance,
  dist = 5000
)

# Mean grid value per segment id.
srn_mcr_distances_aggregated = srn_mcr_joined3_distances |>
  sf::st_drop_geometry() |>
  group_by(id) |>
  summarise(
    Baseline = mean(cycling_km_baseline, na.rm = TRUE),
    Potential = mean(cycling_km_go_dutch, na.rm = TRUE),
    .groups = "drop"
  )
srn_mcr_joined4 = left_join(srn_mcr_joined3, srn_mcr_distances_aggregated, by = "id")
```

```{r}
#| include: false
#| label: pct-vs-desire-lines
# Compare the two estimates of cycling potential for the Manchester segments
# (PCT grid value vs Go Dutch trips from desire lines), fit a linear model and
# compute Manchester's share of national SRN potential and length.
# Nothing from this chunk is shown in the report (include: false).
# Plot Baseline from PCT vs Baseline from desire lines approach (disabled):
# srn_mcr_joined4 |>
#   ggplot(aes(Baseline, bicycle)) +
#   geom_point() +
#   # lm:
#   geom_smooth(method = "lm", se = FALSE) +
#   labs(
#     x = "Baseline (PCT)",
#     y = "Baseline (Desire Lines)"
#   )
# and potential:
srn_mcr_joined4 |>
  ggplot(aes(Potential, dutch_slc)) +
  geom_point() +
  # lm:
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Potential (PCT)",
    y = "Potential (Desire Lines)"
  )
# Linear model of desire-line Go Dutch trips on PCT Potential.
# NOTE(review): this overwrites the tmap object `m1` created in an earlier chunk.
m1 = lm(dutch_slc ~ Potential, data = srn_mcr_joined4)
# R^2:
summary(m1)$r.squared
# # Predict national dutch_slc based on all SRN Potential:
# dutch_slc_predicted = predict(m1, newdata = srn_grouped)
# summary(dutch_slc_predicted)
# sum(dutch_slc_predicted)
# National road sections whose road number also occurs among the Manchester segments.
# NOTE(review): this matches on road number only, so it presumably includes
# parts of those roads outside the Manchester area - confirm this is intended.
srn_grouped_mcr = srn_grouped[srn_grouped$road_classification_number %in% srn_mcr_joined4$road_classification_number, ]
# Share of summed section Potential on those roads, and Manchester's share of SRN length.
proportion_total_cycling_potential_mcr = sum(srn_grouped_mcr$Potential) / sum(srn_grouped$Potential)
proportion_of_srn_by_length = sum(srn_mcr$length_m) / sum(srn$length_m)
```

Around 12% of the estimated cycling potential on the SRN is in the Greater Manchester area, even though the area contains only around 1.5% of the SRN by length.
Assuming that the relationship between cycling potential from the PCT and the desire lines approach is consistent across the UK, we can estimate the total walking and cycling potential for the SRN in the UK.
We estimate that there are around 200k people who could travel to work by active modes (mostly cycling) if suitable infrastructure were provided on the SRN, including crossings in appropriate places.
These are preliminary estimates that ignore the potential for active travel integration with public transport, only considering the potential for active travel directly to work. 
The potential would be much greater accounting for other trip purposes, including travel to school, travel to shops, and leisure trips.
Accounting for these factors, and given that commuting accounts for only around 1/5th of trips, we estimate that around 1 million people could regularly walk or cycle in or around the SRN in the UK if appropriate walking and cycling infrastructure were provided.

```{r}
# Scale the Manchester totals up to the whole SRN by dividing by Manchester's
# share of the PCT cycling potential. The same share is used for both modes.
total_walking_1km = (
  sum(srn_mcr_joined4$all_walkable, na.rm = TRUE) /
    proportion_total_cycling_potential_mcr
)
total_cycling_5km = (
  sum(srn_mcr_joined4$all_cyclable, na.rm = TRUE) /
    proportion_total_cycling_potential_mcr
)
```


<!-- # Outtakes -->


```{r}
#| eval: false
# Failed attempt to split the grouped SRN lines into segments
# (fails as the splitting functions require LINESTRING geometry type input).
# Kept as a record only; the chunk is not evaluated.

# Tabulate the geometry types present before merging.
srn_grouped |>
  sf::st_geometry_type() |>
  table()
# Merge the line pieces of each feature, then tabulate the geometry types
# again to see what the merge produced.
srn_grouped_linestring = srn_grouped |>
  sf::st_line_merge()
srn_grouped_linestring |>
  sf::st_geometry_type() |>
  table()
# Disabled: cast to LINESTRING and plot by Name.
# srn_grouped_linestring |>
#   sf::st_cast("LINESTRING") |>
#   select(Name) |>
#   plot()
# Opens the help page for st_cast (interactive use only).
?st_cast
# Disabled: the split itself. NOTE(review): srn_grouped_split is only created
# by the commented-out line below, so the statements after it cannot run
# unless the object already exists in the session.
# srn_grouped_split = srn_top_split = stplanr::line_segment(srn_grouped_linestring, segment_length = 1000, use_rsgeo = FALSE)
# nrow(srn_grouped_split) # 1414
# Sequential segment id formatted to width 4 (presumably zero-padded, confirm),
# then appended to Name so that each segment gets its own label.
srn_grouped_split$id_km = formatC(seq(nrow(srn_grouped_split)), flag = "00", width = 4)
head(srn_grouped_split$id_km)
srn_grouped_split = srn_grouped_split |>
  mutate(Name = paste(Name, id_km))
```

```{r}
#| eval: false
# Side-by-side maps coloured by `Name`, with `legend = FALSE`:
# m1 draws the grouped SRN, m2 the split version of it.
m1 = tm_shape(srn_grouped) +
  tm_lines(col = "Name", legend = FALSE)
m2 = tm_shape(srn_grouped_split) +
  tm_lines(col = "Name", legend = FALSE)
tmap_arrange(m1, m2)
```


<!-- ## Breaking-up SRN into small sections -->


```{r}
#| eval: false
# Break the SRN into short sections with `segment_length = 1000`
# (units follow the CRS; presumably metres, TODO confirm) and publish the
# result as a release asset. `use_rsgeo = FALSE` is the variant whose result
# was actually kept, so it is the only call made here.
srn_split = stplanr::line_segment(srn, segment_length = 1000, use_rsgeo = FALSE)

# Ratio of feature counts before and after splitting.
nrow(srn) / nrow(srn_split)

# Compare the length distributions of the split and original features.
summary(sf::st_length(srn_split))
summary(sf::st_length(srn))
# st_geometry() works whatever the geometry column happens to be called.
plot(sf::st_geometry(srn_split))

# TODO: use this as an input
sf::st_write(srn_split, "srn_split.gpkg", delete_dsn = TRUE)
system("gh release upload v1.0 srn_split.gpkg --clobber")
```


<!-- ## Demo PCT modelling: -->


```{r}
#| eval: false
# Recompute the Go Dutch uptake for each desire line from its route distance
# and average gradient, and store it alongside the existing columns.
pct_desire_lines$pct_desire_lines_computed = pct::uptake_pct_godutch_2020(
  distance = pct_desire_lines$rf_dist_km,
  gradient = pct_desire_lines$rf_avslope_perc
)

# Correlation between the computed uptake and the Go Dutch share already in
# the data (dutch_slc / all). cor() is called directly on the two vectors:
# piping the data frame into it would pass the data frame as `x` and assigning
# the result would replace pct_desire_lines with the correlation output.
cor(
  pct_desire_lines$pct_desire_lines_computed,
  pct_desire_lines$dutch_slc / pct_desire_lines$all
)

# Go Dutch share against route distance for a random sample of desire lines.
# slice_sample() returns every row when fewer than 10000 are available.
pct_desire_lines |>
  slice_sample(n = 10000) |>
  ggplot(aes(rf_dist_km, dutch_slc / all)) +
  geom_point() +
  geom_smooth()

# NOTE(review): pct_desire_lines_high is not created in this chunk; presumably
# it is defined elsewhere. Confirm before evaluating.
pct_desire_lines_high
```