From 914f3d72ca2b6a18abde9abc6e8001fbdacd4ef1 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Sat, 2 Apr 2022 22:56:54 +0100 Subject: [PATCH 1/7] Buffer post for #16 --- .../post/2022/buffer-vs-within-distance.Rmd | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 content/post/2022/buffer-vs-within-distance.Rmd diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd new file mode 100644 index 00000000..ad395a95 --- /dev/null +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -0,0 +1,40 @@ +--- +title: "Using buffers vs 'within distance' approaches" +author: Olivier +date: "2022-04-10" +slug: geocompr-solutions +categories: [vignette] +tags: [geocompr2, buffers, rstats] +draft: true +--- + +This post explores two ways of calculating whether or not objects are within a certain distance of another object. +This may sound rather abstract and the situation is perhaps best illustrated with reference to a simple reproducible example starting with (psuedo) randomly located points. 
+ +```{r} +library(sf) +library(dplyr) +library(tmap) +set.seed(2022) +point_locations = data.frame( + x = rnorm(n = 100, mean = 0, sd = 1), + y = rnorm(n = 100, mean = 0, sd = 1), + n = 1:100 +) %>% + st_as_sf(coords = c("x", "y")) + +target_points = data.frame( + x = rnorm(n = 5, mean = 0, sd = 1), + y = rnorm(n = 5, mean = 0, sd = 1), + n = 1:5 +) %>% + st_as_sf(coords = c("x", "y")) +target_buffers = st_buffer(target_points, dist = 1) +``` + +```{r} +tm_shape(point_locations) + + tm_dots() + + tm_shape(target_buffers) + + tm_borders() +``` From 35c685da8355f0c392713ee019a4c1c08a022ad3 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Sun, 3 Apr 2022 10:04:56 +0100 Subject: [PATCH 2/7] Cite PostGIS book --- .../post/2022/buffer-vs-within-distance.Rmd | 20 +++++++++++++++---- .../post/2022/buffer-vs-within-distance.bib | 15 ++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 content/post/2022/buffer-vs-within-distance.bib diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd index ad395a95..fbf9fd53 100644 --- a/content/post/2022/buffer-vs-within-distance.Rmd +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -2,14 +2,23 @@ title: "Using buffers vs 'within distance' approaches" author: Olivier date: "2022-04-10" -slug: geocompr-solutions +slug: buffer-distance categories: [vignette] tags: [geocompr2, buffers, rstats] draft: true --- -This post explores two ways of calculating whether or not objects are within a certain distance of another object. -This may sound rather abstract and the situation is perhaps best illustrated with reference to a simple reproducible example starting with (psuedo) randomly located points. 
+```{r, eval=TRUE, echo=FALSE} +# Add references +rbbt::bbt_update_bib(path_rmd = "buffer-vs-within-distance.Rmd", path_bib = "buffer-vs-within-distance.bib") +``` + +This post explores ways of calculating whether or not objects are within a certain distance of another object. +This may sound rather abstract and the situation is perhaps best understood with reference to a concrete example: how many restaurants are within one mile of a highway? +This scenario is described in [@obe_postgis_2015]. + + +illustrated with reference to a simple reproducible example starting with (psuedo) randomly located points. ```{r} library(sf) @@ -29,7 +38,7 @@ target_points = data.frame( n = 1:5 ) %>% st_as_sf(coords = c("x", "y")) -target_buffers = st_buffer(target_points, dist = 1) +target_buffers = st_buffer(target_points, dist = 0.2) ``` ```{r} @@ -38,3 +47,6 @@ tm_shape(point_locations) + tm_shape(target_buffers) + tm_borders() ``` + +# References + diff --git a/content/post/2022/buffer-vs-within-distance.bib b/content/post/2022/buffer-vs-within-distance.bib new file mode 100644 index 00000000..38d76a1f --- /dev/null +++ b/content/post/2022/buffer-vs-within-distance.bib @@ -0,0 +1,15 @@ + +@book{obe_postgis_2015, + title = {{{PostGIS}} in Action}, + author = {Obe, Regina O. 
and Hsu, Leo S.}, + date = {2015}, + edition = {Second edition}, + publisher = {{Manning}}, + location = {{Shelter Island, NY}}, + isbn = {978-1-61729-139-5}, + pagetotal = {570}, + keywords = {Database searching,Geographic information systems}, + annotation = {OCLC: ocn872985108} +} + + From 96b4f19b9d43b700ce3114c2e8ab1178fea52082 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Sun, 3 Apr 2022 10:23:47 +0100 Subject: [PATCH 3/7] Test outputs --- content/post/2022/buffer-vs-within-distance.Rmd | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd index fbf9fd53..ebf20c12 100644 --- a/content/post/2022/buffer-vs-within-distance.Rmd +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -1,7 +1,7 @@ --- title: "Using buffers vs 'within distance' approaches" author: Olivier -date: "2022-04-10" +date: "2022-04-02" slug: buffer-distance categories: [vignette] tags: [geocompr2, buffers, rstats] @@ -15,10 +15,14 @@ rbbt::bbt_update_bib(path_rmd = "buffer-vs-within-distance.Rmd", path_bib = "buf This post explores ways of calculating whether or not objects are within a certain distance of another object. This may sound rather abstract and the situation is perhaps best understood with reference to a concrete example: how many restaurants are within one mile of a highway? -This scenario is described in [@obe_postgis_2015]. +This question is posed by @obe_postgis_2015 who demonstrate methods, implemented in reproducible SQL code, to answer it. +Before reproducing their example in R and PostGIS, which can be called from R, we will demonstrate the basic scenario with reference to a minimal reproducible example. -illustrated with reference to a simple reproducible example starting with (psuedo) randomly located points. +## Minimal reproducible example + +Imagine that there are 100 restaurants and only five highways. 
+Based on the simplifying assumption that highways can be represented as points, we can generate a minimal example to demonstrate the question with a few lines of R code to generate (pseudo) randomly located points. ```{r} library(sf) From 38f42a1b9d83cffdefb1a9e6142987f36beae420 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Sun, 3 Apr 2022 10:26:27 +0100 Subject: [PATCH 4/7] Do not eval bib command --- content/post/2022/buffer-vs-within-distance.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd index ebf20c12..2f6b18d8 100644 --- a/content/post/2022/buffer-vs-within-distance.Rmd +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -8,8 +8,8 @@ tags: [geocompr2, buffers, rstats] draft: true --- -```{r, eval=TRUE, echo=FALSE} -# Add references +```{r, eval=FALSE, echo=FALSE} +# Add references, set eval=TRUE temporarily to update rbbt::bbt_update_bib(path_rmd = "buffer-vs-within-distance.Rmd", path_bib = "buffer-vs-within-distance.bib") ``` From 2d7df2dd60a518e418b53ac4ad31d32f4c6d1660 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Tue, 5 Apr 2022 21:37:35 +0100 Subject: [PATCH 5/7] Big progress on #16 --- .../post/2022/buffer-vs-within-distance.Rmd | 94 ++++++++++++++++++- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd index 2f6b18d8..b0756c20 100644 --- a/content/post/2022/buffer-vs-within-distance.Rmd +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -29,28 +29,112 @@ library(sf) library(dplyr) library(tmap) set.seed(2022) -point_locations = data.frame( + +pnts = data.frame( x = rnorm(n = 100, mean = 0, sd = 1), y = rnorm(n = 100, mean = 0, sd = 1), n = 1:100 ) %>% st_as_sf(coords = c("x", "y")) -target_points = data.frame( +tgrt = data.frame( x = rnorm(n = 5, mean = 0, sd = 1), y = rnorm(n = 5, mean = 0, sd = 
1), n = 1:5 ) %>% st_as_sf(coords = c("x", "y")) -target_buffers = st_buffer(target_points, dist = 0.2) +tgrt_buff = st_buffer(tgrt, dist = 0.2) ``` ```{r} -tm_shape(point_locations) + +tm_shape(pnts) + tm_dots() + - tm_shape(target_buffers) + + tm_shape(tgrt_buff) + tm_borders() ``` +This benchmark compares different ways of performing the same operation, using `st_is_within_distance()` and `st_buffer()`. + + + + + + + +```{r} +points_1 = pnts[tgrt, , op = st_is_within_distance, dist = 0.2] +points_1 +points_2 = pnts[ + lengths( + st_is_within_distance(pnts, tgrt, dist = 0.2) + ) > 0, # this can be changed if needed for greater than one for particular cases +] +points_2 +points_3 = pnts[tgrt_buff, ] + +waldo::compare(points_1, points_2) +waldo::compare(points_1, points_3) + +bench::mark( + st_within_square = pnts[tgrt, , op = st_is_within_distance, dist = 0.2], + st_within_verbose = {points_2 = pnts[ + lengths(st_is_within_distance(pnts, tgrt, dist = 0.2)) > 0, + ]}, + with_st_buffer = { + tgrt_buff = st_buffer(tgrt, dist = 0.2) + pnts[tgrt_buff,] + } +) +``` + +## Full example + +```{r} +library(RPostgreSQL) +conn = dbConnect(drv = PostgreSQL(), + dbname = "rtafdf_zljbqm", host = "db.qgiscloud.com", + port = "5432", user = "rtafdf_zljbqm", password = "d3290ead") + +dbListTables(conn) + +library(spData) + +Utah = us_states[us_states$NAME == "Utah",] +Utah = sf::st_transform(Utah, 2163) + +rsts = sf::st_read(conn, query = "select * from restaurants;") +highways = sf::st_read(conn, query = "select * from highways;") +# franchises.dat = DBI::dbGetQuery(conn, statement = "select * from ch01.lu_franchises;") + +rsts_u = rsts[Utah,] +road_utah.shp = highways[Utah,] + +RPostgreSQL::postgresqlCloseConnection(conn) +``` + + +```{r} +dist_to_check = 1609 + +bench::mark(check = FALSE, + buffer = { + rsts_buff = sf::st_buffer(rsts_u, dist = dist_to_check) + resto_near_road = rsts_buff[road_utah.shp,] + }, + buffer2 = { + rsts_buff = sf::st_buffer(rsts_u, dist = dist_to_check, nQuadSegs = 4) + resto_near_road = 
rsts_buff[road_utah.shp,] + }, + within_distance ={ + near_or_not = lengths( + sf::st_is_within_distance(rsts_u, road_utah.shp, dist_to_check)) > 0 + resto_near_roadv2 = rsts[near_or_not,] + }, + times = 100 + ) +``` + + + # References From 5ba2fd948953dcd4f6f26914eb8c1850a57ed2d9 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Tue, 5 Apr 2022 21:57:40 +0100 Subject: [PATCH 6/7] Tests --- .../post/2022/buffer-vs-within-distance.Rmd | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd index b0756c20..c6306787 100644 --- a/content/post/2022/buffer-vs-within-distance.Rmd +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -107,11 +107,21 @@ highways = sf::st_read(conn, query = "select * from highways;") # franchises.dat = DBI::dbGetQuery(conn, statement = "select * from ch01.lu_franchises;") rsts_u = rsts[Utah,] -road_utah.shp = highways[Utah,] +highways_u = highways[Utah,] +highway_p = st_cast(highways_u, "POINT") RPostgreSQL::postgresqlCloseConnection(conn) ``` +What happens when we calculate distances to points (vertices) on the road network: + +```{r} +highway_points = st_cast(highways, "POINT") +nrow(highway_points) +nrow(st_coordinates(highways)) +``` + + ```{r} dist_to_check = 1609 @@ -119,16 +129,29 @@ dist_to_check = 1609 bench::mark(check = FALSE, buffer = { rsts_buff = sf::st_buffer(rsts_u, dist = dist_to_check) - resto_near_road = rsts_buff[road_utah.shp,] + resto_near_road1 = rsts_buff[highways_u, ] }, buffer2 = { rsts_buff = sf::st_buffer(rsts_u, dist = dist_to_check, nQuadSegs = 4) - resto_near_road = rsts_buff[road_utah.shp,] + resto_near_road2 = rsts_buff[highways_u, ] + }, + buffer3 = { + highway_buff = sf::st_buffer(highways_u, dist = dist_to_check) + resto_near_road3 = rsts_u[highway_buff, ] + }, + buffer4 = { + highway_buff = sf::st_buffer(highways_u, dist = dist_to_check, nQuadSegs = 4) + resto_near_road3 = 
rsts_u[highway_buff, ] }, within_distance ={ near_or_not = lengths( - sf::st_is_within_distance(rsts_u, road_utah.shp, dist_to_check)) > 0 - resto_near_roadv2 = rsts[near_or_not,] + sf::st_is_within_distance(rsts_u, highways_u, dist_to_check)) > 0 + resto_near_road4 = rsts_u[near_or_not, ] + }, + within_distance2 ={ + near_or_not = lengths( + sf::st_is_within_distance(rsts_u, highway_p, dist_to_check)) > 0 + resto_near_road5 = rsts_u[near_or_not, ] }, times = 100 ) From 6c5b60f92aff78101c00327103df9621f9acba86 Mon Sep 17 00:00:00 2001 From: Robin Lovelace Date: Tue, 5 Apr 2022 22:04:45 +0100 Subject: [PATCH 7/7] Enable references --- content/post/2022/buffer-vs-within-distance.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/content/post/2022/buffer-vs-within-distance.Rmd b/content/post/2022/buffer-vs-within-distance.Rmd index c6306787..297a0f67 100644 --- a/content/post/2022/buffer-vs-within-distance.Rmd +++ b/content/post/2022/buffer-vs-within-distance.Rmd @@ -6,6 +6,7 @@ slug: buffer-distance categories: [vignette] tags: [geocompr2, buffers, rstats] draft: true +bibliography: buffer-vs-within-distance.bib --- ```{r, eval=FALSE, echo=FALSE}