Skip to content

Commit

Permalink
build_sitemap() improvements (#2583)
Browse files Browse the repository at this point in the history
* Only message if sitemap changed
* Simplify generation
* Exclude redirects from sitemap.
* Exclude invalid filenames from alias redirects

Fixes #2582
  • Loading branch information
hadley authored May 29, 2024
1 parent 67d4110 commit b958b53
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 37 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pkgdown (development version)

* `build_sitemap()` no longer includes redirected pages (#2582).
* All external assets (JS, CSS, fonts) are now directly included in the site instead of fetched from external CDN (@salim-b, #2249)
* `build_reference_index()` now displays function lifecycle badges next to the function name (#2123). The badges are extracted only from the function description. You can now also use `has_lifecycle()` to select functions by their lifecycle status.
* `build_articles()` now recognises a new `external-articles` top-level field that allows you to define articles that live in other packages (#2028).
Expand Down
7 changes: 7 additions & 0 deletions R/build-redirects.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,19 @@ reference_redirects <- function(pkg) {

names(redirects) <- paste0(names(redirects), ".html")

# Ensure we don't create an invalid file name
redirects <- redirects[valid_filename(names(redirects))]

# Ensure we don't override an existing file
redirects <- redirects[setdiff(names(redirects), pkg$topics$file_out)]

unname(purrr::imap(redirects, function(to, from) paste0("reference/", c(from, to))))
}

valid_filename <- function(x) {
x == path_sanitize(x)
}

article_redirects <- function(pkg) {
is_vig_in_articles <- path_has_parent(pkg$vignettes$name, "articles")
if (!any(is_vig_in_articles)) {
Expand Down
35 changes: 10 additions & 25 deletions R/build-search-docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,35 +35,16 @@ build_sitemap <- function(pkg = ".") {
}

urls <- paste0(url, get_site_paths(pkg))

doc <- xml2::read_xml(
paste0("<urlset xmlns = 'http://www.sitemaps.org/schemas/sitemap/0.9'></urlset>")
doc <- paste0(
"<urlset xmlns = 'http://www.sitemaps.org/schemas/sitemap/0.9'>\n",
paste0("<url><loc>", escape_html(urls), "</loc></url>\n", collapse = ""),
"</urlset>\n"
)

url_nodes <- unwrap_purrr_error(purrr::map(urls, url_node))
for (url in url_nodes) {
xml2::xml_add_child(doc, url)
}

xml_path <- path(pkg$dst_path, "sitemap.xml")
cli::cli_inform("Writing {dst_path(path_rel(xml_path, pkg$dst_path))}")

xml2::write_xml(doc, file = xml_path)

write_if_different(pkg, doc, "sitemap.xml", check = FALSE)
invisible()
}

url_node <- function(url) {
withCallingHandlers(
xml2::read_xml(
paste0("<url><loc>", url, "</loc></url>")
),
error = function(err) {
cli::cli_abort("Failed to wrap URL {.str {url}}", parent = err)
}
)
}

#' Build search index
#'
#' @description
Expand Down Expand Up @@ -315,5 +296,9 @@ get_site_paths <- function(pkg) {

# do not include dev package website in search index / sitemap
dev_destination <- meta_development(pkg$meta, pkg$version)$destination
paths_rel[!path_has_parent(paths_rel, "dev")]
paths_rel <- paths_rel[!path_has_parent(paths_rel, "dev")]

# do not include redirects
redirects <- purrr::map_chr(data_redirects(pkg, has_url = TRUE), 1)
setdiff(paths_rel, redirects)
}
4 changes: 2 additions & 2 deletions R/render.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,8 @@ same_contents <- function(path, contents) {
new_hash <- digest::digest(contents, serialize = FALSE)

cur_contents <- paste0(read_lines(path), collapse = "\n")
cur_hash <- digest::digest(cur_contents, serialize = FALSE)

cur_hash <- digest::digest(cur_contents, serialize = FALSE)
identical(new_hash, cur_hash)
}

Expand Down
16 changes: 9 additions & 7 deletions tests/testthat/_snaps/build-search-docs.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# url_node gives informative error
# build sitemap only messages when it updates

Code
url_node("<")
Condition
Error in `url_node()`:
! Failed to wrap URL "<"
Caused by error in `read_xml.raw()`:
! StartTag: invalid element name [68]
build_sitemap(pkg)
Message
-- Building sitemap ------------------------------------------------------------
Writing `sitemap.xml`
Code
build_sitemap(pkg)
Message
-- Building sitemap ------------------------------------------------------------

16 changes: 15 additions & 1 deletion tests/testthat/test-build-redirects.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ test_that("generates redirects only for non-name aliases", {
)
})

test_that("doesn't generates redirect for aliases that can't be file names", {
pkg <- list(
meta = list(url = "http://foo.com"),
topics = list(
name = "bar",
alias = list(c("bar", "baz", "[<-.baz")),
file_out = "bar.html"
)
)
expect_equal(
reference_redirects(pkg),
list(c("reference/baz.html", "reference/bar.html"))
)
})

test_that("never redirects away from existing topic", {
pkg <- list(
meta = list(url = "http://foo.com"),
Expand All @@ -89,4 +104,3 @@ test_that("no redirects if no aliases", {
)
expect_equal(reference_redirects(pkg), list())
})

24 changes: 22 additions & 2 deletions tests/testthat/test-build-search-docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ test_that("build_search() builds the expected search.json with an URL", {
expect_snapshot_file(json_path, "search.json")
})

test_that("build sitemap only messages when it updates", {
pkg <- local_pkgdown_site(test_path("assets/news"), '
url: https://example.com
template:
bootstrap: 5
')

suppressMessages(init_site(pkg))
suppressMessages(build_home(pkg))
expect_snapshot({
build_sitemap(pkg)
build_sitemap(pkg)
})
})

test_that("build_search() builds the expected search.json with no URL", {
pkg <- local_pkgdown_site(test_path("assets/news"), '
template:
Expand All @@ -51,6 +66,11 @@ test_that("build_search() builds the expected search.json with no URL", {
expect_snapshot_file(json_path, "search-no-url.json")
})

test_that("url_node gives informative error", {
expect_snapshot(url_node("<"), error = TRUE)
test_that("sitemap excludes redirects", {
pkg <- local_pkgdown_site(meta = list(
url = "https://example.com",
redirects = list(c("a.html", "b.html"))
))
suppressMessages(build_redirects(pkg))
expect_equal(get_site_paths(pkg), character())
})

0 comments on commit b958b53

Please sign in to comment.