From d5f073bee566956f83ac8525ea6c0a26540cd106 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Wed, 29 May 2024 16:52:39 -0500 Subject: [PATCH] build_sitemap() improvements (#2583) * Only message if sitemap changed * Simplify generation * Exclude redirects from sitemap. * Exclude invalid filenames from alias redirects Fixes #2582 --- NEWS.md | 1 + R/build-redirects.R | 7 +++++ R/build-search-docs.R | 35 +++++++--------------- R/render.R | 4 +-- tests/testthat/_snaps/build-search-docs.md | 16 +++++----- tests/testthat/test-build-redirects.R | 16 +++++++++- tests/testthat/test-build-search-docs.R | 24 +++++++++++++-- 7 files changed, 66 insertions(+), 37 deletions(-) diff --git a/NEWS.md b/NEWS.md index b55d432223..cfbc53f000 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # pkgdown (development version) +* `build_sitemap()` no longer includes redirected pages (#2582). * All external assets (JS, CSS, fonts) are now directly included in the site instead of fetched from external CDN (@salim-b, #2249) * `build_reference_index()` now displays function lifecycle badges next to the function name (#2123). The badges are extracted only from the function description. You can now also use `has_lifecycle()` to select functions by their lifecycle status. * `build_articles()` now recognises a new `external-articles` top-level field that allows you to define articles that live in other packages (#2028). diff --git a/R/build-redirects.R b/R/build-redirects.R index d61b7cf72e..f4e4fc7356 100644 --- a/R/build-redirects.R +++ b/R/build-redirects.R @@ -82,12 +82,19 @@ reference_redirects <- function(pkg) { names(redirects) <- paste0(names(redirects), ".html") + # Ensure we don't create an invalid file name + redirects <- redirects[valid_filename(names(redirects))] + # Ensure we don't override an existing file redirects <- redirects[setdiff(names(redirects), pkg$topics$file_out)] unname(purrr::imap(redirects, function(to, from) paste0("reference/", c(from, to)))) } +valid_filename <- function(x) { + x == path_sanitize(x) +} + article_redirects <- function(pkg) { is_vig_in_articles <- path_has_parent(pkg$vignettes$name, "articles") if (!any(is_vig_in_articles)) { diff --git a/R/build-search-docs.R b/R/build-search-docs.R index 6ce4ded870..53409a6b1a 100644 --- a/R/build-search-docs.R +++ b/R/build-search-docs.R @@ -35,35 +35,16 @@ build_sitemap <- function(pkg = ".") { } urls <- paste0(url, get_site_paths(pkg)) - - doc <- xml2::read_xml( - paste0("") + doc <- paste0( + "\n", + paste0("", escape_html(urls), "\n", collapse = ""), + "\n" ) - url_nodes <- unwrap_purrr_error(purrr::map(urls, url_node)) - for (url in url_nodes) { - xml2::xml_add_child(doc, url) - } - - xml_path <- path(pkg$dst_path, "sitemap.xml") - cli::cli_inform("Writing {dst_path(path_rel(xml_path, pkg$dst_path))}") - - xml2::write_xml(doc, file = xml_path) - + write_if_different(pkg, doc, "sitemap.xml", check = FALSE) invisible() } -url_node <- function(url) { - withCallingHandlers( - xml2::read_xml( - paste0("", url, "") - ), - error = function(err) { - cli::cli_abort("Failed to wrap URL {.str {url}}", parent = err) - } - ) -} - #' Build search index #' #' @description @@ -315,5 +296,9 @@ get_site_paths <- function(pkg) { # do not include dev package website in search index / sitemap dev_destination <- meta_development(pkg$meta, pkg$version)$destination - paths_rel[!path_has_parent(paths_rel, "dev")] + paths_rel <- paths_rel[!path_has_parent(paths_rel, "dev")] + + # do not include redirects + redirects <- purrr::map_chr(data_redirects(pkg, has_url = TRUE), 1) + setdiff(paths_rel, redirects) } diff --git a/R/render.R b/R/render.R index 6570652ef6..cc177ad90f 100644 --- a/R/render.R +++ b/R/render.R @@ -253,8 +253,8 @@ same_contents <- function(path, contents) { new_hash <- digest::digest(contents, serialize = FALSE) cur_contents <- paste0(read_lines(path), collapse = "\n") - cur_hash <- digest::digest(cur_contents, serialize = FALSE) - + cur_hash <- digest::digest(cur_contents, serialize = FALSE) + identical(new_hash, cur_hash) } diff --git a/tests/testthat/_snaps/build-search-docs.md b/tests/testthat/_snaps/build-search-docs.md index 8b97b6be30..02b4825a4d 100644 --- a/tests/testthat/_snaps/build-search-docs.md +++ b/tests/testthat/_snaps/build-search-docs.md @@ -1,10 +1,12 @@ -# url_node gives informative error +# build sitemap only messages when it updates Code - url_node("<") - Condition - Error in `url_node()`: - ! Failed to wrap URL "<" - Caused by error in `read_xml.raw()`: - ! StartTag: invalid element name [68] + build_sitemap(pkg) + Message + -- Building sitemap ------------------------------------------------------------ + Writing `sitemap.xml` + Code + build_sitemap(pkg) + Message + -- Building sitemap ------------------------------------------------------------ diff --git a/tests/testthat/test-build-redirects.R b/tests/testthat/test-build-redirects.R index 5b254cd70e..ddc8bc87d6 100644 --- a/tests/testthat/test-build-redirects.R +++ b/tests/testthat/test-build-redirects.R @@ -63,6 +63,21 @@ test_that("generates redirects only for non-name aliases", { ) }) +test_that("doesn't generates redirect for aliases that can't be file names", { + pkg <- list( + meta = list(url = "http://foo.com"), + topics = list( + name = "bar", + alias = list(c("bar", "baz", "[<-.baz")), + file_out = "bar.html" + ) + ) + expect_equal( + reference_redirects(pkg), + list(c("reference/baz.html", "reference/bar.html")) + ) +}) + test_that("never redirects away from existing topic", { pkg <- list( meta = list(url = "http://foo.com"), @@ -89,4 +104,3 @@ test_that("no redirects if no aliases", { ) expect_equal(reference_redirects(pkg), list()) }) - diff --git a/tests/testthat/test-build-search-docs.R b/tests/testthat/test-build-search-docs.R index 6bec99bbe3..d9432c4823 100644 --- a/tests/testthat/test-build-search-docs.R +++ b/tests/testthat/test-build-search-docs.R @@ -31,6 +31,21 @@ test_that("build_search() builds the expected search.json with an URL", { expect_snapshot_file(json_path, "search.json") }) +test_that("build sitemap only messages when it updates", { + pkg <- local_pkgdown_site(test_path("assets/news"), ' + url: https://example.com + template: + bootstrap: 5 + ') + + suppressMessages(init_site(pkg)) + suppressMessages(build_home(pkg)) + expect_snapshot({ + build_sitemap(pkg) + build_sitemap(pkg) + }) +}) + test_that("build_search() builds the expected search.json with no URL", { pkg <- local_pkgdown_site(test_path("assets/news"), ' template: @@ -51,6 +66,11 @@ test_that("build_search() builds the expected search.json with no URL", { expect_snapshot_file(json_path, "search-no-url.json") }) -test_that("url_node gives informative error", { - expect_snapshot(url_node("<"), error = TRUE) +test_that("sitemap excludes redirects", { + pkg <- local_pkgdown_site(meta = list( + url = "https://example.com", + redirects = list(c("a.html", "b.html")) + )) + suppressMessages(build_redirects(pkg)) + expect_equal(get_site_paths(pkg), character()) })