diff --git a/DESCRIPTION b/DESCRIPTION index d68d109..11140bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: osmdata Title: Import 'OpenStreetMap' Data as Simple Features or Spatial Objects -Version: 0.2.5.022 +Version: 0.2.5.023 Authors@R: c( person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre")), person("Bob", "Rudis", role = "aut"), diff --git a/NEWS.md b/NEWS.md index c1082aa..b30d5c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ along with geometries (#338 thanks to @RegularnaMatrica) - Mention key-only feature requests in README (#342 thanks to @joostschouppe) - Merge any columns in `osmdata_sf()` with mixed-case duplicated names (#348) +- Set encoding to UTF-8 for tags and user names (#347) 0.2.5 diff --git a/R/get-osmdata-df.R b/R/get-osmdata-df.R index 12ed530..8d01979 100644 --- a/R/get-osmdata-df.R +++ b/R/get-osmdata-df.R @@ -75,7 +75,8 @@ osmdata_data_frame <- function (q, colClasses = "character", # osm_id doesn't fit in integer check.names = FALSE, comment.char = "", - stringsAsFactors = stringsAsFactors + stringsAsFactors = stringsAsFactors, + encoding = "UTF-8" ) } else if (isTRUE (obj$meta$query_type == "adiff")) { datetime_from <- obj$meta$datetime_from @@ -122,13 +123,15 @@ xml_to_df <- function (doc, stringsAsFactors = FALSE) { tags <- mapply (function (i, k) { i <- i [, k, drop = FALSE] # remove osm_id column if exists + out <- matrix ( + NA_character_, + nrow = nrow (i), ncol = length (keys), + dimnames = list (NULL, keys) + ) + out <- enc2utf8 (out) out <- data.frame ( - matrix ( - nrow = nrow (i), ncol = length (keys), - dimnames = list (NULL, keys) - ), - stringsAsFactors = stringsAsFactors, - check.names = FALSE + out, + stringsAsFactors = stringsAsFactors, check.names = FALSE ) out [, names (i)] <- i return (out) @@ -214,6 +217,7 @@ xml_adiff_to_df <- function (doc, tags_u <- xml2::xml_find_all (osm_actions, xpath = ".//tag") col_names <- sort (unique (xml2::xml_attr (tags_u, attr = "k"))) m <- matrix ( + NA_character_, nrow = length (osm_obj), ncol = length (col_names), dimnames = list (NULL, col_names) ) @@ -225,6 +229,7 @@ xml_adiff_to_df <- function (doc, tagV <- vapply (tag, function (x) x, FUN.VALUE = character (2)) m [i, tagV [1, ]] <- tagV [2, ] } + m <- enc2utf8 (m) osm_type <- xml2::xml_name (osm_obj) osm_id <- xml2::xml_attr (osm_obj, "id") @@ -325,6 +330,7 @@ get_meta_from_xml <- function (osm_obj) { osm_uid = xml2::xml_attr (osm_obj, attr = "uid"), osm_user = xml2::xml_attr (osm_obj, attr = "user") ) + out$osm_user <- enc2utf8 (out$osm_user) } else { out <- matrix (nrow = length (osm_obj), ncol = 0) diff --git a/R/get-osmdata-sc.R b/R/get-osmdata-sc.R index 935c448..8aeb1a3 100644 --- a/R/get-osmdata-sc.R +++ b/R/get-osmdata-sc.R @@ -75,6 +75,12 @@ osmdata_sc <- function (q, doc, quiet = TRUE) { overpass_version = temp$obj$meta$overpass_version ) + has_tags <- c ("nodes", "relation_properties", "object") + obj [has_tags] <- lapply(obj [has_tags], function (x) { + x [, c ("key", "value")] <- setenc_utf8 (x [, c ("key", "value")]) + x + }) + if (!missing (q)) { if (!is.character (q)) { obj$meta$bbox <- q$bbox diff --git a/R/get-osmdata-sf.R b/R/get-osmdata-sf.R index 8d5ccdc..0e94d95 100644 --- a/R/get-osmdata-sf.R +++ b/R/get-osmdata-sf.R @@ -62,8 +62,9 @@ osmdata_sf <- function (q, doc, quiet = TRUE, stringsAsFactors = FALSE) { # noli if (!"osm_id" %in% names (res$polygons_kv) [1]) { res <- fill_kv (res, "polygons_kv", "polygons", stringsAsFactors) } - kv_df <- grep ("_kv$", names (res)) + kv_df <- grep ("_kv$", names (res)) # objects with tags res [kv_df] <- fix_columns_list (res [kv_df]) + res [kv_df] <- lapply (res [kv_df], setenc_utf8) if (missing (q)) { obj$bbox <- paste (res$bbox, collapse = " ") diff --git a/R/get-osmdata-sp.R b/R/get-osmdata-sp.R index c14c1d6..3eec7e1 100644 --- a/R/get-osmdata-sp.R +++ b/R/get-osmdata-sp.R @@ -71,7 +71,11 @@ osmdata_sp <- function (q, doc, quiet = TRUE) { obj$osm_multipolygons <- res$multipolygons osm_items <- grep ("^osm_", names (obj)) - obj[osm_items] <- fix_columns_list (obj[osm_items]) + obj [osm_items] <- fix_columns_list (obj [osm_items]) + obj [osm_items] <- lapply (obj [osm_items], function (x) { + x@data <- setenc_utf8 (x@data) + x + }) class (obj) <- c (class (obj), "osmdata_sp") return (obj) diff --git a/R/get-osmdata.R b/R/get-osmdata.R index 3282216..e575d51 100644 --- a/R/get-osmdata.R +++ b/R/get-osmdata.R @@ -344,3 +344,19 @@ get_center_from_cpp_output <- function (res, what = "points") { return (as.data.frame (this)) } + + +#' Set encoding to UTF-8 +#' +#' @param x a data.frame or a list. +#' +#' @return `x` with all the columns or items of type character with UTF-8 encoding set. +#' @noRd +setenc_utf8 <- function (x) { + char_cols <- which (vapply (x, is.character, FUN.VALUE = logical (1))) + x [char_cols] <- lapply (x [char_cols], function (y) { + enc2utf8 (y) + }) + + return (x) +} diff --git a/R/getbb.R b/R/getbb.R index a786836..6be594b 100644 --- a/R/getbb.R +++ b/R/getbb.R @@ -229,6 +229,8 @@ getbb <- function (place_name, ) if (format_out == "data.frame") { + utf8cols <- c ("licence", "name", "display_name") + obj [, utf8cols] <- setenc_utf8 (obj [, utf8cols]) return (obj) } diff --git a/codemeta.json b/codemeta.json index fa07a72..5c42de7 100644 --- a/codemeta.json +++ b/codemeta.json @@ -4,20 +4,17 @@ "identifier": "osmdata", "description": "Download and import of 'OpenStreetMap' ('OSM') data as 'sf' or 'sp' objects. 'OSM' data are extracted from the 'Overpass' web server () and processed with very fast 'C++' routines for return to 'R'.", "name": "osmdata: Import 'OpenStreetMap' Data as Simple Features or Spatial Objects", - "relatedLink": [ - "https://docs.ropensci.org/osmdata/", - "https://CRAN.R-project.org/package=osmdata" - ], + "relatedLink": ["https://docs.ropensci.org/osmdata/", "https://CRAN.R-project.org/package=osmdata"], "codeRepository": "https://github.com/ropensci/osmdata/", "issueTracker": "https://github.com/ropensci/osmdata/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "0.2.5.022", + "version": "0.2.5.23", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.3.1 (2023-06-16)", + "runtimePlatform": "R version 4.4.1 (2024-06-14)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -348,25 +345,12 @@ }, "sameAs": "https://CRAN.R-project.org/package=xml2" }, - "SystemRequirements": {} + "SystemRequirements": null }, "applicationCategory": "DataAccess", "isPartOf": "https://ropensci.org", - "keywords": [ - "open0street0map", - "openstreetmap", - "overpass0API", - "OSM", - "overpass-api", - "r", - "cpp", - "rstats", - "osm", - "osm-data", - "r-package", - "peer-reviewed" - ], - "fileSize": "2467.715KB", + "keywords": ["open0street0map", "openstreetmap", "overpass0API", "OSM", "overpass-api", "r", "cpp", "rstats", "osm", "osm-data", "r-package", "peer-reviewed"], + "fileSize": "17525.157KB", "citation": [ { "@type": "ScholarlyArticle", @@ -400,10 +384,7 @@ "issueNumber": "14", "datePublished": "2017", "isPartOf": { - "@type": [ - "PublicationVolume", - "Periodical" - ], + "@type": ["PublicationVolume", "Periodical"], "volumeNumber": "2", "name": "Journal of Open Source Software" } @@ -412,10 +393,7 @@ ], "releaseNotes": "https://github.com/ropensci/osmdata/blob/master/NEWS.md", "readme": "https://github.com/ropensci/osmdata/blob/main/README.md", - "contIntegration": [ - "https://github.com/ropensci/osmdata/actions?query=workflow%3AR-CMD-check", - "https://app.codecov.io/gh/ropensci/osmdata" - ], + "contIntegration": ["https://github.com/ropensci/osmdata/actions?query=workflow%3AR-CMD-check", "https://app.codecov.io/gh/ropensci/osmdata"], "developmentStatus": "https://www.repostatus.org/#active", "review": { "@type": "Review",