diff --git a/DESCRIPTION b/DESCRIPTION
index 3e9925371..6c5ec3e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: osmdata
 Title: Import 'OpenStreetMap' Data as Simple Features or Spatial Objects
-Version: 0.2.5.018
+Version: 0.2.5.019
 Authors@R: c(
     person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre")),
     person("Bob", "Rudis", role = "aut"),
diff --git a/NEWS.md b/NEWS.md
index 56ddfda..1192d3d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -11,6 +11,7 @@
 
 - Improved `get_bb(..., format_out = "sf_polygon")` to return full metadata along with geometries (#338 thanks to @RegularnaMatrica)
 - Mention key-only feature requests in README (#342 thanks to @joostschouppe)
+- Set encoding to UTF-8 for tags and user names (#347)
 
 
 0.2.5
diff --git a/R/get-osmdata-df.R b/R/get-osmdata-df.R
index 12ed530..6f0409a 100644
--- a/R/get-osmdata-df.R
+++ b/R/get-osmdata-df.R
@@ -75,7 +75,8 @@ osmdata_data_frame <- function (q,
             colClasses = "character", # osm_id doesn't fit in integer
             check.names = FALSE,
             comment.char = "",
-            stringsAsFactors = stringsAsFactors
+            stringsAsFactors = stringsAsFactors,
+            encoding = "UTF-8"
         )
     } else if (isTRUE (obj$meta$query_type == "adiff")) {
         datetime_from <- obj$meta$datetime_from
@@ -162,7 +163,7 @@ xml_to_df <- function (doc, stringsAsFactors = FALSE) {
             osm_id = rownames (res [[i]]),
             center [[i]],
             meta [[i]],
-            tags [[i]],
+            setenc_utf8 (tags [[i]]),
             stringsAsFactors = stringsAsFactors,
             check.names = FALSE
         )
@@ -225,6 +226,7 @@ xml_adiff_to_df <- function (doc,
         tagV <- vapply (tag, function (x) x, FUN.VALUE = character (2))
         m [i, tagV [1, ]] <- tagV [2, ]
     }
+    Encoding (m) <- "UTF-8"
 
     osm_type <- xml2::xml_name (osm_obj)
     osm_id <- xml2::xml_attr (osm_obj, "id")
@@ -325,6 +327,7 @@ get_meta_from_xml <- function (osm_obj) {
             osm_uid = xml2::xml_attr (osm_obj, attr = "uid"),
             osm_user = xml2::xml_attr (osm_obj, attr = "user")
         )
+        Encoding (out$osm_user) <- "UTF-8"
 
     } else {
         out <- matrix (nrow = length (osm_obj), ncol = 0)
diff --git a/R/get-osmdata-sc.R b/R/get-osmdata-sc.R
index 935c448..8aeb1a3 100644
--- a/R/get-osmdata-sc.R
+++ b/R/get-osmdata-sc.R
@@ -75,6 +75,12 @@ osmdata_sc <- function (q, doc, quiet = TRUE) {
         overpass_version = temp$obj$meta$overpass_version
     )
 
+    has_tags <- c ("nodes", "relation_properties", "object")
+    obj [has_tags] <- lapply (obj [has_tags], function (x) {
+        x [, c ("key", "value")] <- setenc_utf8 (x [, c ("key", "value")])
+        x
+    })
+
     if (!missing (q)) {
         if (!is.character (q)) {
             obj$meta$bbox <- q$bbox
diff --git a/R/get-osmdata-sf.R b/R/get-osmdata-sf.R
index fed33c7..a0d9612 100644
--- a/R/get-osmdata-sf.R
+++ b/R/get-osmdata-sf.R
@@ -62,8 +62,9 @@ osmdata_sf <- function (q, doc, quiet = TRUE, stringsAsFactors = FALSE) { # noli
     if (!"osm_id" %in% names (res$polygons_kv)[1]) {
         res <- fill_kv (res, "polygons_kv", "polygons", stringsAsFactors)
     }
-    kv_df <- grep ("_kv$", names (res))
-    res[kv_df] <- fix_columns_list (res[kv_df])
+    kv_df <- grep ("_kv$", names (res)) # objects with tags
+    res [kv_df] <- fix_columns_list (res [kv_df])
+    res [kv_df] <- lapply (res [kv_df], setenc_utf8)
 
     if (missing (q)) {
         obj$bbox <- paste (res$bbox, collapse = " ")
diff --git a/R/get-osmdata-sp.R b/R/get-osmdata-sp.R
index c14c1d6..3eec7e1 100644
--- a/R/get-osmdata-sp.R
+++ b/R/get-osmdata-sp.R
@@ -71,7 +71,11 @@ osmdata_sp <- function (q, doc, quiet = TRUE) {
     obj$osm_multipolygons <- res$multipolygons
 
     osm_items <- grep ("^osm_", names (obj))
-    obj[osm_items] <- fix_columns_list (obj[osm_items])
+    obj [osm_items] <- fix_columns_list (obj [osm_items])
+    obj [osm_items] <- lapply (obj [osm_items], function (x) {
+        x@data <- setenc_utf8 (x@data)
+        x
+    })
 
     class (obj) <- c (class (obj), "osmdata_sp")
     return (obj)
diff --git a/R/get-osmdata.R b/R/get-osmdata.R
index 3282216..0624576 100644
--- a/R/get-osmdata.R
+++ b/R/get-osmdata.R
@@ -344,3 +344,23 @@ get_center_from_cpp_output <- function (res, what = "points") {
 
     return (as.data.frame (this))
 }
+
+
+#' Set encoding to UTF-8
+#'
+#' Only the declared encoding is set (`Encoding<-`); bytes are not converted.
+#'
+#' @param x a data.frame or a list.
+#'
+#' @return `x` with all the columns or items of type character with UTF-8
+#' encoding set; columns or items of any other type are left untouched.
+#' @noRd
+setenc_utf8 <- function (x) {
+    char_cols <- vapply (x, is.character, FUN.VALUE = logical (1))
+    x [char_cols] <- lapply (x [char_cols], function (y) {
+        Encoding (y) <- "UTF-8"
+        y
+    })
+
+    return (x)
+}
diff --git a/codemeta.json b/codemeta.json
index 24d3334..b9c2242 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -11,13 +11,13 @@
   "codeRepository": "https://github.com/ropensci/osmdata/",
   "issueTracker": "https://github.com/ropensci/osmdata/issues",
   "license": "https://spdx.org/licenses/GPL-3.0",
-  "version": "0.2.5.018",
+  "version": "0.2.5.019",
   "programmingLanguage": {
     "@type": "ComputerLanguage",
     "name": "R",
     "url": "https://r-project.org"
   },
-  "runtimePlatform": "R version 4.3.1 (2023-06-16)",
+  "runtimePlatform": "R version 4.4.1 (2024-06-14)",
   "provider": {
     "@id": "https://cran.r-project.org",
     "@type": "Organization",