Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/ipeaGIT/geobr
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Sep 10, 2023
2 parents 2d0daa2 + 8222e16 commit 78b8fb8
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
- {os: windows-latest, r: 'release'}
- {os: windows-latest, r: 'oldrel'}
- {os: macOS-latest, r: 'release'}
- {os: macOS-latest, r: 'oldrel'}
# - {os: macOS-latest, r: 'oldrel'}
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
Expand Down
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
/r-package/.Rhistory
/prep_data/.Rhistory
/data-raw/
data_prep/data
data_prep/data-raw

/data_prep/data$
/data_prep/data-raw$
/data_prep/_targets$

inst/doc
Meta
Expand Down
18 changes: 10 additions & 8 deletions data_prep/R/muni_clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
#' output: save clean data as geopackage
clean_muni <- function( muni_raw_paths ){

# year = 2000
# all_muni_raw_paths <- list.files(path = paste0('./data-raw/municipios/', year),
# year = 2021
# all_muni_raw_paths <- list.files(path = paste0('./data_raw/municipios/', year),
# pattern = '.rds',
# full.names = TRUE)
# f <- all_muni_raw_paths[15]

## 6666
# muni_raw_paths <- muni_raw_paths[1]
## 6666
# f <- all_muni_raw_paths[1]
#
# # 6666
# muni_raw_paths <- all_muni_raw_paths[1]
# # 6666


# detect corresponding year of files
Expand All @@ -41,6 +41,7 @@ clean_muni <- function( muni_raw_paths ){
# read raw file
temp_sf <- readRDS(f)
names(temp_sf) <- tolower(names(temp_sf))
# mapview(temp_sf)

# select columns
if (year %like% "2000|2001|2005") {
Expand Down Expand Up @@ -108,7 +109,8 @@ clean_muni <- function( muni_raw_paths ){

# Make any invalid geom valid
# st_is_valid( temp_sf)
temp_sf <- fix_topoly(temp_sf)
temp_sf2 <- fix_topoly(temp_sf)
# mapview(temp_sf2)

# strange error in SC 2000 but it could happen elsewhere
# remove geometries with area == 0
Expand Down
7 changes: 4 additions & 3 deletions data_prep/R/muni_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# 0. Download Raw zipped files for all years ---------------------------------
#' input: year
#' download raw data from source website to temp zip file
#' save raw data in .rds format in the data-raw dir
#' save raw data in .rds format in the data_raw dir
#' output: returns path to all raw files of that year
download_muni <- function(year){

Expand All @@ -13,7 +13,7 @@ download_muni <- function(year){
ftp <- "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/"

# create dir if it has not been created already
dest_dir <- paste0('./data-raw/municipios/', year)
dest_dir <- paste0('./data_raw/municipios/', year)
if (isFALSE(dir.exists(dest_dir))) { dir.create(dest_dir,
recursive = T,
showWarnings = FALSE) }
Expand Down Expand Up @@ -143,7 +143,7 @@ download_muni <- function(year){
# 1. read raw zipped file in temporary dir ---------------------------------
#' input: tempfile of raw data, temp dir of raw data, dest dir to save raw data
#' unzip and read raw data
#' output: save raw data in .rds format in the data-raw dir
#' output: save raw data in .rds format in the data_raw dir
muni_saveraw <- function(tempf, temp_dir, dest_dir) {

## 1.1 Unzip original data
Expand All @@ -159,6 +159,7 @@ muni_saveraw <- function(tempf, temp_dir, dest_dir) {
year <- detect_year_from_string(tempf)
year <- year[year != '2500']
year <- year[year != '0807']
year <- year[year != '1701']
year <- year[1]

# Encoding for different years
Expand Down
53 changes: 24 additions & 29 deletions data_prep/R/support_fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,32 @@ add_state_info <- function(temp_sf, column){
# IF only the "name_state" column is present
# Add code_state
if (!is.null(temp_sf$code_muni) & "name_state" %in% names(temp_sf) ) {
temp_sf <- dplyr::mutate(code_state = ifelse(name_state== "Rondonia" | name_state== "Território de Rondonia" | name_state== "Territorio de Rondonia",11,
ifelse(name_state== "Acre" | name_state== "Território do Acre",12,
temp_sf <- dplyr::mutate(code_state = ifelse(name_state== "Rondonia" | name_state== "Territ\u00f3rio de Rondonia" | name_state== "Territorio de Rondonia",11,
ifelse(name_state== "Acre" | name_state== "Territ\u00f3rio do Acre",12,
ifelse(name_state== "Amazonas",13,
ifelse(name_state== "Roraima" | name_state=="Território de Roraima",14,
ifelse(name_state== "Pará",15,
ifelse(name_state== "Amapá" | name_state=="Territorio do Amapa",16,
ifelse(name_state== "Roraima" | name_state=="Territ\u00f3rio de Roraima",14,
ifelse(name_state== "Par\u00e1",15,
ifelse(name_state== "Amap\u00e1" | name_state=="Territorio do Amapa",16,
ifelse(name_state== "Tocantins",17,
ifelse(name_state== "Maranhão",21,
ifelse(name_state== "Maranh\u00e3o",21,
ifelse(name_state== "Piaui" | name_state== "Piauhy",22,
ifelse(name_state== "Ceará",23,
ifelse(name_state== "Cear\u00e1",23,
ifelse(name_state== "Rio Grande do Norte",24,
ifelse(name_state== "Paraiba" | name_state== "Parahyba",25,
ifelse(name_state== "Pernambuco",26,
ifelse(name_state== "Alagoas" | name_state=="Alagôas",27,
ifelse(name_state== "Alagoas" | name_state=="Alag\u00f4as",27,
ifelse(name_state== "Sergipe",28,
ifelse(name_state== "Bahia",29,
ifelse(name_state== "Minas Gerais" | name_state== "Minas Geraes",31,
ifelse(name_state== "Espirito Santo" | name_state== "Espirito Santo",32,
ifelse(name_state== "Rio de Janeiro",33,
ifelse(name_state== "São Paulo",35,
ifelse(name_state== "Paraná",41,
ifelse(name_state== "S\u00e3o Paulo",35,
ifelse(name_state== "Paran\u00e1",41,
ifelse(name_state== "Santa Catarina" | name_state== "Santa Catharina",42,
ifelse(name_state== "Rio Grande do Sul",43,
ifelse(name_state== "Mato Grosso do Sul",50,
ifelse(name_state== "Mato Grosso" | name_state== "Matto Grosso",51,
ifelse(name_state== "Goiás" | name_state== "Goyaz",52,
ifelse(name_state== "Goi\u00e1s" | name_state== "Goyaz",52,
ifelse((name_state== "Distrito Federal" | name_state=="Brasilia") & (year>1950),53,NA
))))))))))))))))))))))))))))
}
Expand All @@ -63,39 +63,36 @@ add_state_info <- function(temp_sf, column){
temp_sf$code_state <- substr( temp_sf[[ column ]] , 1,2) |> as.numeric()

# # add name_state ENCODING ISSUES
# stringi::stri_encode(from='latin1', to="utf8", str= "São Paulo")
# stringi::stri_encode(from='latin1', to="utf8", str= "S\u00e3o Paulo")
# stringi::stri_encode('S\u00e3o Paulo', to="UTF-8")
# gtools::ASCIIfy('São Paulo')
# gtools::ASCIIfy('S\u00e3o Paulo')
temp_sf <- temp_sf |> dplyr::mutate(name_state =
data.table::fcase(code_state== 11, utf8::as_utf8("Rondônia"),
data.table::fcase(code_state== 11, "Rond\u00f4nia",
code_state== 12, "Acre",
code_state== 13, "Amazonas",
code_state== 14, "Roraima",
code_state== 15, utf8::as_utf8("Pará"),
code_state== 16, utf8::as_utf8("Amapá"),
code_state== 15, "Par\u00e1",
code_state== 16, "Amap\u00e1",
code_state== 17, "Tocantins",
code_state== 21, utf8::as_utf8("Maranhão"),
code_state== 22, utf8::as_utf8("Piauí"),
code_state== 23, utf8::as_utf8("Ceará"),
code_state== 21, "Maranh\u00e3o",
code_state== 22, "Piau\u00ed",
code_state== 23, "Cear\u00e1",
code_state== 24, "Rio Grande do Norte",
code_state== 25, utf8::as_utf8("Paraíba"),
code_state== 25, "Para\u00edba",
code_state== 26, "Pernambuco",
code_state== 27, "Alagoas",
code_state== 28, "Sergipe",
code_state== 29, "Bahia",
code_state== 31, "Minas Gerais",
code_state== 32, "Espirito Santo",
code_state== 33, "Rio de Janeiro",
# code_state== 35, stringi::stri_encode(from='latin1', to="utf8", str="São Paulo"),
# code_state== 35, stringi::stri_escape_unicode("São Paulo"),
# OK code_state== 35, stringi::stri_encode('S\u00e3o Paulo', to="UTF-8"),
code_state== 35, utf8::as_utf8("São Paulo"),
code_state== 41, utf8::as_utf8("Paraná"),
code_state== 35, "S\u00e3o Paulo",
code_state== 41, "Paran\u00e1",
code_state== 42, "Santa Catarina",
code_state== 43, "Rio Grande do Sul",
code_state== 50, "Mato Grosso do Sul",
code_state== 51, "Mato Grosso",
code_state== 52, utf8::as_utf8("Goiás"),
code_state== 52, "Goi\u00e1s",
code_state== 53, "Distrito Federal",
default = NA))
}
Expand Down Expand Up @@ -276,7 +273,7 @@ to_multipolygon <- function(temp_sf){
fix_topoly <- function(temp_sf){

temp_sf <- sf::st_make_valid(temp_sf)
temp_sf <- sf::st_buffer(temp_sf, dist = 0)
# temp_sf <- sf::st_buffer(temp_sf, dist = 0)

return(temp_sf)
}
Expand Down Expand Up @@ -367,8 +364,6 @@ remove_state_repetition <- function(temp_sf){
}




#####fixing municipality repetition---------

# https://github.com/ipeaGIT/geobr/blob/49534a6b19dc765e43e4c2f4404342f4fd0fdb4e/r-package/prep_data/prep_state_muni_regions.R#L987
12 changes: 6 additions & 6 deletions data_prep/_targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ list(
# # 2013, 2014, 2015, 2016, 2017,
# 2018, 2019, 2020, 2021, 2022)),

tar_target(years_muni, c(2000, 2001,
2005, 2007,
2010 ,
2013,
2014, 2015, 2016, 2017,
2018, 2019, 2020,
tar_target(years_muni, c(#2000, 2001,
#2005, 2007,
#2010 ,
#2013,
#2014, 2015, 2016, 2017,
#2018, 2019, 2020,
2021,
2022
)
Expand Down
Loading

0 comments on commit 78b8fb8

Please sign in to comment.