diff --git a/README.md b/README.md index 58b77ce2..afa909be 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ More examples [here](https://github.com/ipeaGIT/geobr/tree/master/python-package |`read_conservation_units` | Environmental Conservation Units | 201909 | MMA | |`read_disaster_risk_area` | Disaster risk areas | 2010 | CEMADEN and IBGE | |`read_indigenous_land` | Indigenous lands | 201907, 202103 | FUNAI | -|`read_semiarid` | Semi Arid region | 2005, 2017 | IBGE | +|`read_semiarid` | Semi Arid region | 2005, 2017, 2021, 2022 | IBGE | |`read_health_facilities` | Health facilities | 201505, 202303 | CNES, DataSUS | |`read_health_region` | Health regions and macro regions | 1991, 1994, 1997, 2001, 2005, 2013 | DataSUS | |`read_neighborhood` | Neighborhood limits | 2010 | IBGE | diff --git a/data_prep/R/semiarid.R b/data_prep/R/semiarid.R new file mode 100644 index 00000000..f9dfbd23 --- /dev/null +++ b/data_prep/R/semiarid.R @@ -0,0 +1,173 @@ +#> DATASET: Brazilian semi-arid +#> Source: IBGE - https://www.ibge.gov.br/geociencias/cartas-e-mapas/mapas-regionais/15974-semiarido-brasileiro.html?=&t=downloads +#> Metadata: +# Titulo: Semiarido brasileiro +# Titulo alternativo: Semiarido brasileiro +# Frequencia de atualizacao: ? +# +# Forma de apresentacao: Shape +# Linguagem: Pt-BR +# Character set: Utf-8 +# +# Resumo: Poligonos e Pontos do semiarido brasileiro. +# Informacoes adicionais: Dados produzidos pelo IBGE com base em decretos administrativos do Ministério da Integração Nacional. +# -"Resolução nº 115 do Ministério da Integração Nacional, de 23 de novembro de 2017" +# -"Portaria N°89 de 16 de março de 2005, do Ministério da Integração Nacional" +# Proposito: Identificao do semiarido brasileiro. 
+
+# Estado: Em desenvolvimento
+# Informacao do Sistema de Referencia: SIRGAS 2000
+
+
+
+
+####### Load Support functions to use in the preprocessing of the data
+
+source("./R/support_fun.R")
+
+
+
+# Build the Brazilian semi-arid data set for one reference year.
+#
+# Steps: download IBGE's spreadsheet listing the semi-arid municipalities of
+# `year`, keep the matching polygons from geobr::read_municipality(), clean
+# them with the support functions and write a full and a simplified
+# GeoPackage to ./data/semiarid/<year>/.
+#
+# Args:
+#   year: reference year of the list; one of 2005, 2017, 2021 or 2022.
+#
+# Returns:
+#   The value of the last sf::st_write() call (the function is used for its
+#   side effects: files under ./data_raw/semiarid/ and ./data/semiarid/).
+#
+# Errors:
+#   Stops if `year` is not supported, if the download fails with an HTTP
+#   error, or if no municipality of the list is found in the geobr layer.
+#
+# NOTE(review): harmonize_projection(), fix_topoly() and simplify_temp_sf()
+# are not defined in this file; presumably they come from ./R/support_fun.R.
+prep_semiarid <- function(year) { # year = 2022
+
+  ###### 0. Validate input and pick the settings of the year -----------------
+
+  if (length(year) != 1L || is.na(year)) {
+    stop("`year` must be a single, non-missing value.", call. = FALSE)
+  }
+
+  base_url <- paste0(
+    "https://geoftp.ibge.gov.br/organizacao_do_territorio/",
+    "estrutura_territorial/semiarido_brasileiro/"
+  )
+
+  # One entry per supported year: where the spreadsheet lives and how to read
+  # it. `n_max` caps the number of rows read; presumably the rows after it hold
+  # source notes rather than municipalities -- confirm when adding a new year.
+  cfg <- switch(
+    as.character(year),
+    "2005" = list(
+      url = paste0(base_url, "Situacao_2005a2017/lista_municipios_semiarido.xls"),
+      skip = 1,
+      n_max = 1133,
+      code_col = "Código do Município",
+      name_col = "Nome do Município"
+    ),
+    "2017" = list(
+      url = paste0(base_url, "Situacao_23nov2017/lista_municipios_Semiarido_2017_11_23.xlsx"),
+      skip = 1,
+      n_max = 1262,
+      code_col = "Código do Município",
+      name_col = "Nome do Município"
+    ),
+    "2021" = list(
+      url = paste0(base_url, "Situacao_2021/lista_municipios_Semiarido_2021.xls"),
+      skip = 0,
+      n_max = 1263,
+      code_col = "CD_MUN",
+      name_col = "NM_MUN"
+    ),
+    "2022" = list(
+      url = paste0(base_url, "Situacao_2022/lista_municipios_Semiarido_2022.xlsx"),
+      skip = 0,
+      n_max = 1477,
+      code_col = "CD_MUN",
+      name_col = "NM_MUN"
+    ),
+    NULL
+  )
+
+  # Fail before touching the file system or the network
+  if (is.null(cfg)) {
+    stop(
+      "Unsupported `year`: ", year, ". Must be one of 2005, 2017, 2021, 2022.",
+      call. = FALSE
+    )
+  }
+
+
+  ###### 1. Create folders to save the data -----------------
+
+  # Directory to keep the raw spreadsheet
+  dir_raw <- paste0("./data_raw/semiarid/", year)
+  dir.create(dir_raw, recursive = TRUE, showWarnings = FALSE)
+
+  # Directory to keep the clean .gpkg files
+  dir_clean <- paste0("./data/semiarid/", year)
+  dir.create(dir_clean, recursive = TRUE, showWarnings = FALSE)
+
+
+  #### 2. Download original data set from source website -----------------
+
+  # The local copy keeps the extension of the remote file (.xls or .xlsx)
+  file_ext <- tools::file_ext(cfg$url)
+  file_raw <- file.path(
+    dir_raw,
+    paste0(year, "_lista_municipios_semiarido.", file_ext)
+  )
+
+  response <- httr::GET(
+    url = cfg$url,
+    httr::progress(),
+    httr::write_disk(path = file_raw, overwrite = TRUE)
+  )
+
+  # Do not go on with an error page saved in place of the spreadsheet
+  httr::stop_for_status(response)
+
+
+  #### 3. Read the list of semi-arid municipalities -----------------
+
+  read_sheet <- if (file_ext == "xls") readxl::read_xls else readxl::read_xlsx
+
+  munis_raw <- read_sheet(
+    path = file_raw,
+    skip = cfg$skip,
+    n_max = cfg$n_max
+  )
+
+  # Keep and rename the columns with the municipality code and name
+  munis_semiarid <- dplyr::select(
+    munis_raw,
+    code_muni = dplyr::all_of(cfg$code_col),
+    name_muni = dplyr::all_of(cfg$name_col)
+  )
+
+
+  #### 4. Clean data set -----------------
+
+  # load all munis sf
+  all_munis <- geobr::read_municipality(
+    code_muni = "all",
+    year = year,
+    simplified = FALSE
+  )
+
+  # subset municipalities
+  temp_sf <- subset(all_munis, code_muni %in% munis_semiarid$code_muni)
+
+  # An empty result means the codes of the list did not match the geobr layer;
+  # writing an empty file would hide the problem
+  if (nrow(temp_sf) == 0) {
+    stop(
+      "No municipality of the ", year, " semi-arid list was found in ",
+      "geobr::read_municipality(year = ", year, ").",
+      call. = FALSE
+    )
+  }
+
+  # Harmonize spatial projection CRS, using SIRGAS 2000 epsg (SRID): 4674
+  temp_sf <- harmonize_projection(temp_sf)
+
+  # Make any invalid geometry valid # st_is_valid( sf)
+  temp_sf <- fix_topoly(temp_sf)
+
+  # lighter version of the data set
+  temp_sf_simplified <- simplify_temp_sf(temp_sf, tolerance = 100)
+
+
+  #### 5. Save data set -----------------
+
+  sf::st_write(
+    temp_sf,
+    dsn = paste0(dir_clean, "/semiarid_", year, ".gpkg"),
+    delete_dsn = TRUE
+  )
+  sf::st_write(
+    temp_sf_simplified,
+    dsn = paste0(dir_clean, "/semiarid_", year, "_simplified.gpkg"),
+    delete_dsn = TRUE
+  )
+
+}
+
+
+
+prep_semiarid(2005)
+prep_semiarid(2017)
+prep_semiarid(2021)
+prep_semiarid(2022)
diff --git a/r-package/prep_data/prep_semiarid.R b/r-package/prep_data/prep_semiarid.R
deleted file mode 100644
index 31c3ba64..00000000
--- a/r-package/prep_data/prep_semiarid.R
+++ /dev/null
@@ -1,177 +0,0 @@
-#> DATASET: Brazilian semi-arid
-#> Source: IBGE - https://www.ibge.gov.br/geociencias/cartas-e-mapas/mapas-regionais/15974-semiarido-brasileiro.html?=&t=downloads
-#> Metadata:
-# Titulo: Semiarido brasileiro
-# Titulo alternativo: Semiarido brasileiro
-# Frequencia de atualizacao: ?
-#
-# Forma de apresentacao: Shape
-# Linguagem: Pt-BR
-# Character set: Utf-8
-#
-# Resumo: Poligonos e Pontos do semiarido brasileiro.
-# Informacoes adicionais: Dados produzidos pelo IBGE com base em decretos administrativos do Ministério da Integração Nacional.
-# -"Resolução nº 115 do Ministério da Integração Nacional, de 23 de novembro de 2017"
-# -"Portaria N°89 de 16 de março de 2005, do Ministério da Integração Nacional"
-# Proposito: Identificao do semiarido brasileiro. 
- -# Estado: Em desenvolvimento -# Informacao do Sistema de Referencia: SIRGAS 2000 - -### Libraries (use any library as necessary) - -library(RCurl) -library(stringr) -library(sf) -library(dplyr) -library(readr) -library(data.table) -library(magrittr) -library(lwgeom) -library(stringi) - -####### Load Support functions to use in the preprocessing of the data - -source("./prep_data/prep_functions.R") - - - -###### 0. Create Root folder to save the data ----------------- - -# Root directory -root_dir <- "L:\\# DIRUR #\\ASMEQ\\geobr\\data-raw" -setwd(root_dir) - - -# Directory to keep raw zipped files -dir.create("./semiarid") -dir_raw_2005 <- paste0("./semiarid/", 2005) -dir_raw_2017 <- paste0("./semiarid/", 2017) - -dir.create(dir_raw_2005) -dir.create(dir_raw_2017) - -# Create folders to save clean sf.rds files -dir.create("./semiarid/shapes_in_sf_cleaned", showWarnings = FALSE) -dir_clean_2005 <- paste0("./semiarid/shapes_in_sf_cleaned/", 2005) -dir_clean_2017 <- paste0("./semiarid/shapes_in_sf_cleaned/", 2017) -dir.create(dir_clean_2005) -dir.create(dir_clean_2017) - - - -#### 2. Download original data sets from source website ----------------- - -# Download and read into CSV at the same time -ftp_2005 <- 'ftp://geoftp.ibge.gov.br/organizacao_do_territorio/estrutura_territorial/semiarido_brasileiro/Situacao_2005a2017/lista_municipios_semiarido.xls' -ftp_2017 <- 'ftp://geoftp.ibge.gov.br/organizacao_do_territorio/estrutura_territorial/semiarido_brasileiro/Situacao_23nov2017/lista_municipios_Semiarido_2017_11_23.xlsx' - - -# 2005 -download.file(url = ftp_2005, destfile = paste0(dir_raw_2005,"/","lista_municipios_semiarido.xls"), mode = 'wb') - -# httr::GET(url=ftp_2005, httr::progress(), -# httr::write_disk(paste0(dir_raw_2005,"/","lista_municipios_semiarido.xlsx"))) - -# 2017 -download.file(url = ftp_2017, - destfile = paste0(dir_raw_2017,"/","lista_municipios_semiarido.xlsx") , mode = 'wb') - - - -#### 3. 
2005 Clean data set and save it in compact .rds format----------------- - -# read IBGE data frame -semi_arid_munis <- readxl::read_xls(path = paste0(dir_raw_2005,"/","lista_municipios_semiarido.xls"), - skip = 1, n_max = 1133) -semi_arid_munis <- as.data.frame(semi_arid_munis) - - -# Remove linha con info da fonte de dados -# semi_arid_munis[1263,1] -# semi_arid_munis <- na.exclude(semi_arid_munis) - - -# Rename columns -colnames(semi_arid_munis) <- c("code_state","name_state","code_muni","name_muni","year_muni") - - -# load all munis sf -all_munis <- geobr::read_municipality(code_muni = 'all', year=2005) - - - -# subset municipalities -semi_arid_sf <- subset(all_munis, code_muni %in% semi_arid_munis$code_muni) - - -# Harmonize spatial projection CRS, using SIRGAS 2000 epsg (SRID): 4674 -temp_sf <- harmonize_projection(temp_sf) - - -# Make any invalid geometry valid # st_is_valid( sf) -semi_arid_sf <- lwgeom::st_make_valid(semi_arid_sf) - - -# Save cleaned sf in the cleaned directory -setwd(root_dir) -readr::write_rds(semi_arid_sf, path= paste0(dir_clean_2005,"/semiarid_2005",".rds"), compress = "gz") - - - - -#### 3. 
2017 Clean data set and save it in compact .rds format----------------- - - -# read IBGE data frame -semi_arid_munis <- readxl::read_xlsx(path = paste0(dir_raw_2017,"/","lista_municipios_semiarido.xlsx"), - skip = 1, n_max = 1262) -semi_arid_munis <- as.data.frame(semi_arid_munis) - -# Remove linha con info da fonte de dados -#semi_arid_munis[1263,1] -#semi_arid_munis <- na.exclude(semi_arid_munis) - - - -# Rename columns -colnames(semi_arid_munis) <- c("code_state","name_state","code_muni","name_muni","year_muni") - - -# load all munis sf -all_munis <- geobr::read_municipality(code_muni = 'all', year=2017) - - - -# subset municipalities -semi_arid_sf <- subset(all_munis, code_muni %in% semi_arid_munis$code_muni) - - -# Harmonize spatial projection CRS, using SIRGAS 2000 epsg (SRID): 4674 -semi_arid_sf <- if( is.na(st_crs(semi_arid_sf)) ){ st_set_crs(semi_arid_sf, 4674) } else { st_transform(semi_arid_sf, 4674) } -st_crs(semi_arid_sf) <- 4674 - -# Make any invalid geometry valid # st_is_valid( sf) -semi_arid_sf <- lwgeom::st_make_valid(semi_arid_sf) - - -###### convert to MULTIPOLYGON ----------------- -semi_arid_sf <- to_multipolygon(semi_arid_sf) - - - -###### 6. 
generate a lighter version of the dataset with simplified borders ----------------- -# skip this step if the dataset is made of points, regular spatial grids or rater data - -# simplify -semi_arid_sf_simplified <- st_transform(semi_arid_sf, crs=3857) %>% - sf::st_simplify(preserveTopology = T, dTolerance = 100) %>% - st_transform(crs=4674) -head(semi_arid_sf_simplified) - -# Save cleaned sf in the cleaned directory -setwd(root_dir) -readr::write_rds(semi_arid_sf, path= paste0(dir_clean_2017,"/semiarid_2017",".rds"), compress = "gz") -sf::st_write(semi_arid_sf, dsn= paste0(dir_clean_2017,"/semiarid_2017",".gpkg") ) -sf::st_write(semi_arid_sf_simplified, dsn= paste0(dir_clean_2017,"/semiarid_2017"," _simplified", ".gpkg")) - diff --git a/r-package/prep_data/update_metadata_table.R b/r-package/prep_data/update_metadata_table.R index ffec3a07..fe0f0f49 100644 --- a/r-package/prep_data/update_metadata_table.R +++ b/r-package/prep_data/update_metadata_table.R @@ -12,8 +12,11 @@ library(piggyback) ######### Step 1 - create github release where data will be uploaded to ---------------------- # https://docs.ropensci.org/piggyback/articles/intro.html # https://github.com/settings/tokens +# https://usethis.r-lib.org/articles/git-credentials.html + +usethis::edit_r_environ() # ttt +gitcreds::gitcreds_set() -usethis::edit_r_environ() # create new release pb_new_release("ipeaGIT/geobr", @@ -93,7 +96,7 @@ pb_new_release("ipeaGIT/geobr", # head(metadata) - metadata[geo=='municipality' & year==2022] + metadata[geo=='semiarid'] a <- metadata[geo=='health_facilities'] @@ -101,8 +104,8 @@ a <- metadata[geo=='health_facilities'] ######### Step 3 - upload data to github ---------------------- all_files <- list.files("//storage1/geobr/data_gpkg", full.names = T, recursive = T) - all_files <- all_files[all_files %like% 'census_tract'] - all_files <- all_files[all_files %like% '2022'] + all_files <- all_files[all_files %like% 'semiarid'] + # all_files <- all_files[all_files %like% '2022'] 
# upload data piggyback::pb_upload(all_files, @@ -164,7 +167,7 @@ piggyback::pb_upload(to_go, table(metadata$geo) table(metadata$year) - subset(metadata, geo == 'pop_arrengements') + subset(metadata, geo == 'semiarid') subset(metadata, geo == 'urban_concentrations') subset(metadata, geo == 'meso_region')[1:4,] subset(metadata, geo == 'micro_region')[1:4,] @@ -174,19 +177,14 @@ piggyback::pb_upload(to_go, # save updated metadata table # readr::write_csv(metadata,"//storage1/geobr/metadata/metadata_1.7.0_gpkg.csv") - # # upload updated metadata table github - # piggyback::pb_upload("//storage1/geobr/metadata/metadata_1.7.0_gpkg.csv", - # "ipeaGIT/geobr", - # "v1.7.0" - # #, .token = ttt - # ) - # + # upload updated metadata table github + piggyback::pb_upload("//storage1/geobr/metadata/metadata_1.7.0_gpkg.csv", + "ipeaGIT/geobr", + "v1.7.0" + # , .token = gh::gh_token() + ) + + -f <- list.files('//storage1/geobr/data_gpkg/census_tract/2022', full.names = T) -piggyback::pb_upload(f, - "ipeaGIT/geobr", - "v1.7.0" - #, .token = ttt - )