Skip to content

Commit

Permalink
neighborhoods_2022
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Nov 15, 2024
1 parent 8aa6650 commit 189fcb6
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 44 deletions.
74 changes: 30 additions & 44 deletions data_prep/R/census_tract_2022.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,60 +18,46 @@ dir.create(dest_dir, recursive = T)

if(year == 2022){

ftp <- 'https://ftp.ibge.gov.br/Censos/Censo_Demografico_2022/Agregados_por_Setores_Censitarios_preliminares/malha_com_atributos/setores/gpkg/BR/BR_Malha_Preliminar_2022.zip'
# ftp <- 'https://ftp.ibge.gov.br/Censos/Censo_Demografico_2022/Agregados_por_Setores_Censitarios_preliminares/malha_com_atributos/setores/gpkg/BR/BR_Malha_Preliminar_2022.zip'
ftp <- 'https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022/setores/gpkg/BR/BR_setores_CD2022.gpkg'
dest_file <- download_file(file_url = ftp, dest_dir = raw_dir)

}


#### 1. unzip -----------------

temp_dir <- tempdir()

unzip(dest_file, exdir = temp_dir)
local_file <- list.files(temp_dir, full.names = T, pattern = 'gpkg')



# read and save original raw data
df <- sf::st_read(local_file)
saveRDS(df, paste0(raw_dir,'/BR_Malha_Preliminar_2022.rds'))





#### 1. clean and save data -----------------
df <- readRDS(paste0(raw_dir,'/BR_Malha_Preliminar_2022.rds'))

temp_sf <- dplyr::select(df,
code_tract = CD_SETOR,
code_muni = CD_MUN,
name_muni = NM_MUN,
code_subdistrict = CD_SUBDIST,
name_subdistrict = NM_SUBDIST,
code_district = CD_DIST,
name_district = NM_DIST,
code_urban_concentration = CD_CONCURB,
name_urban_concentration = NM_CONCURB,
code_state = CD_UF,
name_state = NM_UF,
code_micro = CD_MICRO,
name_micro = NM_MICRO,
code_meso = CD_MESO,
name_meso = NM_MESO,
code_immediate = CD_RGI,
name_immediate = NM_RGI,
code_intermediate = CD_RGINT,
name_intermediate = NM_RGINT,
code_region = CD_REGIAO,
name_region = NM_REGIAO
)
head(temp_sf)

# Read the 2022 census tract layer from the raw directory.
# NOTE(review): assumes the download step above saved the file in raw_dir
# under this exact name -- confirm against download_file().
df <- sf::st_read(paste0(raw_dir,'/BR_setores_CD2022.gpkg'))

# Keep the identifier and attribute columns, renamed from the IBGE
# CD_* (code) / NM_* (name) fields to the code_* / name_* convention.
temp_sf <- dplyr::select(
  df,
  code_tract = CD_SETOR,
  code_muni = CD_MUN,
  name_muni = NM_MUN,
  name_neighborhood = NM_BAIRRO,
  code_neighborhood = CD_BAIRRO,
  code_subdistrict = CD_SUBDIST,
  name_subdistrict = NM_SUBDIST,
  code_district = CD_DIST,
  name_district = NM_DIST,
  code_urban_concentration = CD_CONCURB,
  name_urban_concentration = NM_CONCURB,
  code_state = CD_UF,
  name_state = NM_UF,
  code_immediate = CD_RGI,
  name_immediate = NM_RGI,
  code_intermediate = CD_RGINT,
  name_intermediate = NM_RGINT,
  code_region = CD_REGIAO,
  name_region = NM_REGIAO,
  situacao = SITUACAO,
  code_situacao = CD_SIT,
  code_type = CD_TIPO,
  area_km2 = AREA_KM2
)

# remove P from code tract
# `dplyr::` is spelled out to match the select() call above, so this step
# does not depend on dplyr being attached. `fixed = TRUE` treats "P" as a
# literal character rather than a regular expression.
temp_sf <- dplyr::mutate(
  temp_sf,
  code_tract = gsub("P", "", code_tract, fixed = TRUE)
)
head(temp_sf)


Expand Down
104 changes: 104 additions & 0 deletions data_prep/R/neighborhoods_2022.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
library(sf)
library(data.table)
library(dplyr)
library(furrr)

# Reference year of the neighborhoods data set handled by this script.
year <- 2022

# create dest dir
# raw_dir receives the downloaded file; dest_dir receives the cleaned output.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
# showWarnings = FALSE keeps re-runs quiet when the folders already exist.
raw_dir <- file.path("./data_raw/neighborhoods", year)
dest_dir <- file.path("./data/neighborhoods", year)
dir.create(raw_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE)




#### 0. Download original data sets from IBGE ftp -----------------

# Only the 2022 release is handled here. The national neighborhoods
# GeoPackage is fetched into raw_dir through download_file().
# NOTE(review): download_file() is not defined in this script; presumably it
# comes from a shared helper file that must be sourced first -- confirm.
if (year == 2022) {
  ftp <- "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_de_setores_censitarios__divisoes_intramunicipais/censo_2022/bairros/gpkg/BR/BR_bairros_CD2022.gpkg"
  dest_file <- download_file(file_url = ftp, dest_dir = raw_dir)
}



#### 1. clean and save data -----------------

# Load the raw neighborhoods layer that was saved in raw_dir.
df <- sf::st_read(file.path(raw_dir, "BR_bairros_CD2022.gpkg"))

# Keep the identifier columns only, renaming the IBGE CD_* (code) and
# NM_* (name) fields to the code_* / name_* convention.
temp_sf <- dplyr::select(
  df,
  code_muni                = CD_MUN,
  name_muni                = NM_MUN,
  name_neighborhood        = NM_BAIRRO,
  code_neighborhood        = CD_BAIRRO,
  code_subdistrict         = CD_SUBDIST,
  name_subdistrict         = NM_SUBDIST,
  code_district            = CD_DIST,
  name_district            = NM_DIST,
  code_urban_concentration = CD_CONCURB,
  name_urban_concentration = NM_CONCURB,
  code_immediate           = CD_RGI,
  name_immediate           = NM_RGI,
  code_intermediate        = CD_RGINT,
  name_intermediate        = NM_RGINT,
  code_state               = CD_UF,
  name_state               = NM_UF,
  code_region              = CD_REGIAO,
  name_region              = NM_REGIAO
)

# Quick look at the first rows when the script is run interactively.
head(temp_sf)




# make all name columns as character
# startsWith() anchors the match at the beginning of the column name, so a
# column that merely contains "name_" somewhere in the middle is not selected.
all_cols <- names(temp_sf)
char_cols <- all_cols[startsWith(all_cols, "name_")]
temp_sf <- dplyr::mutate(
  temp_sf,
  dplyr::across(dplyr::all_of(char_cols), as.character)
)

# make all code columns as numeric
num_cols <- all_cols[startsWith(all_cols, "code_")]
temp_sf <- dplyr::mutate(
  temp_sf,
  dplyr::across(dplyr::all_of(num_cols), as.numeric)
)

# Alternative kept from the original script (disabled): store the short
# codes as integer instead of double.
# int_cols <- c('code_state', 'code_region', 'code_immediate', 'code_intermediate')
# temp_sf <- mutate(temp_sf, across(all_of(int_cols), as.integer))


# Remove Lagoa dos Patos and Lagoa Mirim.
# code_muni holds 7-digit IBGE municipality codes, so the previous comparison
# against 15-digit census-tract-style values (430000100000000 and
# 430000200000000) could never match and removed nothing. The two lagoons
# use the municipality-level codes 4300001 and 4300002.
# NOTE(review): confirm these codes against the IBGE municipality table.
temp_sf <- subset(temp_sf, code_muni != 4300001 & code_muni != 4300002)


# Use UTF-8 encoding, then harmonize spatial projection CRS, using
# SIRGAS 2000 epsg (SRID): 4674
temp_sf <- harmonize_projection(use_encoding_utf8(temp_sf))
gc()

# Run fix_topoly() and then convert to MULTIPOLYGON.
# NOTE(review): fix_topoly() is not defined in this script; presumably it
# repairs invalid geometries -- confirm against the shared helpers.
temp_sf <- to_multipolygon(fix_topoly(temp_sf))

# Build the simplified copy and run fix_topoly() on it again.
# NOTE(review): the unit of `tolerance = 10` is set inside simplify_temp_sf()
# and cannot be determined from here -- confirm.
temp_sf_simplified <- fix_topoly(simplify_temp_sf(temp_sf, tolerance = 10))

# Save cleaned sf in the cleaned directory
# append = FALSE replaces an existing layer. With the st_write() default
# (append = NA) a second run of this script stops with an error because the
# output layer already exists.
sf::st_write(
  temp_sf,
  file.path(dest_dir, paste0("neighborhoods_", year, ".gpkg")),
  append = FALSE
)
sf::st_write(
  temp_sf_simplified,
  file.path(dest_dir, paste0("neighborhoods_", year, "_simplified.gpkg")),
  append = FALSE
)





0 comments on commit 189fcb6

Please sign in to comment.