Skip to content

Latest commit

 

History

History
194 lines (153 loc) · 5.11 KB

README.md

File metadata and controls

194 lines (153 loc) · 5.11 KB

SIF 0.05º, BRAZIL

Costa, L.M and Panoso A.R

First considerations

This is a guide on how to process the data obtained on this platform, we will not use all the files available in this example because they are very extensive.

This is just the procedure applied to get to the annual databases presented in the data folder. In this folder you will have access to the data referring to this study already processed.

A table was generated for each year with the averages for each coordinate in the Brazilian territory, this table was used in the ArcGIS software to generate the variograms and use the Ordinary Kriging (O.K) interpolation.

The data generated by the variogram in Arcgis were submitted to cross validation, the script is available in the R folder, as well as the script used to generate the histograms.

Procedure

Extracting the data

In data-raw/ alocat all the .nc files, in pattern use the year that you want procedure. You can apply a filter for the region of your interest.

file_names <- list.files("data-raw/", pattern = "2015")

for( i in seq_along(file_names)){
  if(i==1){
    sif.005 <- raster::brick(paste0("data-raw/", file_names[i]))
    sif.005 <- raster::as.data.frame(sif.005[[1]],xy=T)
    sif.005 <- na.omit(sif.005)
    sif.005 <- sif.005 |> 
      dplyr::filter(
        x < -30,
        y < 10
        )
  }else{
    sif.005a <- raster::brick(paste0("data-raw/", file_names[i]))
    sif.005a <- raster::as.data.frame(sif.005a[[1]],xy=T)
    sif.005a <- na.omit(sif.005a)
    sif.005a <- sif.005a |> 
      dplyr::filter(
        x < -30,
        y < 10
        )
    sif.005 <- rbind(sif.005,sif.005a)
  }
}

Filter for Brazil

First vizualization of data

sif.005 |> 
  ggplot2::ggplot(ggplot2::aes(x,y))+
  ggplot2::geom_point()

Now we can create the polygon for Brazil and making some necessary adjustments for the country, and after that we can filter the data for the country.

br <- geobr::read_country(showProgress = FALSE)
region <- geobr::read_region(showProgress = FALSE)

pol_br <- br$geom |> purrr::pluck(1) |> as.matrix()
pol_br <- pol_br[pol_br[,1]<=-34,]
pol_br <- pol_br[!((pol_br[,1]>=-38.8 & pol_br[,1]<=-38.6) &
                              (pol_br[,2]>= -19 & pol_br[,2]<= -16)),]

pol_NE <- region$geom |> purrr::pluck(2) |> as.matrix()# pol North Easth
pol_NE <- pol_NE[pol_NE[,1]<=-34,]
pol_NE <- pol_NE[!((pol_NE[,1]>=-38.7 & pol_NE[,1]<=-38.6) & pol_NE[,2]<=-15),]

The logical function for filter the points that exist in brazil

def_pol <- function(x, y, pol){
  as.logical(sp::point.in.polygon(point.x = x,
                                  point.y = y,
                                  pol.x = pol[,1],
                                  pol.y = pol[,2]))
}

sif.005 <- sif.005 |> 
  dplyr::mutate(
    flag_br = def_pol(x,y,pol_br),
    flag_NE = def_pol(x,y,pol_NE)
    )
br |>
  ggplot2::ggplot() +
  ggplot2::geom_sf(fill="#2D3E50", color="#FEBF57",
          size=.15, show.legend = FALSE)+
  ggplot2::geom_point(data=sif.005 |> 
                      dplyr::filter(flag_br|flag_NE),
             ggplot2::aes(x=x,y=y),
             shape=3,
             col="red",
             alpha=0.2)

sif.005 <- sif.005 |> 
  dplyr::filter(flag_br|flag_NE)

Outliers

Checking for outliers

sif.005 |> 
  ggplot2:: ggplot(ggplot2::aes(y=layer))+
  ggplot2::geom_boxplot(fill="green",outlier.colour = "black")+
  ggplot2::coord_cartesian(xlim=c(-1,1))+
  ggplot2::theme_classic()

Creating a function and applaying on the data

remove_outlier <- function(x,coefi=1.5){
  med = median(x)
  li = med - coefi*IQR(x)
  ls = med + coefi*IQR(x)
  c(Lim_Inf = li, Lim_Sup = ls)
}

sif.005 <-  sif.005 |> 
  dplyr::filter(
    layer > remove_outlier(sif.005$layer)[1] &
      layer < remove_outlier(sif.005$layer)[2]
  )

Creating the mean file for Arcgis

sif.005 <- sif.005 |> 
  dplyr::mutate(coord = stringr::str_c(x,y, sep= ":")) |> 
  dplyr::group_by(coord) |> 
  dplyr::summarise(sif = mean(layer))

coords = data.frame(stringr::str_split(sif.005$coord, ":", n=Inf,simplify = T))

sif.005<- sif.005 |> 
  dplyr::mutate(
    lon = coords$X1,
    lat = coords$X2) |> 
  dplyr::select(lon, lat, sif)

head(sif.005)
## # A tibble: 6 x 3
##   lon              lat                 sif
##   <chr>            <chr>             <dbl>
## 1 -34.824999999967 -7.22500000000471 0.310
## 2 -34.824999999967 -7.27500000000471 0.218
## 3 -34.824999999967 -7.32500000000471 0.328
## 4 -34.824999999967 -7.3750000000047  0.278
## 5 -34.824999999967 -7.4250000000047  0.287
## 6 -34.824999999967 -7.4750000000047  0.229

Now you can save the file in csv on the data folder and use this data to make the O.K