-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean_vaconservation.R
48 lines (41 loc) · 2.14 KB
/
clean_vaconservation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
library(tidyverse)
# Hampton Roads species list: load the raw export -----------------------------
# Source: https://vanhde.org/species-search, filtered by 'Hampton Roads' under
# Planning District. The downloaded CSV is expected under data/.
species <- read_csv("data/attributes_2022_09_12_191055.csv") %>%
  # normalise the column headers to snake_case
  janitor::clean_names() %>%
  # the export carries one extra row named 'Hampton Roads'; drop it
  dplyr::filter(common_name_natural_community != 'Hampton Roads')
# flag the "species group" header rows -----------------------------------------
# A row whose name consists only of capital letters and whitespace (two or more
# characters) is treated as a group header: `species_group_tmp` holds the
# header's own name on those rows and "" on every other row.
# `rownum` records the original row order. row_number() is used rather than
# 1:n() because 1:n() yields c(1, 0) on a zero-row table and makes mutate()
# fail; if_else() keeps the new column character-typed even when it is empty.
species <- species %>%
  mutate(
    species_group_tmp = if_else(
      str_detect(common_name_natural_community, "^[A-Z\\s]{2,}$"),
      common_name_natural_community,
      ""
    ),
    rownum = row_number()
  )
# carry each group header down to the species rows beneath it ------------------
# `species_group_tmp` holds a group name on header rows and "" elsewhere. The
# blanks are converted to NA so tidyr::fill() can propagate the most recent
# header downward in a single vectorised pass. This replaces a row-by-row loop
# that sliced the data frame twice per row, grew a tibble with rbind(), and
# joined the result back on `rownum`; that loop was quadratic in the number of
# rows and errored on a zero-row table (1:nrow() gives c(1, 0)).
# Header rows keep their own name; rows above the first header have nothing to
# inherit and end up with "", exactly as before.
species <- species %>%
  # as.character() keeps the column character-typed even if it arrives empty
  mutate(species_val = na_if(as.character(species_group_tmp), "")) %>%
  fill(species_val, .direction = "down") %>%
  mutate(species_val = replace_na(species_val, ""))
# keep only the rows that describe an individual species -----------------------
# Both conditions must hold for a row to survive:
#  - `virginia_coastal_zone` is present (removes the group header rows)
#  - `scientific_name_linked` contains "a href" (removes streams, colonies,
#    and cave entries)
species <- dplyr::filter(
  species,
  !is.na(virginia_coastal_zone),
  str_detect(scientific_name_linked, "a href")
)
# break the linked scientific name into display text and URL -------------------
# `scientific_name_linked` holds link markup (rows without "a href" were
# removed earlier in the script). Two new columns are derived from it:
#  - scientific_name_clean: the text between ">" and "<", with the first "<"
#    and the first ">" stripped off
#  - scientific_name_url: the double-quoted text that is followed by
#    whitespace, with the first two quote characters stripped and then trimmed
# NOTE(review): both patterns are greedy, so they presumably rely on each cell
# holding a single simple link -- confirm against the raw export.
species <- species %>%
  mutate(
    scientific_name_clean = scientific_name_linked %>%
      str_extract("\\>.*\\<") %>%
      str_replace("<", "") %>%
      str_replace(">", ""),
    scientific_name_url = scientific_name_linked %>%
      str_extract('\\".*\\"\\s') %>%
      str_replace('"', "") %>%
      str_replace('"', "") %>%
      str_trim()
  ) %>%
  # drop the raw name columns and the helper columns that are no longer needed
  select(
    -c(scientific_name, scientific_name_linked, species_group_tmp, rownum)
  ) %>%
  # the cleaned text takes over the `scientific_name` column name
  rename(scientific_name = scientific_name_clean)
# final column names and order, then write the cleaned table -------------------
# select() renames while it reorders: the four identifying columns come first
# (with `common_name` and `species_grp` as their new names) and everything()
# appends the remaining columns in their existing order.
species <- species %>%
  select(
    common_name = common_name_natural_community,
    scientific_name,
    scientific_name_url,
    species_grp = species_val,
    everything()
  )
write_csv(species, "data/native_species_VAconservation.csv")