-
Notifications
You must be signed in to change notification settings - Fork 0
/
cov_ positivity_rates_by_regions_brasil.R
164 lines (130 loc) · 6.58 KB
/
cov_ positivity_rates_by_regions_brasil.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
## Installing necessary packages
# List of required packages
pacotes <- c("plotly","tidyverse","knitr","kableExtra","cowplot", "ggplot2","dplyr", "lubridate", "tidyverse")
# Check if the packages are already installed
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
# Install packages that are not yet installed
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
# Install missing packages
install.packages(instalador, dependencies = T)
break()}
# Load all packages after installation
sapply(pacotes, require, character = T)
} else {
# Load all packages if already installed
sapply(pacotes, require, character = T)
}
# Function to load/install multiple packages
ipak <- function(pkg){
# Identify packages that are not installed
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
# Install missing packages if necessary
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
# Load all packages
sapply(pkg, require, character.only = TRUE)
}
# List of required packages
packages <- c("plotly","tidyverse","knitr","kableExtra","cowplot", "ggplot2","dplyr", "lubridate", "tidyverse")
# Use ipak function to ensure packages are loaded/installed
ipak(pacotes)
### Set working directory
setwd("/home/gabriela/Documentos/CEVIVAS_COV/Solicitações/Paper_covid/positividade/")
## Print the current working directory
getwd()
# Load CSV files for metadata from 2021 and 2022
metadata_2021 <-read.csv(file = "INFLUD21-21-11-2022.csv", header = T, sep = ';')
metadata_2022 <-read.csv(file = "INFLUD22-21-11-2022.csv", header = T, sep = ';')
# Preview the 'CLASSI_FIN' and 'DT_COLETA' columns of the metadata
head(metadata_2021$CLASSI_FIN)
head(metadata_2021$DT_COLETA)
head(metadata_2022$CLASSI_FIN)
head(metadata_2022$DT_COLETA)
# Check the data types of 'DT_COLETA' and 'CLASSI_FIN'
str(metadata_2021$DT_COLETA) # Needs to be converted to date format
str(metadata_2021$CLASSI_FIN) # This is already correct
str(metadata_2022$DT_COLETA) # Needs to be converted to date format
str(metadata_2022$CLASSI_FIN) # This is correct
# Select only the 'DT_COLETA' and 'CLASSI_FIN' columns from both datasets
meta_2021 = metadata_2021[,c("DT_COLETA", "CLASSI_FIN")]
meta_2022 = metadata_2022[,c("DT_COLETA", "CLASSI_FIN")]
# Filter the data to include only COVID-19 cases (classified as '5')
meta_2021_2 = meta_2021[meta_2021$CLASSI_FIN %in% c("5"), ]
meta_2022_2 = meta_2022[meta_2022$CLASSI_FIN %in% c("5"), ]
# Convert 'DT_COLETA' column to date format
meta_2021_2$DT_COLETA_NEW <- as.Date(meta_2021_2$DT_COLETA, format = "%d/%m/%Y")
str(meta_2021_2$DT_COLETA_NEW)
meta_2022_2$DT_COLETA_NEW <- as.Date(meta_2022_2$DT_COLETA, format = "%d/%m/%Y")
str(meta_2022_2$DT_COLETA_NEW)
# Select relevant columns and remove rows with missing data
meta_2021_col = meta_2021_2[,c(2,3)] %>% drop_na()
meta_2022_col = meta_2022_2[,c(2,3)] %>% drop_na()
# Combine 2021 and 2022 data
positividade_data <- rbind(meta_2021_col, meta_2022_col)
# Filter the data for a specific date range
positividade_data <- positividade_data %>% filter(between(DT_COLETA_NEW, as.Date('2021-11-25'), as.Date('2022-11-13')))
# Group data by week and calculate the count of cases
positividade_group <- positividade_data %>% group_by(week = cut(DT_COLETA_NEW, "week"), CLASSI_FIN) %>% tally()
# Separate SRA data for 2021 and 2022
p1_sra_2021 = metadata_2021[,c("DT_COLETA", "CLASSI_FIN")]
p1_sra_2022 = metadata_2022[,c("DT_COLETA", "CLASSI_FIN")]
# Convert the collection date to date format
p1_sra_2021$DT_COLETA_NEW <- as.Date(p1_sra_2021$DT_COLETA, format = "%d/%m/%Y")
str(p1_sra_2021$DT_COLETA_NEW)
p1_sra_2022$DT_COLETA_NEW <- as.Date(p1_sra_2022$DT_COLETA, format = "%d/%m/%Y")
str(p1_sra_2022$DT_COLETA_NEW)
# Remove rows with missing values
p1_sra_2021 = na.omit(p1_sra_2021)
p1_sra_2022 = na.omit(p1_sra_2022)
# Select the relevant columns and filter by date range
p1_sra_2021 = p1_sra_2021[,c("DT_COLETA_NEW", "CLASSI_FIN")] %>% filter(between(DT_COLETA_NEW, as.Date('2021-11-25'), as.Date('2022-11-13')))
p1_sra_2022 = p1_sra_2022[,c("DT_COLETA_NEW", "CLASSI_FIN")] %>% filter(between(DT_COLETA_NEW, as.Date('2021-11-25'), as.Date('2022-11-13')))
# Combine the SRA data from 2021 and 2022
all_sra <- rbind(p1_sra_2021, p1_sra_2022)
str(all_sra)
# Group the combined SRA data by week and count the cases
all_sra2 <- all_sra %>% group_by(week = cut(DT_COLETA_NEW, "week"), CLASSI_FIN) %>% tally()
# Calculate the total number of tests per week
all_sra3 <- all_sra2 %>% group_by(week) %>%
mutate(testes_total= sum(n))
# Merge COVID positivity data with total tests by week
join_positividade_group_all_sra3 <- left_join(positividade_group, all_sra3, by = "week")
# Calculate positivity score (positive cases / total tests)
join_positividade_group_all_sra3$score_percen = (join_positividade_group_all_sra3$n.x/join_positividade_group_all_sra3$testes_total)
# Convert 'week' to Date format for plotting
join_positividade_group_all_sra3$week = as.Date(join_positividade_group_all_sra3$week)
str(join_positividade_group_all_sra3)
# Save the combined data to a CSV file
write.csv(join_positividade_group_all_sra3,"positividade_SRA.csv", row.names = FALSE)
# Plot the positivity rate over time
line <- ggplot(join_positividade_group_all_sra3) +
geom_line(aes(x = week, y = score_percen)) +
scale_y_continuous(position = "right", labels = scales::percent, name = "Percent rate") +
scale_x_date(date_breaks = "1 month", date_minor_breaks = "1 week",
date_labels = "%B %Y") + theme_classic() +
labs(title = "",x= "" , y = "Number of cases") +
theme(
axis.title.y = element_text(size = 14),
axis.text=element_text(size=10),
plot.title = element_text(hjust = 0.5, size = 16),
legend.title = element_text(size=12),
legend.text = element_text(size=10),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()
)
line
# Modify plot to make background transparent
p <- line + theme( rect = element_rect(fill = "transparent"))
p <- p +
theme(
panel.background = element_rect(fill = "transparent"), # Panel background
plot.background = element_rect(fill = "transparent", color = NA), # Plot background
panel.grid.major = element_blank(), # Remove major grid lines
panel.grid.minor = element_blank(), # Remove minor grid lines
legend.background = element_rect(fill = "transparent"), # Legend background
legend.box.background = element_rect(fill = "transparent") # Legend box background
)
# Save plot as PNG with transparent background
ggsave("test.png", plot= p, bg = "transparent", dpi=300, width = 8, height = 4)