Skip to content

Commit

Permalink
R CMD check passing - no warnings or errors
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Mar 22, 2024
1 parent 551aa11 commit 4a898bb
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 95 deletions.
74 changes: 37 additions & 37 deletions R/bacdive.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,18 @@
.getTidyBD <- function(bacdive_data) {
bacdive_data |>
tidyr::pivot_longer(
cols = .data$gram_stain:tidyr::last_col(), # Attributes start in the gram_stain column
cols = gram_stain:tidyr::last_col(), # Attributes start in the gram_stain column
names_to = 'Attribute', values_to = 'Attribute_value'
) |>
dplyr::filter(.data$Attribute_value != '') |>
dplyr::mutate(Attribute = gsub('_', ' ', .data$Attribute)) |>
dplyr::filter(Attribute_value != '') |>
dplyr::mutate(Attribute = gsub('_', ' ', Attribute)) |>
dplyr::mutate(
Attribute = dplyr::case_when(
.data$Attribute == 'oxygen tolerance' ~ 'aerophilicity',
.data$Attribute == 'cell shape' ~ 'shape',
.data$Attribute == 'pathogenicity animal' ~ 'animal pathongen',
.data$Attribute == 'sample type' ~ 'isolation site',
TRUE ~ .data$Attribute
Attribute == 'oxygen tolerance' ~ 'aerophilicity',
Attribute == 'cell shape' ~ 'shape',
Attribute == 'pathogenicity animal' ~ 'animal pathongen',
Attribute == 'sample type' ~ 'isolation site',
TRUE ~ Attribute
)
) |>
dplyr::distinct()
Expand Down Expand Up @@ -163,42 +163,42 @@
)
regex <- paste0('(', paste0(valid_terms, collapse = '|'), ')')
split_df[['halophily']] <- split_df[['halophily']] |>
dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, ';')) |>
dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
tidyr::unnest(cols = 'Attribute_value') |>
dplyr::filter(!grepl('no growth', .data$Attribute_value)) |>
dplyr::filter(!grepl('no growth', Attribute_value)) |>
dplyr::mutate(
Attribute_value = stringr::str_squish(.data$Attribute_value),
Attribute_value = sub('NaCL', 'NaCl', .data$Attribute_value),
Attribute_value = sub('Marine', 'Sea', .data$Attribute_value),
Attribute_value = sub('Salts', 'salts', .data$Attribute_value)
Attribute_value = stringr::str_squish(Attribute_value),
Attribute_value = sub('NaCL', 'NaCl', Attribute_value),
Attribute_value = sub('Marine', 'Sea', Attribute_value),
Attribute_value = sub('Salts', 'salts', Attribute_value)
) |>
dplyr::filter(grepl(regex, .data$Attribute_value)) |>
dplyr::filter(grepl(regex, Attribute_value)) |>
dplyr::mutate(
Attribute = stringr::str_extract(.data$Attribute_value, regex),
Unit = .data$Attribute_value |>
Attribute = stringr::str_extract(Attribute_value, regex),
Unit = Attribute_value |>
stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |>
stringr::str_squish() |>
stringr::str_remove('^.* '),
Attribute_value = .data$Attribute_value |>
Attribute_value = Attribute_value |>
stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |>
stringr::str_squish() |>
stringr::str_remove(' .*$'),
Attribute_group = 'halophily',
Attribute_type = 'range'
) |>
dplyr::filter(!grepl('[0-9]', .data$Unit)) |>
dplyr::filter(!grepl('[0-9]', Unit)) |>
dplyr::distinct()

## hemolysis ####
split_df[['hemolysis']] <- split_df[['hemolysis']] |>
dplyr::mutate(
Attribute_value = strsplit(.data$Attribute_value, ';|/')
Attribute_value = strsplit(Attribute_value, ';|/')
) |>
tidyr::unnest(.data$Attribute_value) |>
dplyr::mutate(Attribute_value = stringr::str_squish(.data$Attribute_value)) |>
dplyr::filter(.data$Attribute_value != '') |>
tidyr::unnest(Attribute_value) |>
dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value)) |>
dplyr::filter(Attribute_value != '') |>
dplyr::mutate(
Attribute = .data$Attribute_value,
Attribute = Attribute_value,
Attribute_value = TRUE,
Attribute_group = 'hemolysis',
Attribute_type = 'multistate-intersection'
Expand All @@ -212,8 +212,8 @@
split_df[['motility']] <- split_df[['motility']] |>
dplyr::mutate(
Attribute_value = dplyr::case_when(
.data[['Attribute_value']] == 'yes' ~ TRUE,
.data[['Attribute_value']] == 'no' ~ FALSE
Attribute_value == 'yes' ~ TRUE,
Attribute_value == 'no' ~ FALSE
)
)
split_df[['motility']][['Attribute_group']] <- 'motility'
Expand All @@ -232,8 +232,8 @@
## metabolite production ####
mp <- split_df[['metabolite production']]
mp <- mp |>
dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, ';')) |>
tidyr::unnest(.data$Attribute_value)
dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
tidyr::unnest(Attribute_value)
x <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$')
mp <- mp[which(!is.na(x)),]
y <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$')
Expand All @@ -249,9 +249,9 @@
names(split_df)[pos] <- 'metabolite utilization'
mu <- split_df[['metabolite utilization']]
mu <- mu |>
dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, ';')) |>
tidyr::unnest(.data$Attribute_value) |>
dplyr::mutate(Attribute_value = stringr::str_squish(.data$Attribute_value))
dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
tidyr::unnest(Attribute_value) |>
dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value))
x <- sub('^.* (\\+|-|\\+/-) *.*$', '\\1', mu[['Attribute_value']])
y <- ifelse(!x %in% c('+', '-', '+/-'), NA, x)
mu <- mu[which(!is.na(y)),]
Expand All @@ -265,9 +265,9 @@
y == '+/-' ~ 'TRUE/FALSE'
)
mu <- mu |>
dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, '/')) |>
tidyr::unnest(.data$Attribute_value) |>
dplyr::mutate(Attribute_value = as.logical(.data$Attribute_value))
dplyr::mutate(Attribute_value = strsplit(Attribute_value, '/')) |>
tidyr::unnest(Attribute_value) |>
dplyr::mutate(Attribute_value = as.logical(Attribute_value))
mu[['Attribute_group']] <- 'metabolite utilization'
mu[['Attribute_type']] <- 'multistate-intersection'
split_df[['metabolite utilization']] <- mu
Expand All @@ -277,13 +277,13 @@
sf <- sf |>
dplyr::mutate(
Attribute_value = dplyr::case_when(
.data[['Attribute_value']] == 'yes' ~ TRUE,
.data[['Attribute_value']] == 'no' ~ FALSE
Attribute_value == 'yes' ~ TRUE,
Attribute_value == 'no' ~ FALSE
),
Attribute_group = 'spore formation',
Attribute_type = 'binary'
) |>
dplyr::filter(!is.na(.data$Attribute_value))
dplyr::filter(!is.na(Attribute_value))
split_df[['spore formation']] <- sf

split_df <- lapply(split_df, function(x) {
Expand Down
64 changes: 35 additions & 29 deletions R/bugphyzz.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# utils::globalVariables(c("Rank"))
Rank <- NULL
# Register the column names that this package references as bare symbols
# inside dplyr/tidyr calls, so that R CMD check does not report them as
# undefined global variables.
utils::globalVariables(
  names = c(
    "Rank",
    "Attribute",
    "Attribute_group",
    "Attribute_new",
    "Attribute_range",
    "Attribute_value",
    "Attribute_value_max",
    "Attribute_value_min",
    "Br-C10:1",
    "Evidence",
    "Frequency",
    "NCBI_ID",
    "Oxo-C19:1",
    "Taxon_name",
    "Unit",
    "attribute",
    "functionname",
    "gram_stain",
    "physiology",
    "unit",
    "value"
  )
)

#' Import bugphyzz
#'
#' \code{importBugphyzz} imports bugphyzz annotations as a list of
Expand Down Expand Up @@ -68,7 +74,7 @@ importBugphyzz <- function(
url = urls[i], verbose = TRUE, force = force_download
)
output[[i]] <- utils::read.csv(rpath, header = TRUE, skip = 1) |>
dplyr::mutate(Attribute = tolower(.data$Attribute))
dplyr::mutate(Attribute = tolower(Attribute))
}
}
output <- lapply(output, function(x) split(x, x$Attribute))
Expand All @@ -82,43 +88,43 @@ importBugphyzz <- function(
.x |>
dplyr::mutate(
Attribute = ifelse(
.data$Attribute == "plant pathogenity",
Attribute == "plant pathogenity",
"plant pathogenicity",
.data$Attribute
Attribute
)
)
})

names(output) <- purrr::map_chr(output, ~ unique(.x$Attribute))
val <- .validationData() |>
dplyr::filter(.data$rank == "all") |>
dplyr::select(.data$physiology, .data$attribute, .data$value) |>
dplyr::mutate(physiology = tolower(.data$physiology)) |>
dplyr::mutate(attribute = tolower(.data$attribute))
dplyr::filter(rank == "all") |>
dplyr::select(physiology, attribute, value) |>
dplyr::mutate(physiology = tolower(physiology)) |>
dplyr::mutate(attribute = tolower(attribute))

output <- purrr::map(output, ~ {
attr_type <- unique(.x$Attribute_type)
if (attr_type == "binary") {
val <- dplyr::select(val, Attribute = .data$attribute, .data$value)
val <- dplyr::select(val, Attribute = attribute, value)
o <- dplyr::left_join(.x, val, by = "Attribute" )
} else if (attr_type == "multistate-intersection" || attr_type == "multistate-union") {
val <- dplyr::select(val, Attribute = .data$physiology, Attribute_value = .data$attribute, .data$value)
o <- dplyr::left_join(dplyr::mutate(.x, Attribute_value = tolower(.data$Attribute_value)) , val, by = c("Attribute", "Attribute_value"))
val <- dplyr::select(val, Attribute = physiology, Attribute_value = attribute, value)
o <- dplyr::left_join(dplyr::mutate(.x, Attribute_value = tolower(Attribute_value)) , val, by = c("Attribute", "Attribute_value"))
} else if (attr_type == "numeric") {
val <- dplyr::select(val, Attribute = .data$attribute, .data$value)
val <- dplyr::select(val, Attribute = attribute, value)
o <- dplyr::left_join(.x, val, by = "Attribute") |>
dplyr::rename(NSTI = .data$nsti)
dplyr::rename(NSTI = nsti)
}
o |>
dplyr::filter(
!(.data$value < v & .data$Evidence == "asr")
!(value < v & Evidence == "asr")
) |>
dplyr::mutate(value = ifelse(.data$Evidence != "asr", NA, value)) |>
dplyr::rename(Validation = .data$value)
dplyr::mutate(value = ifelse(Evidence != "asr", NA, value)) |>
dplyr::rename(Validation = value)
})

if (exclude_rarely) {
output <- purrr::map(output, ~ dplyr::filter(.x, .data$Frequency != "rarely"))
output <- purrr::map(output, ~ dplyr::filter(.x, Frequency != "rarely"))
}
return(output)
}
Expand Down Expand Up @@ -170,8 +176,8 @@ makeSignatures <- function(
}
dat <- dat |>
dplyr::filter(Rank %in% tax_level) |>
dplyr::filter(.data$Evidence %in% evidence) |>
dplyr::filter(.data$Frequency %in% frequency)
dplyr::filter(Evidence %in% evidence) |>
dplyr::filter(Frequency %in% frequency)
if (!nrow(dat)) {
warning(
"Not enough data for creating signatures. Try different filtering options",
Expand Down Expand Up @@ -229,7 +235,7 @@ getTaxonSignatures <- function(tax, bp, ...) {
.makeSignaturesDiscrete <- function(dat, tax_id_type = "NCBI_ID") {
dat |>
dplyr::mutate(
Attribute = paste0("bugphyzz:", .data$Attribute, "|", .data$Attribute_value)
Attribute = paste0("bugphyzz:", Attribute, "|", Attribute_value)
) |>
{\(y) split(y, y$Attribute)}() |>
lapply(function(x) unique(x[[tax_id_type]]))
Expand All @@ -249,14 +255,14 @@ getTaxonSignatures <- function(tax, bp, ...) {
}
dat <- dat |>
dplyr::filter(
.data$Attribute_value >= min & .data$Attribute_value <= max
Attribute_value >= min & Attribute_value <= max
) |>
dplyr::mutate(
Attribute = paste0("bugphyzz:", .data$Attribute, "| >=", min, " & <=", max)
Attribute = paste0("bugphyzz:", Attribute, "| >=", min, " & <=", max)
)
} else {
thr <- .thresholds() |>
dplyr::filter(.data$Attribute_group == unique(dat$Attribute))
dplyr::filter(Attribute_group == unique(dat$Attribute))
attr_name <- thr$Attribute
min_values <- thr$lower
max_values <- thr$upper
Expand All @@ -282,15 +288,15 @@ getTaxonSignatures <- function(tax, bp, ...) {
utils::read.table(fname, header = TRUE, sep = '\t') |>
dplyr::mutate(
range = dplyr::case_when(
is.na(.data$lower) ~ paste0('<=', .data$upper),
is.na(.data$upper) ~ paste0('>=', .data$lower),
TRUE ~ paste0(.data$lower, '-', .data$upper)
is.na(lower) ~ paste0('<=', upper),
is.na(upper) ~ paste0('>=', lower),
TRUE ~ paste0(lower, '-', upper)
),
unit = ifelse(is.na(.data$unit), '', .data$unit)
unit = ifelse(is.na(unit), '', unit)
) |>
dplyr::mutate(Attribute_range = paste0(range, unit)) |>
dplyr::relocate(
.data$Attribute_group, .data$Attribute, .data$Attribute_range
Attribute_group, Attribute, Attribute_range
)
}

Expand Down
8 changes: 4 additions & 4 deletions R/fattyAcidComposition.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
## TODO: Decide on a threshold for considering a fatty acid (FA) as present or absent.
.fattyAcidComposition <- function(){
link <- .customLinks() |>
dplyr::filter(.data[["functionname"]] == "fattyAcidComposition") |>
dplyr::pull(.data[["link"]])
dplyr::filter(functionname == "fattyAcidComposition") |>
dplyr::pull(link)
fac_wide <- utils::read.csv(link, check.names = FALSE)
fac_long <- fac_wide |>
tidyr::pivot_longer(
cols = .data[["Br-C10:1"]]:.data[["Oxo-C19:1"]],
cols = `Br-C10:1`:`Oxo-C19:1`,
names_to = "Attribute_new", values_to = "Attribute_value"
) |>
dplyr::mutate(NCBI_ID = as.character(.data[["NCBI_ID"]]))
dplyr::mutate(NCBI_ID = as.character(NCBI_ID))
dplyr::left_join(fac_long, ranks_parents, by = "NCBI_ID") |>
as.data.frame() |>
.addSourceInfo() |>
Expand Down
Loading

0 comments on commit 4a898bb

Please sign in to comment.