From 4a898bb10bdd782dfcf2c177156e042021edf61b Mon Sep 17 00:00:00 2001 From: sdgamboa Date: Fri, 22 Mar 2024 14:14:19 -0400 Subject: [PATCH] R CMD check passing - no warnings or errors --- R/bacdive.R | 74 ++++++++++++++++++++-------------------- R/bugphyzz.R | 64 ++++++++++++++++++---------------- R/fattyAcidComposition.R | 8 ++--- R/physiologies.R | 50 +++++++++++++-------------- 4 files changed, 101 insertions(+), 95 deletions(-) diff --git a/R/bacdive.R b/R/bacdive.R index 93ecff21..7143a207 100644 --- a/R/bacdive.R +++ b/R/bacdive.R @@ -37,18 +37,18 @@ .getTidyBD <- function(bacdive_data) { bacdive_data |> tidyr::pivot_longer( - cols = .data$gram_stain:tidyr::last_col(), # Attributes start in the gram_stain column + cols = gram_stain:tidyr::last_col(), # Attributes start in the gram_stain column names_to = 'Attribute', values_to = 'Attribute_value' ) |> - dplyr::filter(.data$Attribute_value != '') |> - dplyr::mutate(Attribute = gsub('_', ' ', .data$Attribute)) |> + dplyr::filter(Attribute_value != '') |> + dplyr::mutate(Attribute = gsub('_', ' ', Attribute)) |> dplyr::mutate( Attribute = dplyr::case_when( - .data$Attribute == 'oxygen tolerance' ~ 'aerophilicity', - .data$Attribute == 'cell shape' ~ 'shape', - .data$Attribute == 'pathogenicity animal' ~ 'animal pathongen', - .data$Attribute == 'sample type' ~ 'isolation site', - TRUE ~ .data$Attribute + Attribute == 'oxygen tolerance' ~ 'aerophilicity', + Attribute == 'cell shape' ~ 'shape', + Attribute == 'pathogenicity animal' ~ 'animal pathongen', + Attribute == 'sample type' ~ 'isolation site', + TRUE ~ Attribute ) ) |> dplyr::distinct() @@ -163,42 +163,42 @@ ) regex <- paste0('(', paste0(valid_terms, collapse = '|'), ')') split_df[['halophily']] <- split_df[['halophily']] |> - dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, ';')) |> + dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |> tidyr::unnest(cols = 'Attribute_value') |> - dplyr::filter(!grepl('no growth', 
.data$Attribute_value)) |> + dplyr::filter(!grepl('no growth', Attribute_value)) |> dplyr::mutate( - Attribute_value = stringr::str_squish(.data$Attribute_value), - Attribute_value = sub('NaCL', 'NaCl', .data$Attribute_value), - Attribute_value = sub('Marine', 'Sea', .data$Attribute_value), - Attribute_value = sub('Salts', 'salts', .data$Attribute_value) + Attribute_value = stringr::str_squish(Attribute_value), + Attribute_value = sub('NaCL', 'NaCl', Attribute_value), + Attribute_value = sub('Marine', 'Sea', Attribute_value), + Attribute_value = sub('Salts', 'salts', Attribute_value) ) |> - dplyr::filter(grepl(regex, .data$Attribute_value)) |> + dplyr::filter(grepl(regex, Attribute_value)) |> dplyr::mutate( - Attribute = stringr::str_extract(.data$Attribute_value, regex), - Unit = .data$Attribute_value |> + Attribute = stringr::str_extract(Attribute_value, regex), + Unit = Attribute_value |> stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |> stringr::str_squish() |> stringr::str_remove('^.* '), - Attribute_value = .data$Attribute_value |> + Attribute_value = Attribute_value |> stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |> stringr::str_squish() |> stringr::str_remove(' .*$'), Attribute_group = 'halophily', Attribute_type = 'range' ) |> - dplyr::filter(!grepl('[0-9]', .data$Unit)) |> + dplyr::filter(!grepl('[0-9]', Unit)) |> dplyr::distinct() ## hemolysis #### split_df[['hemolysis']] <- split_df[['hemolysis']] |> dplyr::mutate( - Attribute_value = strsplit(.data$Attribute_value, ';|/') + Attribute_value = strsplit(Attribute_value, ';|/') ) |> - tidyr::unnest(.data$Attribute_value) |> - dplyr::mutate(Attribute_value = stringr::str_squish(.data$Attribute_value)) |> - dplyr::filter(.data$Attribute_value != '') |> + tidyr::unnest(Attribute_value) |> + dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value)) |> + dplyr::filter(Attribute_value != '') |> dplyr::mutate( - Attribute = .data$Attribute_value, + Attribute = Attribute_value, 
Attribute_value = TRUE, Attribute_group = 'hemolysis', Attribute_type = 'multistate-intersection' @@ -212,8 +212,8 @@ split_df[['motility']] <- split_df[['motility']] |> dplyr::mutate( Attribute_value = dplyr::case_when( - .data[['Attribute_value']] == 'yes' ~ TRUE, - .data[['Attribute_value']] == 'no' ~ FALSE + Attribute_value == 'yes' ~ TRUE, + Attribute_value == 'no' ~ FALSE ) ) split_df[['motility']][['Attribute_group']] <- 'motility' @@ -232,8 +232,8 @@ ## metabolite production #### mp <- split_df[['metabolite production']] mp <- mp |> - dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, ';')) |> - tidyr::unnest(.data$Attribute_value) + dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |> + tidyr::unnest(Attribute_value) x <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$') mp <- mp[which(!is.na(x)),] y <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$') @@ -249,9 +249,9 @@ names(split_df)[pos] <- 'metabolite utilization' mu <- split_df[['metabolite utilization']] mu <- mu |> - dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, ';')) |> - tidyr::unnest(.data$Attribute_value) |> - dplyr::mutate(Attribute_value = stringr::str_squish(.data$Attribute_value)) + dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |> + tidyr::unnest(Attribute_value) |> + dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value)) x <- sub('^.* (\\+|-|\\+/-) *.*$', '\\1', mu[['Attribute_value']]) y <- ifelse(!x %in% c('+', '-', '+/-'), NA, x) mu <- mu[which(!is.na(y)),] @@ -265,9 +265,9 @@ y == '+/-' ~ 'TRUE/FALSE' ) mu <- mu |> - dplyr::mutate(Attribute_value = strsplit(.data$Attribute_value, '/')) |> - tidyr::unnest(.data$Attribute_value) |> - dplyr::mutate(Attribute_value = as.logical(.data$Attribute_value)) + dplyr::mutate(Attribute_value = strsplit(Attribute_value, '/')) |> + tidyr::unnest(Attribute_value) |> + dplyr::mutate(Attribute_value = as.logical(Attribute_value)) mu[['Attribute_group']] <- 
'metabolite utilization' mu[['Attribute_type']] <- 'multistate-intersection' split_df[['metabolite utilization']] <- mu @@ -277,13 +277,13 @@ sf <- sf |> dplyr::mutate( Attribute_value = dplyr::case_when( - .data[['Attribute_value']] == 'yes' ~ TRUE, - .data[['Attribute_value']] == 'no' ~ FALSE + Attribute_value == 'yes' ~ TRUE, + Attribute_value == 'no' ~ FALSE ), Attribute_group = 'spore formation', Attribute_type = 'binary' ) |> - dplyr::filter(!is.na(.data$Attribute_value)) + dplyr::filter(!is.na(Attribute_value)) split_df[['spore formation']] <- sf split_df <- lapply(split_df, function(x) { diff --git a/R/bugphyzz.R b/R/bugphyzz.R index 13f1686a..7d64f50d 100644 --- a/R/bugphyzz.R +++ b/R/bugphyzz.R @@ -1,5 +1,11 @@ -# utils::globalVariables(c("Rank")) -Rank <- NULL +utils::globalVariables(c( + "Rank", + "Attribute", "Attribute_group", "Attribute_new", "Attribute_range", + "Attribute_value", "Attribute_value_max", "Attribute_value_min", "Br-C10:1", + "Evidence", "Frequency", "NCBI_ID", "Oxo-C19:1", "Taxon_name", "Unit", + "attribute", "functionname", "gram_stain", "physiology", "unit", "value" +)) + #' Import bugphyzz #' #' \code{importBugphyzz} imports bugphyzz annotations as a list of @@ -68,7 +74,7 @@ importBugphyzz <- function( url = urls[i], verbose = TRUE, force = force_download ) output[[i]] <- utils::read.csv(rpath, header = TRUE, skip = 1) |> - dplyr::mutate(Attribute = tolower(.data$Attribute)) + dplyr::mutate(Attribute = tolower(Attribute)) } } output <- lapply(output, function(x) split(x, x$Attribute)) @@ -82,43 +88,43 @@ importBugphyzz <- function( .x |> dplyr::mutate( Attribute = ifelse( - .data$Attribute == "plant pathogenity", + Attribute == "plant pathogenity", "plant pathogenicity", - .data$Attribute + Attribute ) ) }) names(output) <- purrr::map_chr(output, ~ unique(.x$Attribute)) val <- .validationData() |> - dplyr::filter(.data$rank == "all") |> - dplyr::select(.data$physiology, .data$attribute, .data$value) |> - dplyr::mutate(physiology = 
tolower(.data$physiology)) |> - dplyr::mutate(attribute = tolower(.data$attribute)) + dplyr::filter(rank == "all") |> + dplyr::select(physiology, attribute, value) |> + dplyr::mutate(physiology = tolower(physiology)) |> + dplyr::mutate(attribute = tolower(attribute)) output <- purrr::map(output, ~ { attr_type <- unique(.x$Attribute_type) if (attr_type == "binary") { - val <- dplyr::select(val, Attribute = .data$attribute, .data$value) + val <- dplyr::select(val, Attribute = attribute, value) o <- dplyr::left_join(.x, val, by = "Attribute" ) } else if (attr_type == "multistate-intersection" || attr_type == "multistate-union") { - val <- dplyr::select(val, Attribute = .data$physiology, Attribute_value = .data$attribute, .data$value) - o <- dplyr::left_join(dplyr::mutate(.x, Attribute_value = tolower(.data$Attribute_value)) , val, by = c("Attribute", "Attribute_value")) + val <- dplyr::select(val, Attribute = physiology, Attribute_value = attribute, value) + o <- dplyr::left_join(dplyr::mutate(.x, Attribute_value = tolower(Attribute_value)) , val, by = c("Attribute", "Attribute_value")) } else if (attr_type == "numeric") { - val <- dplyr::select(val, Attribute = .data$attribute, .data$value) + val <- dplyr::select(val, Attribute = attribute, value) o <- dplyr::left_join(.x, val, by = "Attribute") |> - dplyr::rename(NSTI = .data$nsti) + dplyr::rename(NSTI = nsti) } o |> dplyr::filter( - !(.data$value < v & .data$Evidence == "asr") + !(value < v & Evidence == "asr") ) |> - dplyr::mutate(value = ifelse(.data$Evidence != "asr", NA, value)) |> - dplyr::rename(Validation = .data$value) + dplyr::mutate(value = ifelse(Evidence != "asr", NA, value)) |> + dplyr::rename(Validation = value) }) if (exclude_rarely) { - output <- purrr::map(output, ~ dplyr::filter(.x, .data$Frequency != "rarely")) + output <- purrr::map(output, ~ dplyr::filter(.x, Frequency != "rarely")) } return(output) } @@ -170,8 +176,8 @@ makeSignatures <- function( } dat <- dat |> dplyr::filter(Rank %in% 
tax_level) |> - dplyr::filter(.data$Evidence %in% evidence) |> - dplyr::filter(.data$Frequency %in% frequency) + dplyr::filter(Evidence %in% evidence) |> + dplyr::filter(Frequency %in% frequency) if (!nrow(dat)) { warning( "Not enough data for creating signatures. Try different filtering options", @@ -229,7 +235,7 @@ getTaxonSignatures <- function(tax, bp, ...) { .makeSignaturesDiscrete <- function(dat, tax_id_type = "NCBI_ID") { dat |> dplyr::mutate( - Attribute = paste0("bugphyzz:", .data$Attribute, "|", .data$Attribute_value) + Attribute = paste0("bugphyzz:", Attribute, "|", Attribute_value) ) |> {\(y) split(y, y$Attribute)}() |> lapply(function(x) unique(x[[tax_id_type]])) @@ -249,14 +255,14 @@ getTaxonSignatures <- function(tax, bp, ...) { } dat <- dat |> dplyr::filter( - .data$Attribute_value >= min & .data$Attribute_value <= max + Attribute_value >= min & Attribute_value <= max ) |> dplyr::mutate( - Attribute = paste0("bugphyzz:", .data$Attribute, "| >=", min, " & <=", max) + Attribute = paste0("bugphyzz:", Attribute, "| >=", min, " & <=", max) ) } else { thr <- .thresholds() |> - dplyr::filter(.data$Attribute_group == unique(dat$Attribute)) + dplyr::filter(Attribute_group == unique(dat$Attribute)) attr_name <- thr$Attribute min_values <- thr$lower max_values <- thr$upper @@ -282,15 +288,15 @@ getTaxonSignatures <- function(tax, bp, ...) 
{ utils::read.table(fname, header = TRUE, sep = '\t') |> dplyr::mutate( range = dplyr::case_when( - is.na(.data$lower) ~ paste0('<=', .data$upper), - is.na(.data$upper) ~ paste0('>=', .data$lower), - TRUE ~ paste0(.data$lower, '-', .data$upper) + is.na(lower) ~ paste0('<=', upper), + is.na(upper) ~ paste0('>=', lower), + TRUE ~ paste0(lower, '-', upper) ), - unit = ifelse(is.na(.data$unit), '', .data$unit) + unit = ifelse(is.na(unit), '', unit) ) |> dplyr::mutate(Attribute_range = paste0(range, unit)) |> dplyr::relocate( - .data$Attribute_group, .data$Attribute, .data$Attribute_range + Attribute_group, Attribute, Attribute_range ) } diff --git a/R/fattyAcidComposition.R b/R/fattyAcidComposition.R index a05be352..92b5c67e 100644 --- a/R/fattyAcidComposition.R +++ b/R/fattyAcidComposition.R @@ -5,15 +5,15 @@ ## TODO Maybe a threshold should be decided to consider a FA as present or not. .fattyAcidComposition <- function(){ link <- .customLinks() |> - dplyr::filter(.data[["functionname"]] == "fattyAcidComposition") |> - dplyr::pull(.data[["link"]]) + dplyr::filter(functionname == "fattyAcidComposition") |> + dplyr::pull(link) fac_wide <- utils::read.csv(link, check.names = FALSE) fac_long <- fac_wide |> tidyr::pivot_longer( - cols = .data[["Br-C10:1"]]:.data[["Oxo-C19:1"]], + cols = `Br-C10:1`:`Oxo-C19:1`, names_to = "Attribute_new", values_to = "Attribute_value" ) |> - dplyr::mutate(NCBI_ID = as.character(.data[["NCBI_ID"]])) + dplyr::mutate(NCBI_ID = as.character(NCBI_ID)) dplyr::left_join(fac_long, ranks_parents, by = "NCBI_ID") |> as.data.frame() |> .addSourceInfo() |> diff --git a/R/physiologies.R b/R/physiologies.R index be4c3f0f..c401a9ba 100644 --- a/R/physiologies.R +++ b/R/physiologies.R @@ -169,7 +169,7 @@ showPhys <- function(which_names = 'all') { } else if (unique(df[['Attribute_type']] == 'range')) { df <- .modifyRange(df) } else if (unique(df[['Attribute_type']] %in% .DISCRETE_ATTRIBUTE_TYPES())) { - df <- dplyr::filter(df, .data$Attribute_value == 
TRUE | .data$Attribute_value == FALSE) + df <- dplyr::filter(df, Attribute_value == TRUE | Attribute_value == FALSE) } if (all(parent_col_names %in% colnames(df))) { @@ -191,10 +191,10 @@ showPhys <- function(which_names = 'all') { ## Helper function for .importSpreadsheets .numericToRange <- function(df) { df <- df |> - dplyr::group_by(.data$NCBI_ID, .data$Taxon_name) |> + dplyr::group_by(NCBI_ID, Taxon_name) |> dplyr::mutate( - Attribute_value_min = as.double(.data$Attribute_value), - Attribute_value_max = as.double(.data$Attribute_value), + Attribute_value_min = as.double(Attribute_value), + Attribute_value_max = as.double(Attribute_value), Attribute_type = 'range' ) |> dplyr::ungroup() |> @@ -210,25 +210,25 @@ showPhys <- function(which_names = 'all') { regex2 <- paste0('^(<|>)(\\-)?', num, '$') regex <- paste0('(', regex1, '|', regex2, ')') df <- df |> - dplyr::filter(grepl(regex, .data$Attribute_value)) |> + dplyr::filter(grepl(regex, Attribute_value)) |> dplyr::mutate( - Attribute_value = sub('^(\\-)([0-9]+(\\.[0-9]+)?)', 'minus\\2', .data$Attribute_value) + Attribute_value = sub('^(\\-)([0-9]+(\\.[0-9]+)?)', 'minus\\2', Attribute_value) ) |> dplyr::mutate( - Attribute_value = gsub(' ', '', .data$Attribute_value), + Attribute_value = gsub(' ', '', Attribute_value), Attribute_value = dplyr::case_when( - grepl('<', .data$Attribute_value) ~ paste0('-', .data$Attribute_value), - grepl('>', .data$Attribute_value) ~ paste0(.data$Attribute_value, '-'), - !grepl("\\-", .data$Attribute_value) ~ paste0(.data$Attribute_value, '-', .data$Attribute_value), - grepl("^\\-", .data$Attribute_value) ~ paste0("minusInf", .data$Attribute_value), - grepl("\\-$", .data$Attribute_value) ~ paste0(.data$Attribute_value, "Inf"), - TRUE ~ .data$Attribute_value + grepl('<', Attribute_value) ~ paste0('-', Attribute_value), + grepl('>', Attribute_value) ~ paste0(Attribute_value, '-'), + !grepl("\\-", Attribute_value) ~ paste0(Attribute_value, '-', Attribute_value), + grepl("^\\-", 
Attribute_value) ~ paste0("minusInf", Attribute_value), + grepl("\\-$", Attribute_value) ~ paste0(Attribute_value, "Inf"), + TRUE ~ Attribute_value ), - Attribute_value = sub('(<|>)', '', .data$Attribute_value), + Attribute_value = sub('(<|>)', '', Attribute_value), Attribute_value = dplyr::case_when( - grepl("^\\-", .data$Attribute_value) ~ paste0("minusInf", .data$Attribute_value), - grepl("\\-$", .data$Attribute_value) ~ paste0(.data$Attribute_value, "Inf"), - TRUE ~ .data$Attribute_value + grepl("^\\-", Attribute_value) ~ paste0("minusInf", Attribute_value), + grepl("\\-$", Attribute_value) ~ paste0(Attribute_value, "Inf"), + TRUE ~ Attribute_value ) ) |> tidyr::separate( @@ -236,12 +236,12 @@ showPhys <- function(which_names = 'all') { into = c('Attribute_value_min', 'Attribute_value_max'), sep = '-' ) |> dplyr::mutate( - Attribute_value_min = sub('minus', '-', .data$Attribute_value_min), - Attribute_value_max = sub('minus', '-', .data$Attribute_value_min) + Attribute_value_min = sub('minus', '-', Attribute_value_min), + Attribute_value_max = sub('minus', '-', Attribute_value_max) ) |> dplyr::mutate( - Attribute_value_min = as.double(.data$Attribute_value_min), - Attribute_value_max = as.double(.data$Attribute_value_max) + Attribute_value_min = as.double(Attribute_value_min), + Attribute_value_max = as.double(Attribute_value_max) ) |> dplyr::distinct() } @@ -291,10 +291,10 @@ showPhys <- function(which_names = 'all') { .homogenizeAerophilicityAttributeNames <- function(df) { df |> dplyr::mutate( Attribute = dplyr::case_when( - .data$Attribute == 'obligately anaerobic' ~ 'anaerobic', - .data$Attribute == 'microaerophilic' ~ 'aerobic', - .data$Attribute == 'obligately aerobic' ~ 'aerobic', - TRUE ~ .data$Attribute + Attribute == 'obligately anaerobic' ~ 'anaerobic', + Attribute == 'microaerophilic' ~ 'aerobic', + Attribute == 'obligately aerobic' ~ 'aerobic', + TRUE ~ Attribute ) ) }