From 8cfd107a2e1930a7447b503df053019677a2fa65 Mon Sep 17 00:00:00 2001 From: sdgamboa Date: Tue, 9 Jan 2024 22:08:48 -0500 Subject: [PATCH] Update vignettes with new makeSignatures function and bugphyzz import. --- vignettes/articles/attributes.Rmd | 46 +++++++---------------- vignettes/bugphyzz.Rmd | 62 +++++++++++++++++-------------- 2 files changed, 49 insertions(+), 59 deletions(-) diff --git a/vignettes/articles/attributes.Rmd b/vignettes/articles/attributes.Rmd index 3d962e9d..4a724a3f 100644 --- a/vignettes/articles/attributes.Rmd +++ b/vignettes/articles/attributes.Rmd @@ -23,19 +23,20 @@ knitr::opts_chunk$set( library(DT) library(bugphyzz) library(dplyr) +library(purrr) ``` ```{r} -bp <- importBugphyzz(version = 'devel', force_download = TRUE) -bp <- bp |> - filter( - !Evidence %in% c('asr', 'inh') - ) -x <- bp |> - select(Attribute_source, Attribute_group, Attribute) |> - filter(!grepl(';', Attribute_source)) |> - count(Attribute_source, Attribute_group) |> - mutate(new_col = paste0(Attribute_group, ' (', n, ')')) +bp <- importBugphyzz(version = 'devel') +x <- map(bp, ~ { + .x |> + select(Attribute_group, Attribute, Attribute_source) |> + count(Attribute_group, Attribute, Attribute_source, name = "N_annotations") |> + distinct() +}) |> + bind_rows() |> + filter(!is.na(Attribute_source)) |> # Attribute source with NAs means it was obtained through ASR or TAX + relocate(Attribute_source, Attribute_group, Attribute, N_annotations) ``` ## Attributes @@ -80,11 +81,6 @@ datt ## Sources ```{r} -x <- x |> - select(-Attribute_group, -n) |> - group_by(Attribute_source) |> - summarise(Attribute_group = paste(sort(new_col), collapse = '; ')) |> - ungroup() fname2 <- system.file( 'extdata/attribute_sources.tsv', package = 'bugphyzz', mustWork = TRUE @@ -93,23 +89,9 @@ src <- read.table( fname2, header = TRUE, sep = '\t', quote = "" ) -src <- left_join(x, src, by = 'Attribute_source') |> - relocate( - Attribute_source, Confidence_in_curation, Evidence, - Attribute_group, full_source - ) |> - mutate( - Confidence_in_curation = factor( - Confidence_in_curation, levels = c('high', 'medium', 'low') - ) - ) |> - arrange(Confidence_in_curation, Evidence, Attribute_source) +xsrc <- left_join(x, src, by = 'Attribute_source') |> + rename(Full_source = full_source) -colnames(src) <- c( - 'Source (short)', 'Confidence in curation', 'Evidence*', 'Attribute group**', - 'Full source' -) - caption2 <- paste0( 'Table 2. Sources of attribute annotations in bugphyzz. ', '* Evidence codes: exp = experimental evidence, igc = inferred from genomic context, ', @@ -120,7 +102,7 @@ caption2 <- paste0( ) src_dt <- datatable( - data = src, + data = xsrc, filter = "top", extensions = c("Buttons","KeyTable"), # caption = caption2, diff --git a/vignettes/bugphyzz.Rmd b/vignettes/bugphyzz.Rmd index e8428f08..f5ecec60 100644 --- a/vignettes/bugphyzz.Rmd +++ b/vignettes/bugphyzz.Rmd @@ -22,7 +22,7 @@ also be accessed by direct download at **>>> insert Zenodo link here <<<**. # Import bugphyzz -```{r, message=FALSE, eval=FALSE} +```{r, eval=FALSE} if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") @@ -37,62 +37,70 @@ library(purrr) ``` ```{r import data} -bp <- importBugphyzz(version = 'devel', force_download = TRUE) -head(bp) +bp <- importBugphyzz(version = 'devel') +head(map(bp, head)) ``` ## Explore wich attribute groups are available ```{r} -unique(bp$Attribute_group) +names(bp) ``` -## Explore which attribute signatures are available +# Create signatures + +## Create signatures of taxids at the genus level for aerophilicity ```{r} -head(unique(bp$Attribute)) +aer_sigs_g <- makeSignatures( + dat = bp[["aerophilicity"]], tax_id_type = "NCBI_ID", tax_level = "genus" +) +map(aer_sigs_g, head) ``` -# Creating signatures - -## Create signatures of taxids at the genus level for aerophilicity +## Create signatures of taxa names at the species level for growth temperature ```{r} -aer <- bp[which(bp$Attribute_group == 'aerophilicity'),] -sigs1 <- getBugphyzzSignatures( - df = aer, tax.id.type = 'NCBI_ID', tax.level = 'genus', min.size = 10 +gt_sigs_sp <- makeSignatures( + dat = bp[["growth temperature"]], tax_id_type = "Taxon_name", + tax_level = 'species' ) -map(sigs1, head) +map(gt_sigs_sp, head) ``` -## Create signatures of taxa names at the species level for optimal ph +## Create signatures with custom threshold for numeric attributes + ```{r} -op <- bp[bp$Attribute_group == 'optimal ph', ] -sigs2 <- getBugphyzzSignatures( - df = op, tax.id.type = 'Taxon_name', tax.level = 'species', min.size = 10 +gt_sigs_mix <- makeSignatures( + dat = bp[["growth temperature"]], tax_id_type = "Taxon_name", + tax_level = "mixed", min = 0, max = 25 ) -map(sigs2, head) +map(gt_sigs_mix, head) ``` - -# Other examples +## Create signatures for a binary attribute ```{r} -mot <- bp[bp$Attribute_group == 'motility',] -sigs3 <- getBugphyzzSignatures( - df = mot, tax.id.type = 'Taxon_name', tax.level = 'mixed' +ap_sigs_mix <- makeSignatures( + dat = bp[["animal pathogen"]], tax_id_type = "NCBI_ID", + tax_level = "mixed", evidence = c("exp", "igc", "nas", "tas") ) -lapply(sigs3, head) +map(ap_sigs_mix, head) ``` -# Merge examples above +## Make signatures for all datasets with a single function call ```{r} -sigs <- c(sigs1, sigs2, sigs3) -lapply(sigs, head) +sigs <- map(bp, makeSignatures) |> + list_flatten() +length(sigs) ``` +```{r} +head(map(sigs, head)) +``` + ## Session information: ```{r}