diff --git a/articles/phytools.html b/articles/phytools.html index 0d64406..89bb514 100644 --- a/articles/phytools.html +++ b/articles/phytools.html @@ -659,7 +659,7 @@
res <- asr$ace
node_rows <- length(tree$tip.label) + 1:tree$Nnode
@@ -713,21 +713,37 @@ ASR with phytoolspivot_longer(
names_to = 'Attribute', values_to = 'Score', cols = 4:last_col()
) |>
- mutate(Evidence = 'asr')
+ mutate(
+ Evidence = 'asr',
+ Attribute_source = NA,
+ Confidence_in_curation = NA,
+ taxid = sub('\\w__', '', NCBI_ID),
+ Attribute_type = Attribute_type_var,
+ Attribute_group = Attribute_group_var,
+ Frequency = case_when(
+ Score == 1 ~ 'always',
+ Score > 0.9 ~ 'usually',
+ Score >= 0.5 ~ 'sometimes',
+ Score > 0 & Score < 0.5 ~ 'rarely',
+ Score == 0 ~ 'never'
+ )
+ )
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `Rank = taxizedb::taxid2rank(taxid, db = "ncbi")`.
#> Caused by warning in `taxizedb::taxid2rank()`:
#> ! No rank found for 2 of 1513 taxon IDs. The followings are left unrankd: 1740162, 66898
head(new_taxa_from_tips)
-#> # A tibble: 6 × 6
-#> NCBI_ID Taxon_name Rank Attribute Score Evidence
-#> <chr> <chr> <chr> <chr> <dbl> <chr>
-#> 1 s__2899544 Intestinirhabdus alba species aerobic 0.0631 asr
-#> 2 s__2899544 Intestinirhabdus alba species aerotolerant 0.00793 asr
-#> 3 s__2899544 Intestinirhabdus alba species anaerobic 0.00793 asr
-#> 4 s__2899544 Intestinirhabdus alba species facultative… 0.921 asr
-#> 5 s__2799638 Tenebrionibacter intestinalis species aerobic 0.888 asr
-#> 6 s__2799638 Tenebrionibacter intestinalis species aerotolerant 0.0371 asr
Most of these new taxa are in the NCBI tree:
ncbi_tree_nodes_empty <- ncbi_tree$Get(
@@ -769,22 +785,40 @@ ASR with phytoolsrelocate(NCBI_ID, Rank, Evidence) |>
pivot_longer(
cols = 4:last_col(), names_to = 'Attribute', values_to = 'Score'
+ ) |>
+ mutate(
+ Attribute_source = NA,
+ Confidence_in_curation = NA,
+ Attribute_group = Attribute_group_var,
+ Attribute_type = Attribute_type_var,
+ taxid = sub('\\w__', '', NCBI_ID),
+ Taxon_name = taxizedb::taxid2name(taxid, db = 'ncbi'),
+ Frequency = case_when(
+ Score == 1 ~ 'always',
+ Score > 0.9 ~ 'usually',
+ Score >= 0.5 ~ 'sometimes',
+ Score > 0 & Score < 0.5 ~ 'rarely',
+ Score == 0 ~ 'never'
+ )
)
head(new_taxa_from_nodes)
-#> # A tibble: 6 × 5
-#> NCBI_ID Rank Evidence Attribute Score
-#> <chr> <chr> <chr> <chr> <dbl>
-#> 1 k__2157 kingdom asr aerobic 0.0000127
-#> 2 k__2157 kingdom asr aerotolerant 0.0000122
-#> 3 k__2157 kingdom asr anaerobic 1.00
-#> 4 k__2157 kingdom asr facultatively anaerobic 0.0000122
-#> 5 k__2 kingdom asr aerobic 0.000180
-#> 6 k__2 kingdom asr aerotolerant 0.0000241
Some new taxa can be added to the empty:
+#> # A tibble: 6 × 12 +#> NCBI_ID Rank Evidence Attribute Score Attribute_source +#> <chr> <chr> <chr> <chr> <dbl> <lgl> +#> 1 k__2157 kingdom asr aerobic 0.0000127 NA +#> 2 k__2157 kingdom asr aerotolerant 0.0000122 NA +#> 3 k__2157 kingdom asr anaerobic 1.00 NA +#> 4 k__2157 kingdom asr facultatively anaerobic 0.0000122 NA +#> 5 k__2 kingdom asr aerobic 0.000180 NA +#> 6 k__2 kingdom asr aerotolerant 0.0000241 NA +#> # ℹ 6 more variables: Confidence_in_curation <lgl>, Attribute_group <chr>, +#> # Attribute_type <chr>, taxid <chr>, Taxon_name <chr>, Frequency <chr> +Some new taxa can be added to the nodes with empty attribute +table:
-Actually most are already in the tree, but we got them throuh +
Actually most are already in the tree, but we got them through taxonomic binning (genus, species, and strains)
mean(new_taxa_from_nodes$NCBI_ID %in% ncbi_tree_nodes)
@@ -792,7 +826,7 @@ ASR with phytools
new_taxa_for_ncbi_tree <- new_taxa_from_tips |>
- select(-Taxon_name) |>
+ # select(-Taxon_name) |>
relocate(NCBI_ID, Rank, Attribute, Score, Evidence) |>
bind_rows(new_taxa_from_nodes)
new_taxa_for_ncbi_tree_list <- split(
@@ -836,7 +870,23 @@ ASR with phytools= 'inh2'
) |>
select(-target_scores, -score_diff) |>
- relocate(NCBI_ID)
+ relocate(NCBI_ID) |>
+ mutate(
+ Attribute_source = NA,
+ Confidence_in_curation = NA,
+ Attribute_group = Attribute_group_var,
+ Attribute_type = Attribute_type_var,
+ taxid = node$taxid,
+ Taxon_name = node$Taxon_name,
+ Rank = node$Rank,
+ Frequency = case_when(
+ Score == 1 ~ 'always',
+ Score > 0.9 ~ 'usually',
+ Score >= 0.5 ~ 'sometimes',
+ Score > 0 & Score < 0.5 ~ 'rarely',
+ Score == 0 ~ 'never'
+ )
+ )
node$attribute_tbl <- res
}
}
@@ -853,13 +903,15 @@ ASR with phytools#> [1] 1
ncbi_tree$k__2$p__1224$c__28211$o__356$f__335928$attribute_tbl
-#> # A tibble: 4 × 5
-#> NCBI_ID Rank Attribute Score Evidence
-#> <chr> <chr> <chr> <dbl> <chr>
-#> 1 f__335928 family aerobic 1.00 asr
-#> 2 f__335928 family aerotolerant 0.0000000431 asr
-#> 3 f__335928 family anaerobic 0.00000129 asr
-#> 4 f__335928 family facultatively anaerobic 0.0000000988 asr
Get all the tables
output <- ncbi_tree$Get(
@@ -869,17 +921,33 @@ ASR with phytoolsoutput <- bind_rows(output)
head(output)
#> # A tibble: 6 × 12
-#> NCBI_ID Rank Attribute Score Evidence taxid Taxon_name Attribute_group
-#> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
-#> 1 k__2 kingdom aerobic 1.80e-4 asr NA NA NA
-#> 2 k__2 kingdom aerotolerant 2.41e-5 asr NA NA NA
-#> 3 k__2 kingdom anaerobic 1.00e+0 asr NA NA NA
-#> 4 k__2 kingdom facultative… 2.41e-5 asr NA NA NA
-#> 5 p__1224 phylum aerobic 8.46e-1 asr NA NA NA
-#> 6 p__1224 phylum aerotolerant 4.05e-4 asr NA NA NA
-#> # ℹ 4 more variables: Attribute_type <chr>, Frequency <chr>,
-#> # Attribute_source <chr>, Confidence_in_curation <ord>
Number of annotations per evidence
+table(output$Evidence, useNA = 'always')
+#>
+#> asr exp igc inh inh2 nas tas tax <NA>
+#> 7872 2710 550 170984 50988 898 2583 28258 37809
NAs are the ones that were completed with tidyr::complete, so all of +them have score 0
+ +Final annotations passing the min threshold (0.25 for +aerophilicity)
+
min_thr <- 1 / length(unique(phys_data_ready$Attribute))
addTaxa1 <- phys_data_ready |>
filter(!NCBI_ID %in% unique(output$NCBI_ID))
@@ -893,7 +961,7 @@ ASR with phytools
Session information
-
+
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
diff --git a/pkgdown.yml b/pkgdown.yml
index 8c1c92a..0cb2581 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -3,7 +3,7 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
phytools: phytools.html
-last_built: 2023-09-28T02:38Z
+last_built: 2023-09-28T14:37Z
urls:
reference: https://sdgamboa.github.io/taxPPro/reference
article: https://sdgamboa.github.io/taxPPro/articles