diff --git a/articles/phytools.html b/articles/phytools.html index 0d64406..89bb514 100644 --- a/articles/phytools.html +++ b/articles/phytools.html @@ -659,7 +659,7 @@

ASR with phytoolsasr <- ancr(object = fit, tips = TRUE) }) #> user system elapsed -#> 356.163 121.082 244.111 +#> 358.835 120.467 244.809
 res <- asr$ace
 node_rows <- length(tree$tip.label) + 1:tree$Nnode
@@ -713,21 +713,37 @@ 

ASR with phytoolspivot_longer( names_to = 'Attribute', values_to = 'Score', cols = 4:last_col() ) |> - mutate(Evidence = 'asr') + mutate( + Evidence = 'asr', + Attribute_source = NA, + Confidence_in_curation = NA, + taxid = sub('\\w__', '', NCBI_ID), + Attribute_type = Attribute_type_var, + Attribute_group = Attribute_group_var, + Frequency = case_when( + Score == 1 ~ 'always', + Score > 0.9 ~ 'usually', + Score >= 0.5 ~ 'sometimes', + Score > 0 & Score < 0.5 ~ 'rarely', + Score == 0 ~ 'never' + ) + ) #> Warning: There was 1 warning in `mutate()`. #> In argument: `Rank = taxizedb::taxid2rank(taxid, db = "ncbi")`. #> Caused by warning in `taxizedb::taxid2rank()`: #> ! No rank found for 2 of 1513 taxon IDs. The followings are left unrankd: 1740162, 66898 head(new_taxa_from_tips) -#> # A tibble: 6 × 6 -#> NCBI_ID Taxon_name Rank Attribute Score Evidence -#> <chr> <chr> <chr> <chr> <dbl> <chr> -#> 1 s__2899544 Intestinirhabdus alba species aerobic 0.0631 asr -#> 2 s__2899544 Intestinirhabdus alba species aerotolerant 0.00793 asr -#> 3 s__2899544 Intestinirhabdus alba species anaerobic 0.00793 asr -#> 4 s__2899544 Intestinirhabdus alba species facultative… 0.921 asr -#> 5 s__2799638 Tenebrionibacter intestinalis species aerobic 0.888 asr -#> 6 s__2799638 Tenebrionibacter intestinalis species aerotolerant 0.0371 asr

+#> # A tibble: 6 × 12 +#> NCBI_ID Taxon_name Rank Attribute Score Evidence Attribute_source +#> <chr> <chr> <chr> <chr> <dbl> <chr> <lgl> +#> 1 s__2899544 Intestinirhabdus… spec… aerobic 0.0631 asr NA +#> 2 s__2899544 Intestinirhabdus… spec… aerotole… 0.00793 asr NA +#> 3 s__2899544 Intestinirhabdus… spec… anaerobic 0.00793 asr NA +#> 4 s__2899544 Intestinirhabdus… spec… facultat… 0.921 asr NA +#> 5 s__2799638 Tenebrionibacter… spec… aerobic 0.888 asr NA +#> 6 s__2799638 Tenebrionibacter… spec… aerotole… 0.0371 asr NA +#> # ℹ 5 more variables: Confidence_in_curation <lgl>, taxid <chr>, +#> # Attribute_type <chr>, Attribute_group <chr>, Frequency <chr>

Most of these new taxa are in the NCBI tree:

 ncbi_tree_nodes_empty <- ncbi_tree$Get(
@@ -769,22 +785,40 @@ 

ASR with phytoolsrelocate(NCBI_ID, Rank, Evidence) |> pivot_longer( cols = 4:last_col(), names_to = 'Attribute', values_to = 'Score' + ) |> + mutate( + Attribute_source = NA, + Confidence_in_curation = NA, + Attribute_group = Attribute_group_var, + Attribute_type = Attribute_type_var, + taxid = sub('\\w__', '', NCBI_ID), + Taxon_name = taxizedb::taxid2name(taxid, db = 'ncbi'), + Frequency = case_when( + Score == 1 ~ 'always', + Score > 0.9 ~ 'usually', + Score >= 0.5 ~ 'sometimes', + Score > 0 & Score < 0.5 ~ 'rarely', + Score == 0 ~ 'never' + ) ) head(new_taxa_from_nodes) -#> # A tibble: 6 × 5 -#> NCBI_ID Rank Evidence Attribute Score -#> <chr> <chr> <chr> <chr> <dbl> -#> 1 k__2157 kingdom asr aerobic 0.0000127 -#> 2 k__2157 kingdom asr aerotolerant 0.0000122 -#> 3 k__2157 kingdom asr anaerobic 1.00 -#> 4 k__2157 kingdom asr facultatively anaerobic 0.0000122 -#> 5 k__2 kingdom asr aerobic 0.000180 -#> 6 k__2 kingdom asr aerotolerant 0.0000241

-

Some new taxa can be added to the empty:

+#> # A tibble: 6 × 12 +#> NCBI_ID Rank Evidence Attribute Score Attribute_source +#> <chr> <chr> <chr> <chr> <dbl> <lgl> +#> 1 k__2157 kingdom asr aerobic 0.0000127 NA +#> 2 k__2157 kingdom asr aerotolerant 0.0000122 NA +#> 3 k__2157 kingdom asr anaerobic 1.00 NA +#> 4 k__2157 kingdom asr facultatively anaerobic 0.0000122 NA +#> 5 k__2 kingdom asr aerobic 0.000180 NA +#> 6 k__2 kingdom asr aerotolerant 0.0000241 NA +#> # ℹ 6 more variables: Confidence_in_curation <lgl>, Attribute_group <chr>, +#> # Attribute_type <chr>, taxid <chr>, Taxon_name <chr>, Frequency <chr> +

Some new taxa can be added to the nodes with empty attribute +table:

 mean(new_taxa_from_nodes$NCBI_ID %in% ncbi_tree_nodes_empty)
 #> [1] 0.3284844
-

Actually most are already in the tree, but we got them throuh +

Actually most are already in the tree, but we got them through taxonomic binning (genus, species, and strains)

 mean(new_taxa_from_nodes$NCBI_ID %in% ncbi_tree_nodes)
@@ -792,7 +826,7 @@ 

ASR with phytools
 new_taxa_for_ncbi_tree <- new_taxa_from_tips |> 
-    select(-Taxon_name) |> 
+    # select(-Taxon_name) |> 
     relocate(NCBI_ID, Rank, Attribute, Score, Evidence) |> 
     bind_rows(new_taxa_from_nodes)
 new_taxa_for_ncbi_tree_list <- split(
@@ -836,7 +870,23 @@ 

ASR with phytools= 'inh2' ) |> select(-target_scores, -score_diff) |> - relocate(NCBI_ID) + relocate(NCBI_ID) |> + mutate( + Attribute_source = NA, + Confidence_in_curation = NA, + Attribute_group = Attribute_group_var, + Attribute_type = Attribute_type_var, + taxid = node$taxid, + Taxon_name = node$Taxon_name, + Rank = node$Rank, + Frequency = case_when( + Score == 1 ~ 'always', + Score > 0.9 ~ 'usually', + Score >= 0.5 ~ 'sometimes', + Score > 0 & Score < 0.5 ~ 'rarely', + Score == 0 ~ 'never' + ) + ) node$attribute_tbl <- res } } @@ -853,13 +903,15 @@

ASR with phytools#> [1] 1

 ncbi_tree$k__2$p__1224$c__28211$o__356$f__335928$attribute_tbl
-#> # A tibble: 4 × 5
-#>   NCBI_ID   Rank   Attribute                      Score Evidence
-#>   <chr>     <chr>  <chr>                          <dbl> <chr>   
-#> 1 f__335928 family aerobic                 1.00         asr     
-#> 2 f__335928 family aerotolerant            0.0000000431 asr     
-#> 3 f__335928 family anaerobic               0.00000129   asr     
-#> 4 f__335928 family facultatively anaerobic 0.0000000988 asr
+#> # A tibble: 4 × 12 +#> NCBI_ID Rank Attribute Score Evidence Taxon_name Attribute_source +#> <chr> <chr> <chr> <dbl> <chr> <chr> <lgl> +#> 1 f__335928 family aerobic 1.00e+0 asr Xanthobac… NA +#> 2 f__335928 family aerotolerant 4.31e-8 asr Xanthobac… NA +#> 3 f__335928 family anaerobic 1.29e-6 asr Xanthobac… NA +#> 4 f__335928 family facultatively a… 9.88e-8 asr Xanthobac… NA +#> # ℹ 5 more variables: Confidence_in_curation <lgl>, taxid <chr>, +#> # Attribute_type <chr>, Attribute_group <chr>, Frequency <chr>

Get all the tables

 output <- ncbi_tree$Get(
@@ -869,17 +921,33 @@ 

ASR with phytoolsoutput <- bind_rows(output) head(output) #> # A tibble: 6 × 12 -#> NCBI_ID Rank Attribute Score Evidence taxid Taxon_name Attribute_group -#> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> -#> 1 k__2 kingdom aerobic 1.80e-4 asr NA NA NA -#> 2 k__2 kingdom aerotolerant 2.41e-5 asr NA NA NA -#> 3 k__2 kingdom anaerobic 1.00e+0 asr NA NA NA -#> 4 k__2 kingdom facultative… 2.41e-5 asr NA NA NA -#> 5 p__1224 phylum aerobic 8.46e-1 asr NA NA NA -#> 6 p__1224 phylum aerotolerant 4.05e-4 asr NA NA NA -#> # ℹ 4 more variables: Attribute_type <chr>, Frequency <chr>, -#> # Attribute_source <chr>, Confidence_in_curation <ord>

+#> NCBI_ID Rank Attribute Score Evidence Taxon_name Attribute_source +#> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> +#> 1 k__2 kingdom aerobic 1.80e-4 asr Bacteria NA +#> 2 k__2 kingdom aerotolerant 2.41e-5 asr Bacteria NA +#> 3 k__2 kingdom anaerobic 1.00e+0 asr Bacteria NA +#> 4 k__2 kingdom facultatively an… 2.41e-5 asr Bacteria NA +#> 5 p__1224 phylum aerobic 8.46e-1 asr Pseudomon… NA +#> 6 p__1224 phylum aerotolerant 4.05e-4 asr Pseudomon… NA +#> # ℹ 5 more variables: Confidence_in_curation <ord>, taxid <chr>, +#> # Attribute_type <chr>, Attribute_group <chr>, Frequency <chr> +

Number of annotations per evidence

+table(output$Evidence, useNA = 'always')
+#> 
+#>    asr    exp    igc    inh   inh2    nas    tas    tax   <NA> 
+#>   7872   2710    550 170984  50988    898   2583  28258  37809
+

NAs are the ones that were completed with tidyr::complete, so all of +them have score 0

+
+output |> 
+    filter(is.na(Evidence)) |> 
+    pull(Score) |> 
+    unique()
+#> [1] 0
+

Final annotations passing the min threshold (0.25 for +aerophilicity)

+
 min_thr <- 1 / length(unique(phys_data_ready$Attribute))
 addTaxa1 <- phys_data_ready |> 
     filter(!NCBI_ID %in% unique(output$NCBI_ID))
@@ -893,7 +961,7 @@ 

ASR with phytools

Session information

-
+
 sessioninfo::session_info()
 #> ─ Session info ───────────────────────────────────────────────────────────────
 #>  setting  value
diff --git a/pkgdown.yml b/pkgdown.yml
index 8c1c92a..0cb2581 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -3,7 +3,7 @@ pkgdown: 2.0.7
 pkgdown_sha: ~
 articles:
   phytools: phytools.html
-last_built: 2023-09-28T02:38Z
+last_built: 2023-09-28T14:37Z
 urls:
   reference: https://sdgamboa.github.io/taxPPro/reference
   article: https://sdgamboa.github.io/taxPPro/articles