diff --git a/inst/scripts/phagorn.R b/inst/scripts/phagorn.R
index e1217ca..c5ff8e8 100644
--- a/inst/scripts/phagorn.R
+++ b/inst/scripts/phagorn.R
@@ -1,5 +1,8 @@
+library(phangorn)
 
 x <- m3
+
+
 x[x > 0] <- 1
 x <- x[,sort(colnames(x))]
 rownames(x) <- NULL
@@ -13,6 +16,7 @@ for (i in seq_along(output)) {
 }
 rownames(y) <- output
 
+
 trait_mat <- m3[, sort(colnames(m3))]
 trait_mat[trait_mat > 0] <- 1
 trait_v <- vector('character', nrow(trait_mat))
@@ -46,10 +50,29 @@ system.time(
     {
         ## This one takes too long
         fit <- pml(mpa_tree, myData)
-        fit <- optim.pml(fit, model = NULL, control = pml.control(trace=0))
+        # fit <- optim.pml(fit, model = NULL, control = pml.control(trace=0))
     }
 )
 
+system.time(
+    anc.ml <- ancestral.pml(fit, "ml")
+)
+
+
+system.time(
+    anc.ml <- ancestral.pml(fit, "ml")
+)
+
+system.time(
+    anc.bayes <- ancestral.pml(fit, "bayes")
+)
+
+
+
+
+
+
+
 anc_pars_data <- vector('list', length(anc_pars))
 for (i in seq_along(anc_pars_data)) {
     anc_pars_data[[i]] <- as.data.frame(unname(anc_pars[[i]]))
diff --git a/vignettes/articles/ape.Rmd b/vignettes/articles/ape.Rmd
new file mode 100644
index 0000000..bc08d7c
--- /dev/null
+++ b/vignettes/articles/ape.Rmd
@@ -0,0 +1,463 @@
+---
+title: "A propagation workflow"
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup, message=FALSE}
+library(bugphyzz)
+library(taxPPro)
+library(data.tree)
+library(phytools)
+library(dplyr)
+library(purrr)
+library(tidyr)
+```
+
+Import physiology data (one physiology - aerophilicity):
+
+```{r import physiology, message=FALSE}
+phys <- physiologies('aerophilicity')
+```
+
+Filter data:
+
+```{r filter data}
+select_cols <- c(
+    ## Columns with information needed for propagation
+    'NCBI_ID', 'Taxon_name', 'Attribute', 'Attribute_source',
+    'Frequency', 'Score', 'Parent_NCBI_ID', 'Confidence_in_curation',
+    'Rank', 'Evidence',
+    
+    ## Columns only used for controlling the behavior of the code
+    'Attribute_type', 'Attribute_group'
+)
+valid_ranks <- c('genus', 'species', 'strain')
+phys_data <- phys[[1]] |> 
+    as_tibble() |> 
+    filter(Attribute_value == TRUE) |> # only for discrete traits with multiple states 
+    filter(!is.na(Rank), Rank %in% valid_ranks) |>
+    filter(
+        ## Filter rows with NCBI_ID.
+        ## If NCBI_ID is missing, Parent_NCBI_ID must be present
+        !((is.na(NCBI_ID) | NCBI_ID == 'unknown') & is.na(Parent_NCBI_ID))
+    ) |> 
+    filter(!is.na('Attribute_source'), !is.na('Frequency')) |>
+    mutate( 
+        Score = case_when(
+            Frequency == 'always' ~ 1,
+            Frequency == 'usually' ~ 0.9,
+            Frequency == 'sometimes' ~ 0.5,
+            Frequency == 'unknown' ~ 0.1 ## arbitrary value
+        )
+    ) |> 
+    select(all_of(select_cols)) |> 
+    distinct()
+n_dropped_rows <- nrow(phys[[1]]) - nrow(phys_data)
+message(format(n_dropped_rows, big.mark = ','), ' rows were dropped.')
+```
+
+#> TODO - add code for dealing with duplicates, etc.
+
+The phylogenetic and taxonomic trees used for propagation with
+ASR and inheritance have NCBI IDs at the node and tip labels.
+Therefore, only annotations with NCBI IDs can be mapped for propagation
+(ASR and Inheritance).
+
+The annotations of taxa without NCBI ID (usually strains) can be used to infer
+the annotations of their parents (with NCBI ID and usually species) through
+an 'early' version of ASR (before applying a formal ASR method).
+
+This early version of ASR is just the normalization of scores among the
+child nodes of a parent node.
+
+```{r divide data in sets 1 and 2}
+lgl_vct <- is.na(phys_data$NCBI_ID) | phys_data$NCBI_ID == 'unknown'
+set_with_ids <- phys_data |> 
+    filter(!lgl_vct) |> 
+    ## I need to do this here (when rows with NCBI IDs have been
+    ## separated from rows without NCBI_IDs). Maybe I can do this before
+    ## separated with mutate_if or other code.
+    group_by(NCBI_ID) |> 
+    mutate(Taxon_name = paste0(sort(unique(Taxon_name)), collapse = '|')) |> 
+    ungroup() |> 
+    distinct() |> 
+    group_by(NCBI_ID, Attribute) |> 
+    slice_head(n = 1) |> 
+    ungroup() |> 
+    group_by(NCBI_ID) |>
+    mutate(
+        Total_score = sum(Score),
+        Score = Score / Total_score
+    ) |>
+    ungroup() |> 
+    distinct()
+dim(set_with_ids) # these data don't need to go through early ASR
+```
+
+```{r set2 for asr}
+## this will be used for early ASR
+set_without_ids <- phys_data |> 
+    filter(lgl_vct) |> 
+    select(-NCBI_ID, -Taxon_name, -Frequency) |> 
+    relocate(NCBI_ID = Parent_NCBI_ID) |> 
+    distinct()
+
+if (nrow(set_with_ids) > 0) {
+    set_without_ids <- set_without_ids |> 
+        filter(!NCBI_ID %in% unique(set_with_ids$NCBI_ID))
+}
+## add some code here in case all of the NCBI_IDs were alredy present in
+## the original data
+dim(set_without_ids)
+```
+
+Let's just add up the scores
+
+#> set_without_ids |> filter(NCBI_ID %in% set_without_ids$NCBI_ID[which(duplicated(set_without_ids$NCBI_ID))])
+#> x |> filter(NCBI_ID %in% x$NCBI_ID[which(duplicated(x$NCBI_ID))])
+
+```{r}
+## 55565
+set_without_ids <- set_without_ids |>    
+    group_by(NCBI_ID, Attribute) |> 
+    slice_head(n = 1) |> 
+    ungroup() |> 
+    group_by(NCBI_ID) |>
+    mutate(
+        Total_score = sum(Score),
+        Score = Score / Total_score
+    ) |>
+    ungroup() |>
+    distinct()
+phys_data_ready <- left_join(set_with_ids, set_without_ids) |> 
+    select(-Total_score, -Parent_NCBI_ID) |> 
+    mutate(
+        NCBI_ID = case_when(
+            Rank == 'strain' ~ paste0('t__', NCBI_ID),
+            Rank == 'species' ~ paste0('s__', NCBI_ID),
+            Rank == 'genus' ~ paste0('g__', NCBI_ID)
+    )
+)
+phys_data_list <- split(phys_data_ready, factor(phys_data_ready$NCBI_ID))
+length(phys_data_list)
+```
+
+## First part of ASR/INH
+
+Map all nodes to the data_tree file
+
+```{r}
+data("tree_list")
+data_tree <- as.Node(tree_list)
+print(data_tree, limit = 10)
+```
+
+How many tips/nodes included in the NCBI taxonomy
+
+```{r}
+get_all_nodes <- unname(data_tree$Get('name'))
+mean(names(phys_data_list) %in% get_all_nodes)
+```
+
+How many tips included in the mpa tree
+
+```{r}
+mpa_tip_data <- mpa(x = 'tips') |> 
+    modify(as.character) |> 
+    as_tibble()
+mean(mpa_tip_data$taxid %in% sub('^\\w__', '', phys_data_ready$NCBI_ID))
+```
+Add nodes to the tree
+
+```{r}
+data_tree$Do(function(node) {
+    if (node$name %in% names(phys_data_list)) {
+        node$attribute_tbl <- phys_data_list[[node$name]]
+    } else {
+        NULL
+    }
+})
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$s__562$attribute_tbl
+```
+
+
+```{r}
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$attribute_tbl <- NULL
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$attribute_tbl
+```
+
+ASR - post-order
+
+```{r}
+asr1 <- function(node) {
+    if (!node$isLeaf) {
+        children_names <- names(node$children)
+        attribute_tbls <- children_names |> 
+            purrr::map(~ node[[.x]]$attribute_tbl) |>
+            purrr::discard(is.null)
+        not_all_children_tbls_are_null <- length(attribute_tbls) > 0
+        node_attribute_tbl_is_null <- is.null(node$table)
+        node_is_gst <- grepl('^[gst]__', node$name)
+        conds <- node_attribute_tbl_is_null & 
+            not_all_children_tbls_are_null &
+            node_is_gst
+        if (conds) {
+            res_tbl <- purrr::discard(attribute_tbls, is.null) |>
+                dplyr::bind_rows() |>
+                dplyr::select(
+                    NCBI_ID, Attribute, Score,
+                    Attribute_type, Attribute_group
+                ) |> 
+                dplyr::mutate(
+                    NCBI_ID = node$name,
+                    Evidence = 'asr1'
+                ) |>
+                group_by(NCBI_ID) |>
+                mutate(
+                    Total_score = sum(Score),
+                    Score = Score / Total_score
+                ) |>
+                ungroup() |>
+                select(-Total_score) |> 
+                group_by(NCBI_ID, Attribute) |> 
+                mutate(Score = sum(Score)) |> 
+                ungroup() |> 
+                distinct() |> 
+                mutate(
+                    Frequency = case_when(
+                        Score == 1 ~ 'always',
+                        Score > 0.9 ~ 'usually',
+                        Score >= 0.5 ~ 'sometimes',
+                        Score > 0 & Score < 0.5 ~ 'rarely',
+                        Score == 0 ~ 'never'
+                    )
+                ) |> 
+                distinct()
+                node[['attribute_tbl']] <- res_tbl
+        } 
+    }
+}
+data_tree$Do(asr1, traversal = 'post-order')
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$attribute_tbl
+```
+
+```{r}
+inh1 <- function(node) {
+    ## This one has no penalty
+    if (node$isRoot) {
+        return(NULL)
+    }
+    
+
+    if (is.null(node$parent$attribute_tbl)) {
+        return(NULL)
+    }
+
+    if (is.null(node$attribute_tbl) && grepl('^[st]__', node$name)) {
+        df <- node$parent$attribute_tbl
+        df <- df |>
+            dplyr::mutate(
+                NCBI_ID = node$name,
+                Evidence = 'inh'
+            )
+        node$attribute_tbl <- df
+    }
+}
+data_tree$Do(inh1, traversal = 'pre-order')
+```
+
+
+```{r}
+new_nodes <- data_tree$Get('attribute_tbl', simplify = FALSE)
+new_nodes_not_na <- map_lgl(new_nodes, ~ !all(is.na(.x)))
+mean(new_nodes_not_na)
+```
+
+```{r}
+new_data <-  new_nodes |> 
+    discard(~ all(is.na(.x))) |> 
+    bind_rows() |> 
+    arrange(NCBI_ID, Attribute) |> 
+    mutate(taxid = sub('^\\w__', '', NCBI_ID))
+mean(mpa_tip_data$taxid %in% new_data$taxid)
+```
+
+
+```{r}
+tip <- left_join(
+    mpa_tip_data, new_data, by = 'taxid', relationship = 'many-to-many'
+)  
+tip |> 
+    filter(!is.na(Attribute)) |> 
+    dim()
+```
+
+
+```{r}
+table(tip$Evidence)
+```
+
+
+```{r}
+mpa_tree <- mpa()
+m1 <- tip |> 
+    select(tip_label, Attribute, Score) |> 
+    filter(!is.na(Attribute)) |> 
+    # complete(NCB_ID, Attribute, fill = list(Score = 0)) |> 
+    pivot_wider(
+        names_from = Attribute, values_from = 'Score', values_fill = 0
+    ) |> 
+    tibble::column_to_rownames(var = 'tip_label') |> 
+    as.matrix()
+
+no_annotated_tips <- tip |>
+    filter(!tip_label %in% rownames(m1)) |>
+    pull(tip_label)
+
+m2 <- matrix(
+    data = rep(rep(1/ncol(m1), ncol(m1)), length(no_annotated_tips)),
+    nrow = length((no_annotated_tips)),
+    byrow = TRUE,
+    dimnames = list(rownames = no_annotated_tips, colnames = colnames(m1))
+)
+
+m3 <- rbind(m1, m2)
+m3 <- m3[mpa_tree$tip.label,]
+dim(m3)
+```
+
+```{r}
+system.time(
+    fit <- fitMk.parallel(
+        tree = mpa_tree, 
+        x = m3, 
+        model = 'ER', 
+        pi = "fitzjohn", 
+        logscale = TRUE, 
+        lik.func = "pruning"
+    )
+)
+## A few seconds or minutes with fitMK.parellel
+```
+
+```{r}
+system.time(
+    # ace <- ancr(fit, type = 'marginal', tips = TRUE) 
+    ace <- ancr(fit, type = 'marginal') # don't include tips
+)
+```
+
+
+```{r}
+node_data <- as_tibble(modify(mpa('nodes'), as.character))
+res <- ace$ace
+rownames(res) <- mpa_tree$node.label
+res <- res |> 
+    as.data.frame() |> 
+    tibble::rownames_to_column(var = 'node_label') |> 
+    left_join(node_data, by = 'node_label') |> 
+    as_tibble() |> 
+    filter(!is.na(taxid))
+dim(res)
+```
+
+
+```{r}
+## It seems that at least half of the nodes would have new annotations,
+## which I think it's ok, since this should be the nodes of higher
+## ranks in the taxonomic tree
+mean(!res$taxid %in% new_data$taxid)
+```
+
+
+```{r}
+data_for_inh2 <- filter(res, !taxid %in% new_data$taxid)
+dim(data_for_inh2)
+```
+
+
+```{r}
+input_for_inh2 <- data_for_inh2 |> 
+    select(-ends_with('_taxid'), -node_label, -node, -n_labels) |> 
+    relocate(NCBI_ID = taxid) |>
+    pivot_longer(
+        names_to = 'Attribute', values_to = 'Score', cols = 2:last_col()
+    ) |> 
+    complete(Attribute, Score, fill = list(Score = 0)) |> 
+    relocate(NCBI_ID) |> 
+    arrange(NCBI_ID, Attribute) |> 
+    mutate(Evidence = 'asr2') |> 
+    {\(y) split(y, factor(y$NCBI_ID))}()
+length(input_for_inh2)
+```
+
+Add more nodes
+
+```{r}
+data_tree$Do(function(node) {
+    cond1 <- is.null(node$attribute_tbl)
+    taxid <- sub('^\\w__', '', node$name)
+    cond2 <- taxid %in% names(input_for_inh2)
+    if (cond1 && cond2) {
+        # message('Adding ', node$name, ' to the tree')
+        node$attribute_tbl <- input_for_inh2[[taxid]]
+    }
+})
+data_tree$d__2$attribute_tbl
+```
+
+Now, let's proceed to inheritance (this time let's add a penalty)
+
+```{r}
+inh2 <- function(node, adjF = 0.1) {
+    cond1 <- !node$isRoot # don't apply this to the root
+    cond2 <- is.null(node$attribute_tbl) # attribute_tbl must be empty
+    cond3 <- !is.null(node$parent$attribute_tbl) # parent must have data
+    if (cond1 && cond2 && cond3) {
+        # message('Adding ', node$name, '.')
+        tbl <- node$parent$attribute_tbl  
+        n <- nrow(tbl)
+        res <- tbl |> 
+            mutate(
+                target_scores = rep(1 / n, n),
+                score_diff = Score - target_scores,
+                Score = Score - adjF * score_diff,
+                NCBI_ID = node$name,
+                Evidence = 'inh2'
+            ) |>
+            select(-target_scores, -score_diff) |> 
+            relocate(NCBI_ID)
+        node$attribute_tbl <- res
+    }
+}
+data_tree$Do(inh2, traversal = 'pre-order')
+```
+
+
+```{r}
+data_tree$d__2$p__1224$c__28211$o__356$f__335928$attribute_tbl
+```
+
+Get all the tables
+
+```{r}
+output <- data_tree$Get(
+    attribute = 'attribute_tbl', simplify = FALSE,
+    filterFun = function(node) node$name != 'ArcBac'
+    )
+output <- bind_rows(output)
+head(output)
+```
+
+## Session information
+
+```{r}
+sessioninfo::session_info()
+```
+
diff --git a/vignettes/articles/phangorn.Rmd b/vignettes/articles/phangorn.Rmd
new file mode 100644
index 0000000..80ba32c
--- /dev/null
+++ b/vignettes/articles/phangorn.Rmd
@@ -0,0 +1,516 @@
+---
+title: "A propagation workflow, using phangorn for ASR of discrete data"
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+
+```{r setup, message=FALSE}
+library(bugphyzz)
+library(taxPPro)
+library(data.tree)
+library(ape)
+library(phangorn)
+library(dplyr)
+library(purrr)
+library(tidyr)
+```
+
+Import physiological data (e.g. aerophilicity):
+
+```{r import physiology, message=FALSE}
+phys <- physiologies('aerophilicity')
+```
+
+Filter data:
+
+```{r filter data}
+attr_values_tsv <- system.file('extdata', 'attributes.tsv', package = 'bugphyzz')
+attr_values <- read.table(
+    file = attr_values_tsv, header = TRUE, sep = '\t'
+)
+valid_attr_values <- attr_values |> 
+    filter(attribute_group == 'aerophilicity') |> 
+    pull(attribute) |> 
+    unique()
+select_cols <- c(
+    ## Columns with information needed for propagation
+    'NCBI_ID', 'Taxon_name', 'Attribute', 'Attribute_source',
+    'Frequency', 'Score', 'Parent_NCBI_ID', 'Confidence_in_curation',
+    'Rank', 'Evidence',
+    
+    ## Columns only used for controlling the behavior of the code
+    'Attribute_type', 'Attribute_group'
+)
+valid_ranks <- c('genus', 'species', 'strain')
+phys_data <- phys[[1]] |> 
+    as_tibble() |> 
+    filter(Attribute %in% valid_attr_values) |> 
+    filter(Attribute_value == TRUE) |> # only for discrete traits with multiple states 
+    filter(!is.na(Rank), Rank %in% valid_ranks) |>
+    filter(
+        ## Filter rows with NCBI_ID.
+        ## If NCBI_ID is missing, Parent_NCBI_ID must be present
+        !((is.na(NCBI_ID) | NCBI_ID == 'unknown') & is.na(Parent_NCBI_ID))
+    ) |> 
+    filter(!is.na('Attribute_source'), !is.na('Frequency')) |>
+    mutate( 
+        Score = case_when(
+            Frequency == 'always' ~ 1,
+            Frequency == 'usually' ~ 0.9,
+            Frequency == 'sometimes' ~ 0.5,
+            Frequency == 'unknown' ~ 0.1 ## arbitrary value
+        )
+    ) |> 
+    select(all_of(select_cols)) |> 
+    distinct()
+n_dropped_rows <- nrow(phys[[1]]) - nrow(phys_data)
+message(format(n_dropped_rows, big.mark = ','), ' rows were dropped.')
+```
+
+We need taxids for propagation and to hoomogenize taxonomic data. If 
+taxid is not available, then the Parent NCBI_ID will be used for ASR.
+Taxa without taxids will not be used.
+
+```{r divide data in sets 1 and 2}
+lgl_vct <- is.na(phys_data$NCBI_ID) | phys_data$NCBI_ID == 'unknown'
+set_with_ids <- phys_data |> 
+    filter(!lgl_vct) |> 
+    group_by(NCBI_ID) |> 
+    mutate(Taxon_name = paste0(sort(unique(Taxon_name)), collapse = '|')) |> 
+    ungroup() |> 
+    distinct() |> 
+    group_by(NCBI_ID, Attribute) |> 
+    slice_head(n = 1) |> 
+    ungroup() |> 
+    group_by(NCBI_ID) |>
+    mutate(
+        Total_score = sum(Score),
+        Score = Score / Total_score
+    ) |>
+    ungroup() |> 
+    distinct()
+dim(set_with_ids)
+```
+
+```{r set2 for asr}
+set_without_ids <- phys_data |> 
+    filter(lgl_vct) |> 
+    select(-NCBI_ID, -Taxon_name, -Frequency) |> 
+    relocate(NCBI_ID = Parent_NCBI_ID) |> 
+    distinct()
+if (nrow(set_with_ids) > 0) {
+    set_without_ids <- set_without_ids |> 
+        filter(!NCBI_ID %in% unique(set_with_ids$NCBI_ID))
+}
+dim(set_without_ids)
+```
+
+
+
+```{r}
+## 55565
+set_without_ids <- set_without_ids |>    
+    group_by(NCBI_ID, Attribute) |> 
+    slice_head(n = 1) |> 
+    ungroup() |> 
+    group_by(NCBI_ID) |>
+    mutate(
+        Total_score = sum(Score),
+        Score = Score / Total_score
+    ) |>
+    ungroup() |>
+    distinct()
+
+phys_data_ready <- left_join(set_with_ids, set_without_ids) |> 
+    select(-Total_score, -Parent_NCBI_ID) |> 
+    mutate(
+        NCBI_ID = case_when(
+            Rank == 'strain' ~ paste0('t__', NCBI_ID),
+            Rank == 'species' ~ paste0('s__', NCBI_ID),
+            Rank == 'genus' ~ paste0('g__', NCBI_ID)
+    )
+)
+phys_data_list <- split(phys_data_ready, factor(phys_data_ready$NCBI_ID))
+length(phys_data_list)
+```
+
+## First part of ASR/INH
+
+Map all nodes to the data_tree file
+
+```{r import data_tree}
+data("tree_list")
+data_tree <- as.Node(tree_list)
+print(data_tree, limit = 10)
+```
+
+```{r}
+get_all_nodes <- unname(data_tree$Get('name'))
+mean(names(phys_data_list) %in% get_all_nodes)
+```
+
+```{r}
+tip_data <- ltp(x = 'tips') |> 
+    modify(as.character) |> 
+    as_tibble()
+mean(tip_data$taxid %in% sub('^\\w__', '', phys_data_ready$NCBI_ID))
+```
+
+Add nodes to the tree
+
+```{r}
+data_tree$Do(function(node) {
+    if (node$name %in% names(phys_data_list)) {
+        node$attribute_tbl <- phys_data_list[[node$name]]
+    } else {
+        NULL
+    }
+})
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$s__562$attribute_tbl
+```
+
+
+```{r}
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$attribute_tbl <- NULL
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$attribute_tbl
+```
+
+ASR - post-order
+
+```{r}
+asr1 <- function(node) {
+    if (!node$isLeaf) {
+        children_names <- names(node$children)
+        attribute_tbls <- children_names |> 
+            purrr::map(~ node[[.x]]$attribute_tbl) |>
+            purrr::discard(is.null)
+        not_all_children_tbls_are_null <- length(attribute_tbls) > 0
+        node_attribute_tbl_is_null <- is.null(node$table)
+        node_is_gst <- grepl('^[gst]__', node$name)
+        conds <- node_attribute_tbl_is_null & 
+            not_all_children_tbls_are_null &
+            node_is_gst
+        if (conds) {
+            res_tbl <- purrr::discard(attribute_tbls, is.null) |>
+                dplyr::bind_rows() |>
+                dplyr::select(
+                    NCBI_ID, Attribute, Score,
+                    Attribute_type, Attribute_group
+                ) |> 
+                dplyr::mutate(
+                    NCBI_ID = node$name,
+                    Evidence = 'asr1'
+                ) |>
+                group_by(NCBI_ID) |>
+                mutate(
+                    Total_score = sum(Score),
+                    Score = Score / Total_score
+                ) |>
+                ungroup() |>
+                select(-Total_score) |> 
+                group_by(NCBI_ID, Attribute) |> 
+                mutate(Score = sum(Score)) |> 
+                ungroup() |> 
+                distinct() |> 
+                mutate(
+                    Frequency = case_when(
+                        Score == 1 ~ 'always',
+                        Score > 0.9 ~ 'usually',
+                        Score >= 0.5 ~ 'sometimes',
+                        Score > 0 & Score < 0.5 ~ 'rarely',
+                        Score == 0 ~ 'never'
+                    )
+                ) |> 
+                distinct()
+                node[['attribute_tbl']] <- res_tbl
+        } 
+    }
+}
+data_tree$Do(asr1, traversal = 'post-order')
+data_tree$d__2$p__1224$c__1236$o__91347$f__543$g__561$attribute_tbl
+```
+
+```{r}
+inh1 <- function(node) {
+    if (node$isRoot)
+        return(NULL)
+    if (is.null(node$parent$attribute_tbl))
+        return(NULL)
+    if (is.null(node$attribute_tbl) && grepl('^[st]__', node$name)) {
+        df <- node$parent$attribute_tbl
+        df <- df |>
+            dplyr::mutate(
+                NCBI_ID = node$name,
+                Evidence = 'inh'
+            )
+        node$attribute_tbl <- df
+    }
+}
+data_tree$Do(inh1, traversal = 'pre-order')
+```
+
+
+```{r}
+new_nodes <- data_tree$Get('attribute_tbl', simplify = FALSE)
+new_nodes_not_na <- map_lgl(new_nodes, ~ !all(is.na(.x)))
+mean(new_nodes_not_na)
+```
+
+```{r}
+new_data <-  new_nodes |> 
+    discard(~ all(is.na(.x))) |> 
+    bind_rows() |> 
+    arrange(NCBI_ID, Attribute) |> 
+    mutate(taxid = sub('^\\w__', '', NCBI_ID))
+mean(tip_data$taxid %in% new_data$taxid)
+```
+
+
+```{r}
+tip <- left_join(
+    tip_data, new_data, by = 'taxid', relationship = 'many-to-many'
+)  
+tip |> 
+    filter(!is.na(Attribute)) |> 
+    dim()
+```
+
+
+```{r}
+table(tip$Evidence)
+```
+
+
+```{r}
+tree <- ltp()
+m1 <- tip |> 
+    select(tip_label, Attribute, Score) |> 
+    filter(!is.na(Attribute)) |> 
+    # complete(NCB_ID, Attribute, fill = list(Score = 0)) |> 
+    pivot_wider(
+        names_from = Attribute, values_from = 'Score', values_fill = 0
+    ) |> 
+    tibble::column_to_rownames(var = 'tip_label') |> 
+    as.matrix()
+
+no_annotated_tips <- tip |>
+    filter(!tip_label %in% rownames(m1)) |>
+    pull(tip_label)
+
+m2 <- matrix(
+    data = rep(rep(1/ncol(m1), ncol(m1)), length(no_annotated_tips)),
+    nrow = length((no_annotated_tips)),
+    byrow = TRUE,
+    dimnames = list(rownames = no_annotated_tips, colnames = colnames(m1))
+)
+
+m3 <- rbind(m1, m2)
+m3 <- m3[tree$tip.label,]
+dim(m3)
+```
+
+
+Let's better use a pruned version of the tree
+
+```{r}
+pruned_tree <- keep.tip(phy = tree, tip = rownames(m1))
+pruned_tree$node.label <- NULL # Nodes might have been lost
+m1 <- m1[pruned_tree$tip.label,]
+dim(m1)
+```
+
+
+```{r}
+x <- m1
+x[x > 0] <- 1
+x <- x[,sort(colnames(x))]
+# rownames(x) <- NULL
+contrast <- as.matrix(unique(as.data.frame(x)))
+contrast_row_names <- vector('character', nrow(contrast))
+for (i in seq_along(contrast_row_names)) {
+    pos <- which(contrast[i,] == 1)
+    attr_names <- colnames(contrast)[pos]
+    contrast_row_names[[i]] <- paste0(attr_names, collapse = ':')
+}
+rownames(contrast) <- contrast_row_names
+contrast
+```
+
+
+```{r}
+v <- vector('character', nrow(x))
+for (i in seq_along(v)) {
+    pos <- which(x[i,] == 1)
+    attr_names <- colnames(x)[pos]
+    v[i] <- paste0(attr_names, collapse = ':')
+}
+traits <- matrix(data = v, ncol = 1)
+colnames(traits) <- 'trait'
+rownames(traits) <- rownames(x)
+all(rownames(contrast) %in% unique(as.character(traits)))
+```
+
+Create a phyData
+
+```{r}
+phyData <- phyDat(
+    traits, type = "USER",
+    levels = rownames(contrast),
+    contrast = contrast
+)
+phyData
+```
+
+```{r}
+system.time({
+    fit <- pml(pruned_tree, phyData)
+    asr_output <- ancestral.pml(fit, 'ml')
+})
+```
+
+
+
+
+
+
+
+
+
+
+
+
+```{r}
+system.time(
+    fit <- fitMk.parallel(
+        tree = mpa_tree, 
+        x = m3, 
+        model = 'ER', 
+        pi = "fitzjohn", 
+        logscale = TRUE, 
+        lik.func = "pruning"
+    )
+)
+## A few seconds or minutes with fitMK.parellel
+```
+
+```{r}
+system.time(
+    # ace <- ancr(fit, type = 'marginal', tips = TRUE) 
+    ace <- ancr(fit, type = 'marginal') # don't include tips
+)
+```
+
+
+```{r}
+node_data <- as_tibble(modify(mpa('nodes'), as.character))
+res <- ace$ace
+rownames(res) <- mpa_tree$node.label
+res <- res |> 
+    as.data.frame() |> 
+    tibble::rownames_to_column(var = 'node_label') |> 
+    left_join(node_data, by = 'node_label') |> 
+    as_tibble() |> 
+    filter(!is.na(taxid))
+dim(res)
+```
+
+
+```{r}
+## It seems that at least half of the nodes would have new annotations,
+## which I think it's ok, since this should be the nodes of higher
+## ranks in the taxonomic tree
+mean(!res$taxid %in% new_data$taxid)
+```
+
+
+```{r}
+data_for_inh2 <- filter(res, !taxid %in% new_data$taxid)
+dim(data_for_inh2)
+```
+
+
+```{r}
+input_for_inh2 <- data_for_inh2 |> 
+    select(-ends_with('_taxid'), -node_label, -node, -n_labels) |> 
+    relocate(NCBI_ID = taxid) |>
+    pivot_longer(
+        names_to = 'Attribute', values_to = 'Score', cols = 2:last_col()
+    ) |> 
+    complete(Attribute, Score, fill = list(Score = 0)) |> 
+    relocate(NCBI_ID) |> 
+    arrange(NCBI_ID, Attribute) |> 
+    mutate(Evidence = 'asr2') |> 
+    {\(y) split(y, factor(y$NCBI_ID))}()
+length(input_for_inh2)
+```
+
+Add more nodes
+
+```{r}
+data_tree$Do(function(node) {
+    cond1 <- is.null(node$attribute_tbl)
+    taxid <- sub('^\\w__', '', node$name)
+    cond2 <- taxid %in% names(input_for_inh2)
+    if (cond1 && cond2) {
+        # message('Adding ', node$name, ' to the tree')
+        node$attribute_tbl <- input_for_inh2[[taxid]]
+    }
+})
+data_tree$d__2$attribute_tbl
+```
+
+Now, let's proceed to inheritance (this time let's add a penalty)
+
+```{r}
+inh2 <- function(node, adjF = 0.1) {
+    cond1 <- !node$isRoot # don't apply this to the root
+    cond2 <- is.null(node$attribute_tbl) # attribute_tbl must be empty
+    cond3 <- !is.null(node$parent$attribute_tbl) # parent must have data
+    if (cond1 && cond2 && cond3) {
+        # message('Adding ', node$name, '.')
+        tbl <- node$parent$attribute_tbl  
+        n <- nrow(tbl)
+        res <- tbl |> 
+            mutate(
+                target_scores = rep(1 / n, n),
+                score_diff = Score - target_scores,
+                Score = Score - adjF * score_diff,
+                NCBI_ID = node$name,
+                Evidence = 'inh2'
+            ) |>
+            select(-target_scores, -score_diff) |> 
+            relocate(NCBI_ID)
+        node$attribute_tbl <- res
+    }
+}
+data_tree$Do(inh2, traversal = 'pre-order')
+```
+
+
+```{r}
+data_tree$d__2$p__1224$c__28211$o__356$f__335928$attribute_tbl
+```
+
+Get all the tables
+
+```{r}
+output <- data_tree$Get(
+    attribute = 'attribute_tbl', simplify = FALSE,
+    filterFun = function(node) node$name != 'ArcBac'
+    )
+output <- bind_rows(output)
+head(output)
+```
+
+## Session information
+
+```{r}
+sessioninfo::session_info()
+```
+