JRaviLab · awasyn · Oct 26, 2024 · Oct 26, 2024 · Oct 26, 2024 · Oct 30, 2024
diff --git a/R/ipr2viz.R b/R/ipr2viz.R
@@ -94,14 +94,14 @@ getTopAccByLinDomArch <- function(infile_full,
     cln_domarch <- cln %>% select(domarch_cols)
     col_counts <- colSums(is.na(cln_domarch))
     DA_sym <- sym(names(which.min(col_counts)))
-    showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
+    # showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
     ## Group by Lineage, DomArch and reverse sort by group counts
     grouped <- cln %>%
         group_by({{ DA_sym }}, {{ lin_sym }}) %>%
         arrange(desc(PcPositive)) %>%
         summarise(count = n(), AccNum = dplyr::first(AccNum)) %>%
         arrange(-count) %>%
-        filter({{ lin_sym }} != "" && {{ DA_sym }} != "")
+        filter({{ lin_sym }} != "" & {{ DA_sym }} != "")
     top_acc <- grouped$AccNum[1:n]
     top_acc <- na.omit(top_acc)
     return(top_acc)
@@ -180,7 +180,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
     ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = MolEvolvR::iprscan_cols)
     ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
     analysis_cols <- paste0("DomArch.", analysis)
-    infile_full <- infile_full %>% select(.data$analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
+    infile_full <- infile_full %>% select(analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
     ## To filter by Analysis
     analysis <- paste(analysis, collapse = "|")
     ## @SAM: This can't be set in stone since the analysis may change!
@@ -212,7 +212,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
     analysis_labeler <- analyses %>%
         pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)
 
-    lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
+    lookup_tbl_path <- "~/awasyn/new_trial/cln_lookup_tbl.tsv"
     lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)
 
     lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc

diff --git a/R/networks_domarch.R b/R/networks_domarch.R
@@ -24,16 +24,16 @@
 #' A network of domains is returned based on shared domain architectures.
 #'
 #' @param prot A data frame that contains the column 'DomArch'.
-#' @param column Name of column containing Domain architecture from which nodes 
+#' @param column Name of column containing Domain architecture from which nodes
 #' and edges are generated.
 #' @param domains_of_interest Character vector specifying domains of interest.
-#' @param cutoff Integer. Only use domains that occur at or above the cutoff for 
+#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
 #' total counts if cutoff_type is "Total Count".
-#' Only use domains that appear in cutoff or greater lineages if cutoff_type is 
+#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
 #' Lineage.
 #' @param layout Character. Layout type to be used for the network. Options are:
 #' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
-#' @param query_color Character. Color to represent the queried domain in the 
+#' @param query_color Character. Color to represent the queried domain in the
 #' network.
 #'
 #' @importFrom dplyr across add_row all_of distinct filter mutate pull select
@@ -211,7 +211,7 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
                 visOptions(highlightNearest = TRUE)
         },
         error = function(e) {
-            showNotification(toString(e))
+            # showNotification(toString(e))
             vis_g <- "error"
         },
         finally = {
@@ -231,18 +231,18 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
 #'
 #'
 #' @param prot A data frame that contains the column 'DomArch'.
-#' @param column Name of column containing Domain architecture from which nodes 
+#' @param column Name of column containing Domain architecture from which nodes
 #' and edges are generated.
 #' @param domains_of_interest Character vector specifying the domains of interest.
-#' @param cutoff Integer. Only use domains that occur at or above the cutoff for 
+#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
 #' total counts if cutoff_type is "Total Count".
-#' Only use domains that appear in cutoff or greater lineages if cutoff_type is 
+#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
 #' Lineage.
 #' @param layout Character. Layout type to be used for the network. Options are:
 #' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
-#' @param query_color Color that the nodes of the domains in the 
+#' @param query_color Color that the nodes of the domains in the
 #' domains_of_interest vector are colored
-#' @param partner_color Color that the nodes that are not part of the 
+#' @param partner_color Color that the nodes that are not part of the
 #' domains_of_interest vector are colored
 #' @param border_color Color for the borders of the nodes.
 #' @param IsDirected Is the network directed? Set to false to eliminate arrows

diff --git a/R/pre-msa-tree.R b/R/pre-msa-tree.R
@@ -46,7 +46,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
 #' @param y Delimitter. Default is space (" ").
 #'
 #' @importFrom rlang abort
-#' 
+#'
 #' @return A character vector in title case.
 #' @export
 #'
@@ -112,21 +112,21 @@ addLeaves2Alignment <- function(aln_file = "",
     lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
     # lin_file="data/rawdata_tsv/PspA.txt",
     reduced = FALSE) {
-    
+
     #Check if the alignment file is provided and exists
     if (nchar(aln_file) == 0) {
         abort("Error: Alignment file path must be provided.")
     }
-    
+
     if (!file.exists(aln_file)) {
         abort(paste("Error: The alignment file '", aln_file, "' does not exist."))
     }
-    
+
     # Check if the lineage file exists
     if (!file.exists(lin_file)) {
         abort(paste("Error: The lineage file '", lin_file, "' does not exist."))
     }
-    
+
     # Check that the 'reduced' parameter is logical
     if (!is.logical(reduced) || length(reduced) != 1) {
         abort("Error: 'reduced' must be a single logical value (TRUE or FALSE).")
@@ -249,15 +249,15 @@ addName <- function(data,
     if (!is.data.frame(data)) {
         abort("Error: The input 'data' must be a data frame")
     }
-    
+
     # Check that the specified columns exist in the data
     required_cols <- c(accnum_col, spec_col, lin_col)
     missing_cols <- setdiff(required_cols, names(data))
     if (length(missing_cols) > 0) {
-        abort(paste("Error: The following columns are missing from the data:", 
+        abort(paste("Error: The following columns are missing from the data:",
                    paste(missing_cols, collapse = ", ")))
     }
-    
+
     cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
     split_data <- data %>%
         separate(
@@ -347,16 +347,16 @@ convertAlignment2FA <- function(aln_file = "",
     if (nchar(aln_file) == 0) {
         abort("Error: Alignment file path must be provided.")
     }
-    
+
     if (!file.exists(aln_file)) {
         abort(paste("Error: The alignment file '", aln_file, "' does not exist."))
     }
-    
+
     # Check if the lineage file exists
     if (!file.exists(lin_file)) {
         abort(paste("Error: The lineage file '", lin_file, "' does not exist."))
     }
-    
+
     # Check that the 'reduced' parameter is logical
     if (!is.logical(reduced) || length(reduced) != 1) {
         abort("Error: 'reduced' must be a single logical value (TRUE or FALSE).")
@@ -424,14 +424,14 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
     if (!is.data.frame(acc2name)) {
         abort("Error: acc2name must be a data frame.")
     }
-    
+
     # Check if the specified columns exist in the data frame
     if (!(acc_col %in% colnames(acc2name))) {
-        abort("Error: The specified acc_col '", acc_col, "' does not exist in 
+        abort("Error: The specified acc_col '", acc_col, "' does not exist in
              acc2name.")
     }
     if (!(name_col %in% colnames(acc2name))) {
-        abort("Error: The specified name_col '", name_col, "' does not exist in 
+        abort("Error: The specified name_col '", name_col, "' does not exist in
              acc2name.")
     }
 
@@ -475,7 +475,7 @@ rename_fasta <- function(fa_path, outpath,
         abort("Error: The input FASTA file does not exist at the specified
              path: ", fa_path)
     }
-    
+
     # Check if the output path is writable
     outdir <- dirname(outpath)
     if (!dir.exists(outdir)) {
@@ -541,20 +541,20 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
     reduced = F) {
     # Check if the alignment path exists
     if (!dir.exists(aln_path)) {
-        abort("Error: The alignment directory does not exist at the specified 
+        abort("Error: The alignment directory does not exist at the specified
              path: ", aln_path)
     }
-    
+
     # Check if the output path exists; if not, attempt to create it
     if (!dir.exists(fa_outpath)) {
         dir.create(fa_outpath, recursive = TRUE)
-        message("Note: The output directory did not exist and has been created: ", 
+        message("Note: The output directory did not exist and has been created: ",
                 fa_outpath)
     }
-    
+
     # Check if the linear file exists
     if (!file.exists(lin_file)) {
-        abort("Error: The linear file does not exist at the specified path: ", 
+        abort("Error: The linear file does not exist at the specified path: ",
              lin_file)
     }
     # library(here)
@@ -626,7 +626,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
     if (!is.character(accessions) || length(accessions) == 0) {
         abort("Error: 'accessions' must be a non-empty character vector.")
     }
-    
+
     if (!dir.exists(dirname(outpath))) {
         abort("Error: The output directory does not exist: ", dirname(outpath))
     }
@@ -676,7 +676,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
                 id = accessions_partitioned[[x]],
                 db = "protein",
                 rettype = "fasta",
-                api_key = Sys.getenv("ENTREZ_API_KEY")
+                #api_key = Sys.getenv("ENTREZ_API_KEY")
             )
         )
     })
@@ -732,21 +732,21 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
 createRepresentativeAccNum <- function(prot_data,
     reduced = "Lineage",
     accnum_col = "AccNum") {
-    
+
     # Validate input
     if (!is.data.frame(prot_data)) {
         abort("Error: 'prot_data' must be a data frame.")
     }
-    
+
     # Check if the reduced column exists in prot_data
     if (!(reduced %in% colnames(prot_data))) {
-        abort("Error: The specified reduced column '", reduced, "' does not 
+        abort("Error: The specified reduced column '", reduced, "' does not
              exist in the data frame.")
     }
-    
+
     # Check if the accnum_col exists in prot_data
     if (!(accnum_col %in% colnames(prot_data))) {
-        abort("Error: The specified accession number column '", accnum_col, "' 
+        abort("Error: The specified accession number column '", accnum_col, "'
              does not exist in the data frame.")
     }
     # Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column
@@ -808,10 +808,10 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
     if (!file.exists(fasta_file)) {
         abort("Error: The FASTA file does not exist: ", fasta_file)
     }
-    
-    if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
-        abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
-    }
+
+    # if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
+    #    abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
+    # }
     fasta <- readAAStringSet(fasta_file)
 
     aligned <- switch(tool,
@@ -857,23 +857,23 @@ writeMSA_AA2FA <- function(alignment, outpath) {
     if (!inherits(alignment, "AAMultipleAlignment")) {
         abort("Error: The alignment must be of type 'AAMultipleAlignment'.")
     }
-    
+
     # Check the output path is a character string
     if (!is.character(outpath) || nchar(outpath) == 0) {
         abort("Error: Invalid output path specified.")
     }
-    
+
     # Check if the output directory exists
     outdir <- dirname(outpath)
     if (!dir.exists(outdir)) {
         abort("Error: The output directory does not exist: ", outdir)
     }
 
-    l <- length(rownames(alignment))
+    l <- length(names(unmasked(alignment)))
     fasta <- ""
     for (i in 1:l)
     {
-        fasta <- paste0(fasta, paste(">", rownames(alignment)[i]), "\n")
+        fasta <- paste0(fasta, paste(">", names(unmasked(alignment)[i])), "\n")
         seq <- toString(unmasked(alignment)[[i]])
         fasta <- paste0(fasta, seq, "\n")
     }