Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Case study report automation #111

Draft
wants to merge 27 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/ipr2viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ getTopAccByLinDomArch <- function(infile_full,
cln_domarch <- cln %>% select(domarch_cols)
col_counts <- colSums(is.na(cln_domarch))
DA_sym <- sym(names(which.min(col_counts)))
showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
# showNotification(paste0("Selecting representatives by unique ", DA_sym, " and lineage combinations"))
## Group by Lineage, DomArch and reverse sort by group counts
grouped <- cln %>%
group_by({{ DA_sym }}, {{ lin_sym }}) %>%
arrange(desc(PcPositive)) %>%
summarise(count = n(), AccNum = dplyr::first(AccNum)) %>%
arrange(-count) %>%
filter({{ lin_sym }} != "" && {{ DA_sym }} != "")
filter({{ lin_sym }} != "" & {{ DA_sym }} != "")
top_acc <- grouped$AccNum[1:n]
top_acc <- na.omit(top_acc)
return(top_acc)
Expand Down Expand Up @@ -180,7 +180,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = MolEvolvR::iprscan_cols)
ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
analysis_cols <- paste0("DomArch.", analysis)
infile_full <- infile_full %>% select(.data$analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
infile_full <- infile_full %>% select(analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
## To filter by Analysis
analysis <- paste(analysis, collapse = "|")
## @SAM: This can't be set in stone since the analysis may change!
Expand Down Expand Up @@ -212,7 +212,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
analysis_labeler <- analyses %>%
pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)

lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
lookup_tbl_path <- "~/awasyn/new_trial/cln_lookup_tbl.tsv"
awasyn marked this conversation as resolved.
Show resolved Hide resolved
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)

lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc
Expand Down
20 changes: 10 additions & 10 deletions R/networks_domarch.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@
#' A network of domains is returned based on shared domain architectures.
#'
#' @param prot A data frame that contains the column 'DomArch'.
#' @param column Name of column containing Domain architecture from which nodes
#' @param column Name of column containing Domain architecture from which nodes
#' and edges are generated.
#' @param domains_of_interest Character vector specifying domains of interest.
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' total counts if cutoff_type is "Total Count".
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Lineage.
#' @param layout Character. Layout type to be used for the network. Options are:
#' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
#' @param query_color Character. Color to represent the queried domain in the
#' @param query_color Character. Color to represent the queried domain in the
#' network.
#'
#' @importFrom dplyr across add_row all_of distinct filter mutate pull select
Expand Down Expand Up @@ -211,7 +211,7 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
visOptions(highlightNearest = TRUE)
},
error = function(e) {
showNotification(toString(e))
# showNotification(toString(e))
vis_g <- "error"
},
finally = {
Expand All @@ -231,18 +231,18 @@ createDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
#'
#'
#' @param prot A data frame that contains the column 'DomArch'.
#' @param column Name of column containing Domain architecture from which nodes
#' @param column Name of column containing Domain architecture from which nodes
#' and edges are generated.
#' @param domains_of_interest Character vector specifying the domains of interest.
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' @param cutoff Integer. Only use domains that occur at or above the cutoff for
#' total counts if cutoff_type is "Total Count".
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Only use domains that appear in cutoff or greater lineages if cutoff_type is
#' Lineage.
#' @param layout Character. Layout type to be used for the network. Options are:
#' \itemize{\item "grid" \item "circle" \item "random" \item "auto"}
#' @param query_color Color that the nodes of the domains in the
#' @param query_color Color that the nodes of the domains in the
#' domains_of_interest vector are colored
#' @param partner_color Color that the nodes that are not part of the
#' @param partner_color Color that the nodes that are not part of the
#' domains_of_interest vector are colored
#' @param border_color Color for the borders of the nodes.
#' @param IsDirected Is the network directed? Set to false to eliminate arrows
Expand Down
70 changes: 35 additions & 35 deletions R/pre-msa-tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @param y Delimiter. Default is space (" ").
#'
#' @importFrom rlang abort
#'
#'
#' @return A character vector in title case.
#' @export
#'
Expand Down Expand Up @@ -112,21 +112,21 @@ addLeaves2Alignment <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
# lin_file="data/rawdata_tsv/PspA.txt",
reduced = FALSE) {

#Check if the alignment file is provided and exists
if (nchar(aln_file) == 0) {
abort("Error: Alignment file path must be provided.")
}

if (!file.exists(aln_file)) {
abort(paste("Error: The alignment file '", aln_file, "' does not exist."))
}

# Check if the lineage file exists
if (!file.exists(lin_file)) {
abort(paste("Error: The lineage file '", lin_file, "' does not exist."))
}

# Check that the 'reduced' parameter is logical
if (!is.logical(reduced) || length(reduced) != 1) {
abort("Error: 'reduced' must be a single logical value (TRUE or FALSE).")
Expand Down Expand Up @@ -249,15 +249,15 @@ addName <- function(data,
if (!is.data.frame(data)) {
abort("Error: The input 'data' must be a data frame")
}

# Check that the specified columns exist in the data
required_cols <- c(accnum_col, spec_col, lin_col)
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
abort(paste("Error: The following columns are missing from the data:",
abort(paste("Error: The following columns are missing from the data:",
paste(missing_cols, collapse = ", ")))
}

cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
split_data <- data %>%
separate(
Expand Down Expand Up @@ -347,16 +347,16 @@ convertAlignment2FA <- function(aln_file = "",
if (nchar(aln_file) == 0) {
abort("Error: Alignment file path must be provided.")
}

if (!file.exists(aln_file)) {
abort(paste("Error: The alignment file '", aln_file, "' does not exist."))
}

# Check if the lineage file exists
if (!file.exists(lin_file)) {
abort(paste("Error: The lineage file '", lin_file, "' does not exist."))
}

# Check that the 'reduced' parameter is logical
if (!is.logical(reduced) || length(reduced) != 1) {
abort("Error: 'reduced' must be a single logical value (TRUE or FALSE).")
Expand Down Expand Up @@ -424,14 +424,14 @@ mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
if (!is.data.frame(acc2name)) {
abort("Error: acc2name must be a data frame.")
}

# Check if the specified columns exist in the data frame
if (!(acc_col %in% colnames(acc2name))) {
abort("Error: The specified acc_col '", acc_col, "' does not exist in
abort("Error: The specified acc_col '", acc_col, "' does not exist in
acc2name.")
}
if (!(name_col %in% colnames(acc2name))) {
abort("Error: The specified name_col '", name_col, "' does not exist in
abort("Error: The specified name_col '", name_col, "' does not exist in
acc2name.")
}

Expand Down Expand Up @@ -475,7 +475,7 @@ rename_fasta <- function(fa_path, outpath,
abort("Error: The input FASTA file does not exist at the specified
path: ", fa_path)
}

# Check if the output path is writable
outdir <- dirname(outpath)
if (!dir.exists(outdir)) {
Expand Down Expand Up @@ -541,20 +541,20 @@ generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
reduced = F) {
# Check if the alignment path exists
if (!dir.exists(aln_path)) {
abort("Error: The alignment directory does not exist at the specified
abort("Error: The alignment directory does not exist at the specified
path: ", aln_path)
}

# Check if the output path exists; if not, attempt to create it
if (!dir.exists(fa_outpath)) {
dir.create(fa_outpath, recursive = TRUE)
message("Note: The output directory did not exist and has been created: ",
message("Note: The output directory did not exist and has been created: ",
fa_outpath)
}

# Check if the lineage file exists
if (!file.exists(lin_file)) {
abort("Error: The linear file does not exist at the specified path: ",
abort("Error: The linear file does not exist at the specified path: ",
lin_file)
}
# library(here)
Expand Down Expand Up @@ -626,7 +626,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
if (!is.character(accessions) || length(accessions) == 0) {
abort("Error: 'accessions' must be a non-empty character vector.")
}

if (!dir.exists(dirname(outpath))) {
abort("Error: The output directory does not exist: ", dirname(outpath))
}
Expand Down Expand Up @@ -676,7 +676,7 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
id = accessions_partitioned[[x]],
db = "protein",
rettype = "fasta",
api_key = Sys.getenv("ENTREZ_API_KEY")
#api_key = Sys.getenv("ENTREZ_API_KEY")
)
)
})
Expand Down Expand Up @@ -732,21 +732,21 @@ acc2FA <- function(accessions, outpath, plan = "sequential") {
createRepresentativeAccNum <- function(prot_data,
reduced = "Lineage",
accnum_col = "AccNum") {

# Validate input
if (!is.data.frame(prot_data)) {
abort("Error: 'prot_data' must be a data frame.")
}

# Check if the reduced column exists in prot_data
if (!(reduced %in% colnames(prot_data))) {
abort("Error: The specified reduced column '", reduced, "' does not
abort("Error: The specified reduced column '", reduced, "' does not
exist in the data frame.")
}

# Check if the accnum_col exists in prot_data
if (!(accnum_col %in% colnames(prot_data))) {
abort("Error: The specified accession number column '", accnum_col, "'
abort("Error: The specified accession number column '", accnum_col, "'
does not exist in the data frame.")
}
# Get Unique reduced column and then bind the AccNums back to get one AccNum per reduced column
Expand Down Expand Up @@ -808,10 +808,10 @@ alignFasta <- function(fasta_file, tool = "Muscle", outpath = NULL) {
if (!file.exists(fasta_file)) {
abort("Error: The FASTA file does not exist: ", fasta_file)
}
if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
}

# if (file_ext(fasta_file) != "fasta" && file_ext(fasta_file) != "fa") {
# abort("Error: The specified file is not a valid FASTA file: ", fasta_file)
# }
fasta <- readAAStringSet(fasta_file)

aligned <- switch(tool,
Expand Down Expand Up @@ -857,23 +857,23 @@ writeMSA_AA2FA <- function(alignment, outpath) {
if (!inherits(alignment, "AAMultipleAlignment")) {
abort("Error: The alignment must be of type 'AAMultipleAlignment'.")
}

# Check the output path is a character string
if (!is.character(outpath) || nchar(outpath) == 0) {
abort("Error: Invalid output path specified.")
}

# Check if the output directory exists
outdir <- dirname(outpath)
if (!dir.exists(outdir)) {
abort("Error: The output directory does not exist: ", outdir)
}

l <- length(rownames(alignment))
l <- length(names(unmasked(alignment)))
fasta <- ""
for (i in 1:l)
{
fasta <- paste0(fasta, paste(">", rownames(alignment)[i]), "\n")
fasta <- paste0(fasta, paste(">", names(unmasked(alignment)[i])), "\n")
seq <- toString(unmasked(alignment)[[i]])
fasta <- paste0(fasta, seq, "\n")
}
Expand Down
Loading