From c3a9b11a2b466007397eabeae80c7f12b76d59e1 Mon Sep 17 00:00:00 2001 From: Dusko Mirkovic Date: Fri, 16 Aug 2024 10:29:57 +0200 Subject: [PATCH 1/2] Hotfix: Fix metadata entry with sample file matching - Ignore samples which have 0 ASVs during processing --- tasks/r-alpha-beta-diversity/main.r | 10 ++++++---- tasks/r-dada2/main.r | 10 ++++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tasks/r-alpha-beta-diversity/main.r b/tasks/r-alpha-beta-diversity/main.r index 9f4d4573..c1a47d96 100644 --- a/tasks/r-alpha-beta-diversity/main.r +++ b/tasks/r-alpha-beta-diversity/main.r @@ -176,7 +176,6 @@ perpareSampleData <- function(phyloseqObject, targetColumn) { subset_samples_custom <- function(pseq, targetColumnValue) { metadata <- data.frame(sample_data(pseq)) if (!targetColumnValue %in% unique(metadata$target)) { - print(paste("Could not find", targetColumnValue, "among the samples")) return(NULL) } @@ -210,10 +209,13 @@ validateTargetValues <- function(sampleDataFrame, targetValues) { } genusAbundancePlot <- function(pseq_bac, target_column_value, output_path, taskRun) { - print(sprintf("Creating abundance plot for %s", target_column_value)) + sa <- subset_samples_custom(pseq_bac, target_column_value) + if (is.null(sa)) { + return() + } + print(sprintf("Creating abundance plot for %s", target_column_value)) genus_col_index <- which(rank_names(pseq_bac) == "genus") - sa <- subset_samples_custom(pseq_bac, target_column_value) genus_sum = tapply(taxa_sums(sa), tax_table(sa)[, "genus"], sum, na.rm = FALSE) topgenera = names(sort(genus_sum, TRUE))[1:30] @@ -269,7 +271,7 @@ validatePhyoseq <- function(phyloseqObject) { sampleName <- rownames(metadata)[counter + 1] if (asvCount == 0) { sample_data(phyloseqObject) <- subset(metadata, metadata$sampleId != sampleName) - print(paste0("Sample ", sampleName, " has 0 ASVs in the OTU table!")) + print(paste0("Sample ", sampleName, " has 0 ASVs in the OTU table! 
This sample will be skipped during processing!")) } counter <- counter + 1 } diff --git a/tasks/r-dada2/main.r b/tasks/r-dada2/main.r index 48304133..6c12fa96 100644 --- a/tasks/r-dada2/main.r +++ b/tasks/r-dada2/main.r @@ -328,9 +328,15 @@ tryFilterAndTrim <- function( } getSampleName <- function(forward_path, metadata) { + forward_name <- basename(forward_path) + for (sampleId in metadata$sampleId) { - if (startsWith(basename(forward_path), sampleId)) { - return(sampleId) + sample_length <- nchar(sampleId) + if (substr(forward_name, 1, sample_length) == sampleId) { + next_char <- substr(forward_name, sample_length + 1, sample_length + 1) + if (!grepl("[[:alnum:]]", next_char)) { + return(sampleId) + } } } From a8699d08a966526380d0b9cf075c9b1c328a67e7 Mon Sep 17 00:00:00 2001 From: Dusko Mirkovic Date: Mon, 19 Aug 2024 11:12:39 +0200 Subject: [PATCH 2/2] CTX-6635: Print out if artifact upload failed Capture the createArtifact() result in the reverse-read loop as well, so its failure check does not reuse the value left over from the forward-read loop. --- tasks/r-dada2/main.r | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tasks/r-dada2/main.r b/tasks/r-dada2/main.r index 3500ca46..491334da 100644 --- a/tasks/r-dada2/main.r +++ b/tasks/r-dada2/main.r @@ -460,17 +460,27 @@ main <- function(taskRun) { ) for (path in filtered_forward_read_paths) { - taskRun$createArtifact( + print(paste0("Uploading filtered_reads/", basename(path), " to artifacts...")) + artifact <- taskRun$createArtifact( path, file.path("filtered_reads", basename(path)) ) + + if (is.null(artifact)) { + print(paste0("Failed to upload filtered_reads/", basename(path), " to artifacts")) + } } for (path in filtered_reverse_read_paths) { - taskRun$createArtifact( + print(paste0("Uploading filtered_reads/", basename(path), " to artifacts...")) + artifact <- taskRun$createArtifact( path, file.path("filtered_reads", basename(path)) ) + + if (is.null(artifact)) { + print(paste0("Failed to upload filtered_reads/", basename(path), " to artifacts")) + } } filtering_results_path <- file.path(output_path, "filtering_results.csv")