Add source/qual column to the alignment QC summary report

umccr · Mar 10, 2024 · 2a01533 · 2a01533
1 parent 8df3b65
commit 2a01533
Showing 1 changed file with 16 additions and 14 deletions.
diff --git a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
@@ -9,7 +9,7 @@ output:
     highlight: kate
 params:
   title: "UMCCR Alignment QC Summary Report"
-  meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-02-20_wgts.rds")
+  meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-03-08_wgts.rds")
 description: "UMCCR Alignment QC Summary Report"
 title: "`r params$title`"
 ---
@@ -89,9 +89,10 @@ dat <- meta |>
   rowwise() |>
   mutate(
     fpaths = list(filepaths(indir = .data$outdir, sampleid = .data$SampleID)),
-    umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$Lane}")
+    umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$Lane}"),
+    `source/qual` = glue("{.data$source}/{.data$quality}")
   ) |>
-  select("umccrid", "phenotype", "type", "fpaths") |>
+  select("umccrid", "phenotype", "type", "source/qual", "fpaths") |>
   tidyr::unnest(fpaths) |>
   filter(.data$file_exists) |>
   rowwise() |>
@@ -110,13 +111,13 @@ eval <- dat |>
 
 # filetype-specific access
 d <- dat |>
-  select("umccrid", "phenotype", "type", "ftype", "dat") |>
-  tidyr::nest(data = c("umccrid", "phenotype", "type", "dat"))
+  select("umccrid", "phenotype", "type", "source/qual", "ftype", "dat") |>
+  tidyr::nest(data = c("umccrid", "phenotype", "type", "source/qual", "dat"))
 # sample-specific access
-d_samp <- dat |>
-  select("umccrid", "phenotype", "type", "ftype", "dat") |>
-  tidyr::nest(data = c("phenotype", "type", "ftype", "dat")) |>
-  arrange(desc("umccrid"))
+# d_samp <- dat |>
+#   select("umccrid", "phenotype", "type", "ftype", "dat") |>
+#   tidyr::nest(data = c("phenotype", "type", "ftype", "dat")) |>
+#   arrange(desc("umccrid"))
 ```
 
 ```{r funcs}
@@ -129,7 +130,7 @@ dr_unnest <- function(x1) {
     ungroup() |>
     filter(nrows > 0) |>
     tidyr::unnest(dat) |>
-    select("umccrid", "phenotype", "type", everything(), -c("ftype", "nrows"))
+    select("umccrid", "phenotype", "type", "source/qual", everything(), -c("ftype", "nrows"))
 }
 
 dt_view <- function(x, caption = NULL, scroll_y = 10 + min(nrow(x) * 35, 570), ...) {
@@ -197,6 +198,7 @@ d_map <- dr_unnest("MappingMetricsFile") |>
   arrange(type, desc(umccrid)) |>
   select(
     umccrid, phenotype, type,
+    `source/qual`,
     tot = reads_tot_rg_dragen,
     dup = reads_num_dupmarked_dragen,
     `dup%` = reads_num_dupmarked_dragen_pct,
@@ -265,7 +267,7 @@ d_pl <- dr_unnest("PloidyEstimationMetricsFile") |>
   arrange(desc(umccrid))
 d_pl_metrics <- d_pl |>
   select(
-    umccrid, phenotype, type,
+    umccrid, phenotype, type, `source/qual`,
     ploidy = ploidy_est_dragen,
     cvg_auto_med_ploidy = cov_auto_median_dragen,
     cvg_x_med_ploidy = cov_x_median_dragen,
@@ -274,9 +276,9 @@ d_pl_metrics <- d_pl |>
 # cov_genome_pct_* metrics are in the Hist data, so filter out here
 d_cvg <- dr_unnest("WgsCoverageMetricsFile") |>
   arrange(type, desc(umccrid)) |>
-  left_join(d_pl_metrics, by = c("umccrid", "phenotype", "type")) |>
+  left_join(d_pl_metrics, by = c("umccrid", "phenotype", "type", "source/qual")) |>
   select(
-    umccrid, phenotype, type,
+    umccrid, phenotype, type, `source/qual`,
     ploidy,
     cvg_auto_avg = cov_avg_auto_over_genome_dragen,
     cvg_auto_med = cov_median_auto_over_genome_dragen,
@@ -318,7 +320,7 @@ d_cvg |>
 d_tr <- dr_unnest("TrimmerMetricsFile") |>
   arrange(type, desc(umccrid)) |>
   select(
-    umccrid, phenotype, type,
+    umccrid, phenotype, type, `source/qual`,
     reads_tot = reads_tot_input_dragen,
     read_len_avg = read_len_avg_dragen,
     `polygkmers3r1_remain%` = polygkmers3r1_remaining_dragen_pct,