From 8364918628e2c2b87a22d5f9ac566d114ef5e5b6 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 14 Sep 2024 00:49:12 +1000
Subject: [PATCH 1/8] use path instead of bname for regex matching

---
 R/fs_icav1.R | 2 +-
 R/fs_s3.R    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/fs_icav1.R b/R/fs_icav1.R
index 16df2c9..9ab22f8 100644
--- a/R/fs_icav1.R
+++ b/R/fs_icav1.R
@@ -139,7 +139,7 @@ gds_list_files_filter_relevant <- function(gdsdir, pattern = NULL, regexes = DR_
     no_recurse = no_recurse, page_token = page_token, recursive = recursive
   ) |>
     dplyr::rowwise() |>
-    dplyr::mutate(type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes))) |>
+    dplyr::mutate(type = purrr::map_chr(.data$path, \(x) match_regex(x, regexes))) |>
     dplyr::ungroup() |>
     dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |>
     dplyr::select(dplyr::any_of(cols_sel))
diff --git a/R/fs_s3.R b/R/fs_s3.R
index 34c437e..d1a57c0 100644
--- a/R/fs_s3.R
+++ b/R/fs_s3.R
@@ -78,7 +78,7 @@ s3_list_files_filter_relevant <- function(s3dir, pattern = NULL,
   d <- d_all |>
     dplyr::rowwise() |>
     dplyr::mutate(
-      type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes))
+      type = purrr::map_chr(.data$path, \(x) match_regex(x, regexes))
     ) |>
     dplyr::ungroup() |>
     dplyr::filter(!is.na(.data$type), grepl(pattern, .data$type)) |>

From 27de1ca0432b8abc16ccbb1b1c41250edb7a7859 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 14 Sep 2024 01:34:13 +1000
Subject: [PATCH 2/8] add sash support

---
 NAMESPACE                          |   2 +
 R/fs_local.R                       |   2 +-
 R/sash.R                           | 287 ++++++++++++++++++++++
 R/umccrise.R                       |   3 +-
 man/Wf_sash.Rd                     | 379 +++++++++++++++++++++++++++++
 man/Wf_sash_download_tidy_write.Rd |  57 +++++
 man/Wf_umccrise.Rd                 |   2 -
 7 files changed, 727 insertions(+), 5 deletions(-)
 create mode 100644 R/sash.R
 create mode 100644 man/Wf_sash.Rd
 create mode 100644 man/Wf_sash_download_tidy_write.Rd

diff --git a/NAMESPACE b/NAMESPACE
index b10ca1f..9f9ec02 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -30,6 +30,8 @@ export(TsoTmbFile)
 export(TsoTmbTraceTsvFile)
 export(VCMetricsFile)
 export(Wf)
+export(Wf_sash)
+export(Wf_sash_download_tidy_write)
 export(Wf_tso_ctdna_tumor_only)
 export(Wf_umccrise)
 export(Wf_umccrise_download_tidy_write)
diff --git a/R/fs_local.R b/R/fs_local.R
index 07397aa..f681bef 100644
--- a/R/fs_local.R
+++ b/R/fs_local.R
@@ -47,7 +47,7 @@ local_list_files_dir <- function(localdir, max_files = NULL) {
 local_list_files_filter_relevant <- function(localdir, regexes = DR_FILE_REGEX, max_files = NULL) {
   local_list_files_dir(localdir = localdir, max_files = max_files) |>
     dplyr::mutate(
-      type = purrr::map_chr(.data$bname, \(x) match_regex(x, regexes = regexes))
+      type = purrr::map_chr(.data$path, \(x) match_regex(x, regexes = regexes))
     ) |>
     dplyr::filter(!is.na(.data$type)) |>
     dplyr::select("type", "bname", "size", "lastmodified", localpath = "path")
diff --git a/R/sash.R b/R/sash.R
new file mode 100644
index 0000000..ad42b0e
--- /dev/null
+++ b/R/sash.R
@@ -0,0 +1,287 @@
+#' Wf_sash R6 Class
+#'
+#' @description
+#' Reads and writes tidy versions of files from the `sash` workflow
+#'
+#' @examples
+#' \dontrun{
+#'
+#' #---- Local ----#
+#' p1 <- "~/s3/org.umccr.data.oncoanalyser/analysis_data/SBJ05571/sash"
+#' p2 <- "202408270b93455e/L2401308_L2401307"
+#' p <- normalizePath(file.path(p1, p2))
+#' SubjectID <- "SBJ05571"
+#' SampleID_tumor <- "MDX240307"
+#' prefix <- glue("{SubjectID}__{SampleID_tumor}")
+#' s1 <- Wf_sash$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor)
+#' s1$list_files(max_files = 20)
+#' s1$list_files_filter_relevant(max_files = 300)
+#' d <- s1$download_files(max_files = 1000, dryrun = FALSE)
+#' d_tidy <- s1$tidy_files(d)
+#' d_write <- s1$write(
+#'   d_tidy,
+#'   outdir = file.path(p, "dracarys_tidy"),
+#'   prefix = glue("{SubjectID}__{SampleID_tumor}"),
+#'   format = "tsv"
+#' )
+#'
+#' #---- S3 ----#
+#' p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05571/sash"
+#' p2 <- "202408270b93455e/L2401308_L2401307"
+#' p <- file.path(p1, p2)
+#' SubjectID <- "SBJ05571"
+#' SampleID_tumor <- "MDX240307"
+#' prefix <- glue("{SubjectID}__{SampleID_tumor}")
+#' s1 <- Wf_sash$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor)
+#' s1$list_files(max_files = 20)
+#' s1$list_files_filter_relevant()
+#' outdir <- sub("s3:/", "~/s3", p)
+#' d <- s1$download_files(outdir = outdir, max_files = 1000, dryrun = FALSE)
+#' d_tidy <- s1$tidy_files(d)
+#' d_write <- s1$write(
+#'   d_tidy,
+#'   outdir = file.path(outdir, "dracarys_tidy"),
+#'   prefix = glue("{SubjectID}__{SampleID_tumor}"),
+#'   format = "tsv"
+#' )
+#' }
+#'
+#' @export
+Wf_sash <- R6::R6Class(
+  "Wf_sash",
+  inherit = Wf,
+  public = list(
+    #' @field SubjectID The SubjectID of the sample (needed for path lookup).
+    #' @field SampleID_tumor The SampleID of the tumor sample (needed for path lookup).
+    SubjectID = NULL,
+    SampleID_tumor = NULL,
+    #' @description Create a new Wf_sash object.
+    #' @param path Path to directory with raw workflow results (from GDS, S3, or
+    #' local filesystem).
+    #' @param SubjectID The SubjectID of the sample (needed for path lookup).
+    #' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup).
+    initialize = function(path = NULL, SubjectID = NULL, SampleID_tumor = NULL) {
+      wname <- "sash"
+      regexes <- tibble::tribble(
+        ~regex, ~fun,
+        "cancer_report/cancer_report_tables/hrd/.*-chord\\.tsv\\.gz$", "hrdchordtsv",
+        "cancer_report/cancer_report_tables/hrd/.*-hrdetect\\.tsv\\.gz$", "hrdetecttsv",
+        "cancer_report/cancer_report_tables/hrd/.*-dragen\\.tsv\\.gz$", "hrddragentsv",
+        "cancer_report/cancer_report_tables/sigs/.*-snv_2015\\.tsv\\.gz$", "sigssnv2015tsv",
+        "cancer_report/cancer_report_tables/sigs/.*-snv_2020\\.tsv\\.gz$", "sigssnv2020tsv",
+        "cancer_report/cancer_report_tables/sigs/.*-dbs\\.tsv\\.gz$", "sigsdbstsv",
+        "cancer_report/cancer_report_tables/sigs/.*-indel\\.tsv\\.gz$", "sigsindeltsv",
+        "cancer_report/cancer_report_tables/.*-qc_summary\\.tsv\\.gz$", "qcsummarytsv",
+        "smlv_somatic/report/pcgr/.*\\.pcgr_acmg\\.grch38\\.json\\.gz$", "pcgrjson"
+      ) |>
+        dplyr::mutate(fun = paste0("read_", .data$fun))
+
+      super$initialize(path = path, wname = wname, regexes = regexes)
+      self$SubjectID <- SubjectID
+      self$SampleID_tumor <- SampleID_tumor
+    },
+    #' @description Print details about the Workflow.
+    #' @param ... (ignored).
+    print = function(...) {
+      res <- tibble::tribble(
+        ~var, ~value,
+        "path", self$path,
+        "wname", self$wname,
+        "filesystem", self$filesystem,
+        "SubjectID", self$SubjectID,
+        "SampleID_tumor", self$SampleID_tumor
+      )
+      print(res)
+      invisible(self)
+    },
+    #' @description List dracarys files under given path
+    #' @param max_files Max number of files to list (for gds/s3 only).
+    #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
+    #' @param ... Passed on to the `gds_list_files_filter_relevant` or
+    #' the `s3_list_files_filter_relevant` function.
+    list_files_filter_relevant = function(max_files = 1000,
+                                          ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) {
+      dir1 <- file.path(self$path, glue("{self$SubjectID}_{self$SampleID_tumor}"))
+      # NOTE(review): `ica_token` and `...` are accepted but not forwarded; confirm
+      # the parent method's signature before passing them on.
+      super$list_files_filter_relevant(path = dir1, max_files = max_files)
+    },
+    #' @description Download files from GDS/S3 to local filesystem.
+    #' @param outdir Path to output directory.
+    #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
+    #' @param max_files Max number of files to list.
+    #' @param dryrun If TRUE, just list the files that will be downloaded (don't
+    #' download them).
+    #' @param recursive Should files be returned recursively _in and under_ the specified
+    #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API).
+    download_files = function(outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
+                              max_files = 1000, dryrun = FALSE, recursive = NULL) {
+      super$download_files(
+        outdir = outdir, ica_token = ica_token, max_files = max_files,
+        dryrun = dryrun, recursive = recursive,
+        list_filter_fun = self$list_files_filter_relevant
+      )
+    },
+    #' @description Read `pcgr.json.gz` file.
+    #' @param x Path to file.
+    read_pcgrjson = function(x) {
+      j <- read_jsongz_jsonlite(x)
+      tmb <-
+        j[["content"]][["tmb"]][["variant_statistic"]] %||%
+        j[["content"]][["tmb"]][["v_stat"]] %||%
+        list(tmb_estimate = NA, n_tmb = NA)
+      tmb <- purrr::flatten(tmb) |>
+        tibble::as_tibble_row() |>
+        dplyr::select("tmb_estimate", "n_tmb")
+      msi <- j[["content"]][["msi"]][["prediction"]][["msi_stats"]]
+      # fall back to NA placeholders when the JSON has no msi_stats entry
+      msi <- msi %||% list(fracIndels = NA, predicted_class = NA)
+      msi <- purrr::flatten(msi) |>
+        tibble::as_tibble_row() |>
+        dplyr::select("fracIndels", "predicted_class")
+      metrics <- dplyr::bind_cols(msi, tmb)
+      return(metrics)
+    },
+    #' @description Read `dragen.tsv.gz` cancer report hrd file.
+    #' @param x Path to file.
+    read_hrddragentsv = function(x) {
+      ct <- readr::cols(.default = "d", Sample = "c")
+      read_tsvgz(x, col_types = ct)
+    },
+    #' @description Read `chord.tsv.gz` cancer report hrd file.
+    #' @param x Path to file.
+    read_hrdchordtsv = function(x) {
+      ct <- readr::cols_only(
+        p_hrd = "d",
+        hr_status = "c",
+        hrd_type = "c",
+        p_BRCA1 = "d",
+        p_BRCA2 = "d"
+      )
+      read_tsvgz(x, col_types = ct)
+    },
+    #' @description Read `hrdetect.tsv.gz` cancer report hrd file.
+    #' @param x Path to file.
+    read_hrdetecttsv = function(x) {
+      ct <- readr::cols(
+        .default = "d",
+        sample = "c"
+      )
+      read_tsvgz(x, col_types = ct) |>
+        dplyr::select(-c("sample"))
+    },
+    #' @description Read signature cancer report file.
+    #' @param x Path to file.
+    read_sigstsv = function(x) {
+      ct <- readr::cols(
+        .default = "d",
+        Signature = "c"
+      )
+      read_tsvgz(x, col_types = ct)
+    },
+    #' @description Read `snv_2015.tsv.gz` sigs cancer report file.
+    #' @param x Path to file.
+    read_sigssnv2015tsv = function(x) {
+      self$read_sigstsv(x)
+    },
+    #' @description Read `snv_2020.tsv.gz` sigs cancer report file.
+    #' @param x Path to file.
+    read_sigssnv2020tsv = function(x) {
+      self$read_sigstsv(x)
+    },
+    #' @description Read `dbs.tsv.gz` sigs cancer report file.
+    #' @param x Path to file.
+    read_sigsdbstsv = function(x) {
+      self$read_sigstsv(x)
+    },
+    #' @description Read `indel.tsv.gz` sigs cancer report file.
+    #' @param x Path to file.
+    read_sigsindeltsv = function(x) {
+      self$read_sigstsv(x)
+    },
+    #' @description Read `qc_summary.tsv.gz` cancer report file.
+    #' @param x Path to file.
+    read_qcsummarytsv = function(x) {
+      d <- read_tsvgz(x, col_types = readr::cols(.default = "c"))
+      d |>
+        dplyr::select("variable", "value") |>
+        tidyr::pivot_wider(names_from = "variable", values_from = "value") |>
+        dplyr::rename(MSI_mb_tmp = "MSI (indels/Mb)") |>
+        dplyr::mutate(
+          purity_hmf = sub("(.*) \\(.*\\)", "\\1", .data$Purity) |> as.numeric(),
+          ploidy_hmf = sub("(.*) \\(.*\\)", "\\1", .data$Ploidy) |> as.numeric(),
+          msi_mb_hmf = sub(".* \\((.*)\\)", "\\1", .data$MSI_mb_tmp) |> as.numeric(),
+          contamination_hmf = as.numeric(.data$Contamination),
+          deleted_genes_hmf = as.numeric(.data$DeletedGenes),
+          msi_hmf = sub("(.*) \\(.*\\)", "\\1", .data$MSI_mb_tmp),
+          tmb_hmf = sub("(.*) \\(.*\\)", "\\1", .data$TMB) |> as.numeric(),
+          tml_hmf = sub("(.*) \\(.*\\)", "\\1", .data$TML) |> as.numeric(),
+          hypermutated = ifelse("Hypermutated" %in% d$variable, .data[["Hypermutated"]], NA) |> as.character()
+        ) |>
+        dplyr::select(
+          qc_status_hmf = "QC_Status",
+          sex_hmf = "Gender",
+          "purity_hmf", "ploidy_hmf", "msi_hmf", "msi_mb_hmf",
+          "contamination_hmf",
+          "deleted_genes_hmf", "tmb_hmf", "tml_hmf",
+          wgd_hmf = "WGD",
+          "hypermutated"
+        )
+    }
+  ) # end public
+)
+
+#' sash Download Tidy and Write
+#'
+#' Downloads files from the `sash` workflow and writes them in a tidy format.
+#'
+#' @param path Path to directory with raw workflow results (from GDS, S3, or
+#' local filesystem).
+#' @param SubjectID The SubjectID of the sample (needed for path lookup).
+#' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup).
+#' @param outdir Path to output directory.
+#' @param format Format of output files.
+#' @param max_files Max number of files to list.
+#' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
+#' @param dryrun If TRUE, just list the files that will be downloaded (don't
+#' download them).
+#' @return List where each element is a tidy tibble of a sash file.
+#'
+#' @examples
+#' \dontrun{
+#' SubjectID <- "SBJ05571"
+#' SampleID_tumor <- "MDX240307"
+#' p1_s3 <- glue("s3://org.umccr.data.oncoanalyser/analysis_data/{SubjectID}/sash")
+#' p <- file.path(p1_s3, "202408270b93455e/L2401308_L2401307")
+#' outdir <- sub("s3:/", "~/s3", p)
+#' token <- Sys.getenv("ICA_ACCESS_TOKEN")
+#' d <- Wf_sash_download_tidy_write(
+#'   path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor,
+#'   outdir = outdir, ica_token = token,
+#'   dryrun = FALSE
+#' )
+#' }
+#' @export
+Wf_sash_download_tidy_write <- function(path, SubjectID, SampleID_tumor,
+                                        outdir, format = "rds", max_files = 1000,
+                                        ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
+                                        dryrun = FALSE) {
+  s <- Wf_sash$new(
+    path = path, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor
+  )
+  d_dl <- s$download_files(
+    outdir = outdir, ica_token = ica_token,
+    max_files = max_files, dryrun = dryrun
+  )
+  if (!dryrun) {
+    d_tidy <- s$tidy_files(d_dl)
+    d_write <- s$write(
+      d_tidy,
+      outdir = file.path(outdir, "dracarys_tidy"),
+      prefix = glue("{SubjectID}__{SampleID_tumor}"),
+      format = format
+    )
+    return(d_write)
+  }
+  return(d_dl)
+}
diff --git a/R/umccrise.R b/R/umccrise.R
index ffbe5c5..2ba73e6 100644
--- a/R/umccrise.R
+++ b/R/umccrise.R
@@ -120,7 +120,6 @@ Wf_umccrise <- R6::R6Class(
     #' download them).
     #' @param recursive Should files be returned recursively _in and under_ the specified
     #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API).
-    #' @param list_filter_fun Function to filter relevant files.
     download_files = function(outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
                               max_files = 1000, dryrun = FALSE, recursive = NULL) {
       super$download_files(
@@ -278,11 +277,11 @@ Wf_umccrise <- R6::R6Class(
 #' @param SubjectID The SubjectID of the sample (needed for path lookup).
 #' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup).
 #' @param outdir Path to output directory.
+#' @param format Format of output files.
 #' @param max_files Max number of files to list.
 #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
 #' @param dryrun If TRUE, just list the files that will be downloaded (don't
 #' download them).
-#' @param format Format of output files.
 #' @return List where each element is a tidy tibble of a umccrise file.
 #'
 #' @examples
diff --git a/man/Wf_sash.Rd b/man/Wf_sash.Rd
new file mode 100644
index 0000000..d1f9c4d
--- /dev/null
+++ b/man/Wf_sash.Rd
@@ -0,0 +1,379 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sash.R
+\name{Wf_sash}
+\alias{Wf_sash}
+\title{Wf_sash R6 Class}
+\description{
+Reads and writes tidy versions of files from the \code{sash} workflow
+}
+\examples{
+\dontrun{
+
+#---- Local ----#
+p1 <- "~/s3/org.umccr.data.oncoanalyser/analysis_data/SBJ05571/sash"
+p2 <- "202408270b93455e/L2401308_L2401307"
+p <- normalizePath(file.path(p1, p2))
+SubjectID <- "SBJ05571"
+SampleID_tumor <- "MDX240307"
+prefix <- glue("{SubjectID}__{SampleID_tumor}")
+s1 <- Wf_sash$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor)
+s1$list_files(max_files = 20)
+s1$list_files_filter_relevant(max_files = 300)
+d <- s1$download_files(max_files = 1000, dryrun = FALSE)
+d_tidy <- s1$tidy_files(d)
+d_write <- s1$write(
+  d_tidy,
+  outdir = file.path(p, "dracarys_tidy"),
+  prefix = glue("{SubjectID}__{SampleID_tumor}"),
+  format = "tsv"
+)
+
+#---- S3 ----#
+p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05571/sash"
+p2 <- "202408270b93455e/L2401308_L2401307"
+p <- file.path(p1, p2)
+SubjectID <- "SBJ05571"
+SampleID_tumor <- "MDX240307"
+prefix <- glue("{SubjectID}__{SampleID_tumor}")
+s1 <- Wf_sash$new(path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor)
+s1$list_files(max_files = 20)
+s1$list_files_filter_relevant()
+outdir <- sub("s3:/", "~/s3", p)
+d <- s1$download_files(outdir = outdir, max_files = 1000, dryrun = FALSE)
+d_tidy <- s1$tidy_files(d)
+d_write <- s1$write(
+  d_tidy,
+  outdir = file.path(outdir, "dracarys_tidy"),
+  prefix = glue("{SubjectID}__{SampleID_tumor}"),
+  format = "tsv"
+)
+}
+
+}
+\section{Super class}{
+\code{\link[dracarys:Wf]{dracarys::Wf}} -> \code{Wf_sash}
+}
+\section{Public fields}{
+\if{html}{\out{<div class="r6-fields">}}
+\describe{
+\item{\code{SubjectID}}{The SubjectID of the sample (needed for path lookup).}
+
+\item{\code{SampleID_tumor}}{The SampleID of the tumor sample (needed for path lookup).}
+}
+\if{html}{\out{</div>}}
+}
+\section{Methods}{
+\subsection{Public methods}{
+\itemize{
+\item \href{#method-Wf_sash-new}{\code{Wf_sash$new()}}
+\item \href{#method-Wf_sash-print}{\code{Wf_sash$print()}}
+\item \href{#method-Wf_sash-list_files_filter_relevant}{\code{Wf_sash$list_files_filter_relevant()}}
+\item \href{#method-Wf_sash-download_files}{\code{Wf_sash$download_files()}}
+\item \href{#method-Wf_sash-read_pcgrjson}{\code{Wf_sash$read_pcgrjson()}}
+\item \href{#method-Wf_sash-read_hrddragentsv}{\code{Wf_sash$read_hrddragentsv()}}
+\item \href{#method-Wf_sash-read_hrdchordtsv}{\code{Wf_sash$read_hrdchordtsv()}}
+\item \href{#method-Wf_sash-read_hrdetecttsv}{\code{Wf_sash$read_hrdetecttsv()}}
+\item \href{#method-Wf_sash-read_sigstsv}{\code{Wf_sash$read_sigstsv()}}
+\item \href{#method-Wf_sash-read_sigssnv2015tsv}{\code{Wf_sash$read_sigssnv2015tsv()}}
+\item \href{#method-Wf_sash-read_sigssnv2020tsv}{\code{Wf_sash$read_sigssnv2020tsv()}}
+\item \href{#method-Wf_sash-read_sigsdbstsv}{\code{Wf_sash$read_sigsdbstsv()}}
+\item \href{#method-Wf_sash-read_sigsindeltsv}{\code{Wf_sash$read_sigsindeltsv()}}
+\item \href{#method-Wf_sash-read_qcsummarytsv}{\code{Wf_sash$read_qcsummarytsv()}}
+\item \href{#method-Wf_sash-clone}{\code{Wf_sash$clone()}}
+}
+}
+\if{html}{\out{
+<details open><summary>Inherited methods</summary>
+<ul>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files"><a href='../../dracarys/html/Wf.html#method-Wf-list_files'><code>dracarys::Wf$list_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="tidy_files"><a href='../../dracarys/html/Wf.html#method-Wf-tidy_files'><code>dracarys::Wf$tidy_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="write"><a href='../../dracarys/html/Wf.html#method-Wf-write'><code>dracarys::Wf$write()</code></a></span></li>
+</ul>
+</details>
+}}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-new"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-new}{}}}
+\subsection{Method \code{new()}}{
+Create a new Wf_sash object.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$new(path = NULL, SubjectID = NULL, SampleID_tumor = NULL)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{path}}{Path to directory with raw workflow results (from GDS, S3, or
+local filesystem).}
+
+\item{\code{SubjectID}}{The SubjectID of the sample (needed for path lookup).}
+
+\item{\code{SampleID_tumor}}{The SampleID of the tumor sample (needed for path lookup).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-print"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-print}{}}}
+\subsection{Method \code{print()}}{
+Print details about the Workflow.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$print(...)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(ignored).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-list_files_filter_relevant"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-list_files_filter_relevant}{}}}
+\subsection{Method \code{list_files_filter_relevant()}}{
+List dracarys files under given path
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$list_files_filter_relevant(
+  max_files = 1000,
+  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
+  ...
+)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{max_files}}{Max number of files to list (for gds/s3 only).}
+
+\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
+
+\item{\code{...}}{Passed on to the \code{gds_list_files_filter_relevant} or
+the \code{s3_list_files_filter_relevant} function.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-download_files"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-download_files}{}}}
+\subsection{Method \code{download_files()}}{
+Download files from GDS/S3 to local filesystem.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$download_files(
+  outdir,
+  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
+  max_files = 1000,
+  dryrun = FALSE,
+  recursive = NULL
+)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{outdir}}{Path to output directory.}
+
+\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
+
+\item{\code{max_files}}{Max number of files to list.}
+
+\item{\code{dryrun}}{If TRUE, just list the files that will be downloaded (don't
+download them).}
+
+\item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified
+GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_pcgrjson"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_pcgrjson}{}}}
+\subsection{Method \code{read_pcgrjson()}}{
+Read \code{pcgr.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_pcgrjson(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_hrddragentsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrddragentsv}{}}}
+\subsection{Method \code{read_hrddragentsv()}}{
+Read \code{dragen.tsv.gz} cancer report hrd file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrddragentsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_hrdchordtsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrdchordtsv}{}}}
+\subsection{Method \code{read_hrdchordtsv()}}{
+Read \code{chord.tsv.gz} cancer report hrd file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrdchordtsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_hrdetecttsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrdetecttsv}{}}}
+\subsection{Method \code{read_hrdetecttsv()}}{
+Read \code{hrdetect.tsv.gz} cancer report hrd file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrdetecttsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_sigstsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigstsv}{}}}
+\subsection{Method \code{read_sigstsv()}}{
+Read signature cancer report file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigstsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_sigssnv2015tsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigssnv2015tsv}{}}}
+\subsection{Method \code{read_sigssnv2015tsv()}}{
+Read \code{snv_2015.tsv.gz} sigs cancer report file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigssnv2015tsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_sigssnv2020tsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigssnv2020tsv}{}}}
+\subsection{Method \code{read_sigssnv2020tsv()}}{
+Read \code{snv_2020.tsv.gz} sigs cancer report file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigssnv2020tsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_sigsdbstsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigsdbstsv}{}}}
+\subsection{Method \code{read_sigsdbstsv()}}{
+Read \code{dbs.tsv.gz} sigs cancer report file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigsdbstsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_sigsindeltsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigsindeltsv}{}}}
+\subsection{Method \code{read_sigsindeltsv()}}{
+Read \code{indel.tsv.gz} sigs cancer report file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigsindeltsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-read_qcsummarytsv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_qcsummarytsv}{}}}
+\subsection{Method \code{read_qcsummarytsv()}}{
+Read \code{qc_summary.tsv.gz} cancer report file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_qcsummarytsv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_sash-clone"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-clone}{}}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$clone(deep = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{deep}}{Whether to make a deep clone.}
+}
+\if{html}{\out{</div>}}
+}
+}
+}
diff --git a/man/Wf_sash_download_tidy_write.Rd b/man/Wf_sash_download_tidy_write.Rd
new file mode 100644
index 0000000..a806fdd
--- /dev/null
+++ b/man/Wf_sash_download_tidy_write.Rd
@@ -0,0 +1,57 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sash.R
+\name{Wf_sash_download_tidy_write}
+\alias{Wf_sash_download_tidy_write}
+\title{sash Download Tidy and Write}
+\usage{
+Wf_sash_download_tidy_write(
+  path,
+  SubjectID,
+  SampleID_tumor,
+  outdir,
+  format = "rds",
+  max_files = 1000,
+  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
+  dryrun = FALSE
+)
+}
+\arguments{
+\item{path}{Path to directory with raw workflow results (from GDS, S3, or
+local filesystem).}
+
+\item{SubjectID}{The SubjectID of the sample (needed for path lookup).}
+
+\item{SampleID_tumor}{The SampleID of the tumor sample (needed for path lookup).}
+
+\item{outdir}{Path to output directory.}
+
+\item{format}{Format of output files.}
+
+\item{max_files}{Max number of files to list.}
+
+\item{ica_token}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
+
+\item{dryrun}{If TRUE, just list the files that will be downloaded (don't
+download them).}
+}
+\value{
+List where each element is a tidy tibble of a sash file.
+}
+\description{
+Downloads files from the \code{sash} workflow and writes them in a tidy format.
+}
+\examples{
+\dontrun{
+SubjectID <- "SBJ05571"
+SampleID_tumor <- "MDX240307"
+p1_s3 <- glue("s3://org.umccr.data.oncoanalyser/analysis_data/{SubjectID}/sash")
+p <- file.path(p1_s3, "202408270b93455e/L2401308_L2401307")
+outdir <- sub("s3:/", "~/s3", p)
+token <- Sys.getenv("ICA_ACCESS_TOKEN")
+d <- Wf_sash_download_tidy_write(
+  path = p, SubjectID = SubjectID, SampleID_tumor = SampleID_tumor,
+  outdir = outdir, ica_token = token,
+  dryrun = FALSE
+)
+}
+}
diff --git a/man/Wf_umccrise.Rd b/man/Wf_umccrise.Rd
index 5fd5ade..7753970 100644
--- a/man/Wf_umccrise.Rd
+++ b/man/Wf_umccrise.Rd
@@ -187,8 +187,6 @@ download them).}
 
 \item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified
 GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
-
-\item{\code{list_filter_fun}}{Function to filter relevant files.}
 }
 \if{html}{\out{</div>}}
 }

From 91f45c0b1b5c3c95cc11073e9b8df54f1a30d558 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 14 Sep 2024 12:05:16 +1000
Subject: [PATCH 3/8] Wf.R: rm list_filter_fun req from download_files

---
 R/Wf.R                 | 12 ++++--------
 R/fs_icav1.R           |  6 ++----
 R/fs_s3.R              |  6 ++----
 man/Wf.Rd              |  5 +----
 man/dr_gds_download.Rd |  5 +----
 man/dr_s3_download.Rd  |  5 +----
 6 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/R/Wf.R b/R/Wf.R
index aa5bfae..3e00027 100644
--- a/R/Wf.R
+++ b/R/Wf.R
@@ -165,18 +165,15 @@ Wf <- R6::R6Class(
     #' download them).
     #' @param recursive Should files be returned recursively _in and under_ the specified
     #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API).
-    #' @param list_filter_fun Function to filter relevant files.
     download_files = function(path = self$path, outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-                              max_files = 1000, dryrun = FALSE, recursive = NULL,
-                              list_filter_fun = NULL) {
+                              max_files = 1000, dryrun = FALSE, recursive = NULL) {
       # TODO: add envvar checker
       regexes <- self$regexes
-      assertthat::assert_that(!is.null(regexes), !is.null(list_filter_fun))
+      assertthat::assert_that(!is.null(regexes))
       if (self$filesystem == "gds") {
         d <- dr_gds_download(
           gdsdir = path, outdir = outdir, regexes = regexes, token = ica_token,
-          page_size = max_files, dryrun = dryrun, recursive = recursive,
-          list_filter_fun = list_filter_fun
+          page_size = max_files, dryrun = dryrun, recursive = recursive
         )
         if (!dryrun) {
           self$filesystem <- "local"
@@ -185,8 +182,7 @@ Wf <- R6::R6Class(
       } else if (self$filesystem == "s3") {
         d <- dr_s3_download(
           s3dir = path, outdir = outdir, regexes = regexes,
-          max_objects = max_files, dryrun = dryrun,
-          list_filter_fun = list_filter_fun
+          max_objects = max_files, dryrun = dryrun
         )
         if (!dryrun) {
           self$filesystem <- "local"
diff --git a/R/fs_icav1.R b/R/fs_icav1.R
index 9ab22f8..da50daf 100644
--- a/R/fs_icav1.R
+++ b/R/fs_icav1.R
@@ -155,7 +155,6 @@ gds_list_files_filter_relevant <- function(gdsdir, pattern = NULL, regexes = DR_
 #' @param outdir Local output directory.
 #' @param dryrun If TRUE, just list the files that will be downloaded (don't
 #' download them).
-#' @param list_filter_fun Function to filter relevant GDS files.
 #' @examples
 #' \dontrun{
 #' gdsdir <- "gds://production/analysis_data/SBJ01155/umccrise/202408300c218043/L2101566__L2101565"
@@ -171,11 +170,10 @@ gds_list_files_filter_relevant <- function(gdsdir, pattern = NULL, regexes = DR_
 #' @export
 dr_gds_download <- function(gdsdir, outdir, token = Sys.getenv("ICA_ACCESS_TOKEN"),
                             pattern = NULL, page_size = 100, dryrun = FALSE,
-                            regexes = DR_FILE_REGEX, recursive = NULL,
-                            list_filter_fun = gds_list_files_filter_relevant) {
+                            regexes = DR_FILE_REGEX, recursive = NULL) {
   e <- emojifont::emoji
   fs::dir_create(outdir)
-  d <- list_filter_fun(
+  d <- gds_list_files_filter_relevant(
     gdsdir = gdsdir, pattern = pattern, regexes = regexes,
     token = token, page_size = page_size, include_url = FALSE,
     no_recurse = FALSE, page_token = NULL,
diff --git a/R/fs_s3.R b/R/fs_s3.R
index d1a57c0..eb314ce 100644
--- a/R/fs_s3.R
+++ b/R/fs_s3.R
@@ -109,7 +109,6 @@ s3_list_files_filter_relevant <- function(s3dir, pattern = NULL,
 #' @param outdir Path to output directory.
 #' @param dryrun If TRUE, just list the files that will be downloaded (don't
 #' download them).
-#' @param list_filter_fun Function to filter relevant S3 files.
 #' @examples
 #' \dontrun{
 #' p1 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05373/sash"
@@ -125,12 +124,11 @@ s3_list_files_filter_relevant <- function(s3dir, pattern = NULL,
 #' }
 #' @export
 dr_s3_download <- function(s3dir, outdir, max_objects = 100, pattern = NULL,
-                           regexes = DR_FILE_REGEX, dryrun = FALSE,
-                           list_filter_fun = s3_list_files_filter_relevant) {
+                           regexes = DR_FILE_REGEX, dryrun = FALSE) {
   s3 <- paws.storage::s3()
   e <- emojifont::emoji
   fs::dir_create(outdir)
-  d <- list_filter_fun(
+  d <- s3_list_files_filter_relevant(
     s3dir = s3dir, pattern = NULL, regexes = regexes,
     max_objects = max_objects, presign = FALSE
   )
diff --git a/man/Wf.Rd b/man/Wf.Rd
index f785399..3005725 100644
--- a/man/Wf.Rd
+++ b/man/Wf.Rd
@@ -202,8 +202,7 @@ Download files from GDS/S3 to local filesystem.
   ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
   max_files = 1000,
   dryrun = FALSE,
-  recursive = NULL,
-  list_filter_fun = NULL
+  recursive = NULL
 )}\if{html}{\out{</div>}}
 }
 
@@ -223,8 +222,6 @@ download them).}
 
 \item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified
 GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
-
-\item{\code{list_filter_fun}}{Function to filter relevant files.}
 }
 \if{html}{\out{</div>}}
 }
diff --git a/man/dr_gds_download.Rd b/man/dr_gds_download.Rd
index 61aa7f8..1faaf8a 100644
--- a/man/dr_gds_download.Rd
+++ b/man/dr_gds_download.Rd
@@ -12,8 +12,7 @@ dr_gds_download(
   page_size = 100,
   dryrun = FALSE,
   regexes = DR_FILE_REGEX,
-  recursive = NULL,
-  list_filter_fun = gds_list_files_filter_relevant
+  recursive = NULL
 )
 }
 \arguments{
@@ -34,8 +33,6 @@ download them).}
 
 \item{recursive}{Should files be returned recursively \emph{in and under} the specified
 GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
-
-\item{list_filter_fun}{Function to filter relevant GDS files.}
 }
 \description{
 Download only GDS files that can be processed by dracarys.
diff --git a/man/dr_s3_download.Rd b/man/dr_s3_download.Rd
index 674ed93..95136f0 100644
--- a/man/dr_s3_download.Rd
+++ b/man/dr_s3_download.Rd
@@ -10,8 +10,7 @@ dr_s3_download(
   max_objects = 100,
   pattern = NULL,
   regexes = DR_FILE_REGEX,
-  dryrun = FALSE,
-  list_filter_fun = s3_list_files_filter_relevant
+  dryrun = FALSE
 )
 }
 \arguments{
@@ -27,8 +26,6 @@ dr_s3_download(
 
 \item{dryrun}{If TRUE, just list the files that will be downloaded (don't
 download them).}
-
-\item{list_filter_fun}{Function to filter relevant S3 files.}
 }
 \description{
 Download only S3 files that can be processed by dracarys.

From 492369236ebf6b01353279562d0ac6db175c616d Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 14 Sep 2024 12:06:33 +1000
Subject: [PATCH 4/8] sash: hardcode file paths

---
 R/sash.R       |  66 +++++++--------------
 man/Wf_sash.Rd | 153 +++++++++++++++----------------------------------
 2 files changed, 67 insertions(+), 152 deletions(-)

diff --git a/R/sash.R b/R/sash.R
index ad42b0e..345fc2b 100644
--- a/R/sash.R
+++ b/R/sash.R
@@ -62,17 +62,19 @@ Wf_sash <- R6::R6Class(
     #' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup).
     initialize = function(path = NULL, SubjectID = NULL, SampleID_tumor = NULL) {
       wname <- "sash"
+      pref <- glue("{SubjectID}_{SampleID_tumor}")
+      crep <- "cancer_report/cancer_report_tables"
       regexes <- tibble::tribble(
         ~regex, ~fun,
-        "cancer_report/cancer_report_tables/hrd/.*-chord\\.tsv\\.gz$", "hrdchordtsv",
-        "cancer_report/cancer_report_tables/hrd/.*-hrdetect\\.tsv\\.gz$", "hrdetecttsv",
-        "cancer_report/cancer_report_tables/hrd/.*-dragen\\.tsv\\.gz$", "hrddragentsv",
-        "cancer_report/cancer_report_tables/sigs/.*-snv_2015\\.tsv\\.gz$", "sigssnv2015tsv",
-        "cancer_report/cancer_report_tables/sigs/.*-snv_2020\\.tsv\\.gz$", "sigssnv2020tsv",
-        "cancer_report/cancer_report_tables/sigs/.*-dbs\\.tsv\\.gz$", "sigsdbstsv",
-        "cancer_report/cancer_report_tables/sigs/.*-indel\\.tsv\\.gz$", "sigsindeltsv",
-        "cancer_report/cancer_report_tables/.*-qc_summary\\.tsv\\.gz$", "qcsummarytsv",
-        "smlv_somatic/report/pcgr/.*\\.pcgr_acmg\\.grch38\\.json\\.gz$", "pcgrjson"
+        glue("{pref}/{crep}/hrd/{pref}-chord\\.tsv\\.gz$"), "hrd_chord",
+        glue("{pref}/{crep}/hrd/{pref}-hrdetect\\.tsv\\.gz$"), "hrd_hrdetect",
+        glue("{pref}/{crep}/hrd/{pref}-dragen\\.tsv\\.gz$"), "hrd_dragen",
+        glue("{pref}/{crep}/sigs/{pref}-snv_2015\\.tsv\\.gz$"), "sigs_snv2015",
+        glue("{pref}/{crep}/sigs/{pref}-snv_2020\\.tsv\\.gz$"), "sigs_snv2020",
+        glue("{pref}/{crep}/sigs/{pref}-dbs\\.tsv\\.gz$"), "sigs_dbs",
+        glue("{pref}/{crep}/sigs/{pref}-indel\\.tsv\\.gz$"), "sigs_indel",
+        glue("{pref}/{crep}/{pref}-qc_summary\\.tsv\\.gz$"), "qcsum",
+        glue("{pref}/smlv_somatic/report/pcgr/{SampleID_tumor}\\.pcgr_acmg\\.grch38\\.json\\.gz$"), "pcgr_json"
       ) |>
         dplyr::mutate(fun = paste0("read_", .data$fun))
 
@@ -94,37 +96,9 @@ Wf_sash <- R6::R6Class(
       print(res)
       invisible(self)
     },
-    #' @description List dracarys files under given path
-    #' @param max_files Max number of files to list (for gds/s3 only).
-    #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
-    #' @param ... Passed on to the `gds_list_files_filter_relevant` or
-    #' the `s3_list_files_filter_relevant` function.
-    list_files_filter_relevant = function(max_files = 1000,
-                                          ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) {
-      path <- self$path
-      dir1 <- file.path(path, glue("{self$SubjectID}_{self$SampleID_tumor}"))
-      f1 <- super$list_files_filter_relevant(path = dir1, max_files = 500)
-      return(f1)
-    },
-    #' @description Download files from GDS/S3 to local filesystem.
-    #' @param outdir Path to output directory.
-    #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
-    #' @param max_files Max number of files to list.
-    #' @param dryrun If TRUE, just list the files that will be downloaded (don't
-    #' download them).
-    #' @param recursive Should files be returned recursively _in and under_ the specified
-    #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API).
-    download_files = function(outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-                              max_files = 1000, dryrun = FALSE, recursive = NULL) {
-      super$download_files(
-        outdir = outdir, ica_token = ica_token, max_files = max_files,
-        dryrun = dryrun, recursive = recursive,
-        list_filter_fun = self$list_files_filter_relevant
-      )
-    },
     #' @description Read `pcgr.json.gz` file.
     #' @param x Path to file.
-    read_pcgrjson = function(x) {
+    read_pcgr_json = function(x) {
       j <- read_jsongz_jsonlite(x)
       tmb <-
         j[["content"]][["tmb"]][["variant_statistic"]] %||%
@@ -144,13 +118,13 @@ Wf_sash <- R6::R6Class(
     },
     #' @description Read `dragen.tsv.gz` cancer report hrd file.
     #' @param x Path to file.
-    read_hrddragentsv = function(x) {
+    read_hrd_dragen = function(x) {
       ct <- readr::cols(.default = "d", Sample = "c")
       read_tsvgz(x, col_types = ct)
     },
     #' @description Read `chord.tsv.gz` cancer report hrd file.
     #' @param x Path to file.
-    read_hrdchordtsv = function(x) {
+    read_hrd_chord = function(x) {
       ct <- readr::cols_only(
         p_hrd = "d",
         hr_status = "c",
@@ -162,7 +136,7 @@ Wf_sash <- R6::R6Class(
     },
     #' @description Read `hrdetect.tsv.gz` cancer report hrd file.
     #' @param x Path to file.
-    read_hrdetecttsv = function(x) {
+    read_hrd_hrdetect = function(x) {
       ct <- readr::cols(
         .default = "d",
         sample = "c"
@@ -181,27 +155,27 @@ Wf_sash <- R6::R6Class(
     },
     #' @description Read `snv_2015.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigssnv2015tsv = function(x) {
+    read_sigs_snv2015 = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `snv_2020.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigssnv2020tsv = function(x) {
+    read_sigs_snv2020 = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `dbs.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigsdbstsv = function(x) {
+    read_sigs_dbs = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `indel.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigsindeltsv = function(x) {
+    read_sigs_indel = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `qc_summary.tsv.gz` cancer report file.
     #' @param x Path to file.
-    read_qcsummarytsv = function(x) {
+    read_qcsum = function(x) {
       d <- read_tsvgz(x, col_types = readr::cols(.default = "c"))
       d |>
         dplyr::select("variable", "value") |>
diff --git a/man/Wf_sash.Rd b/man/Wf_sash.Rd
index d1f9c4d..1caa51a 100644
--- a/man/Wf_sash.Rd
+++ b/man/Wf_sash.Rd
@@ -67,25 +67,25 @@ d_write <- s1$write(
 \itemize{
 \item \href{#method-Wf_sash-new}{\code{Wf_sash$new()}}
 \item \href{#method-Wf_sash-print}{\code{Wf_sash$print()}}
-\item \href{#method-Wf_sash-list_files_filter_relevant}{\code{Wf_sash$list_files_filter_relevant()}}
-\item \href{#method-Wf_sash-download_files}{\code{Wf_sash$download_files()}}
-\item \href{#method-Wf_sash-read_pcgrjson}{\code{Wf_sash$read_pcgrjson()}}
-\item \href{#method-Wf_sash-read_hrddragentsv}{\code{Wf_sash$read_hrddragentsv()}}
-\item \href{#method-Wf_sash-read_hrdchordtsv}{\code{Wf_sash$read_hrdchordtsv()}}
-\item \href{#method-Wf_sash-read_hrdetecttsv}{\code{Wf_sash$read_hrdetecttsv()}}
+\item \href{#method-Wf_sash-read_pcgr_json}{\code{Wf_sash$read_pcgr_json()}}
+\item \href{#method-Wf_sash-read_hrd_dragen}{\code{Wf_sash$read_hrd_dragen()}}
+\item \href{#method-Wf_sash-read_hrd_chord}{\code{Wf_sash$read_hrd_chord()}}
+\item \href{#method-Wf_sash-read_hrd_hrdetect}{\code{Wf_sash$read_hrd_hrdetect()}}
 \item \href{#method-Wf_sash-read_sigstsv}{\code{Wf_sash$read_sigstsv()}}
-\item \href{#method-Wf_sash-read_sigssnv2015tsv}{\code{Wf_sash$read_sigssnv2015tsv()}}
-\item \href{#method-Wf_sash-read_sigssnv2020tsv}{\code{Wf_sash$read_sigssnv2020tsv()}}
-\item \href{#method-Wf_sash-read_sigsdbstsv}{\code{Wf_sash$read_sigsdbstsv()}}
-\item \href{#method-Wf_sash-read_sigsindeltsv}{\code{Wf_sash$read_sigsindeltsv()}}
-\item \href{#method-Wf_sash-read_qcsummarytsv}{\code{Wf_sash$read_qcsummarytsv()}}
+\item \href{#method-Wf_sash-read_sigs_snv2015}{\code{Wf_sash$read_sigs_snv2015()}}
+\item \href{#method-Wf_sash-read_sigs_snv2020}{\code{Wf_sash$read_sigs_snv2020()}}
+\item \href{#method-Wf_sash-read_sigs_dbs}{\code{Wf_sash$read_sigs_dbs()}}
+\item \href{#method-Wf_sash-read_sigs_indel}{\code{Wf_sash$read_sigs_indel()}}
+\item \href{#method-Wf_sash-read_qcsum}{\code{Wf_sash$read_qcsum()}}
 \item \href{#method-Wf_sash-clone}{\code{Wf_sash$clone()}}
 }
 }
 \if{html}{\out{
 <details open><summary>Inherited methods</summary>
 <ul>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="download_files"><a href='../../dracarys/html/Wf.html#method-Wf-download_files'><code>dracarys::Wf$download_files()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files"><a href='../../dracarys/html/Wf.html#method-Wf-list_files'><code>dracarys::Wf$list_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files_filter_relevant"><a href='../../dracarys/html/Wf.html#method-Wf-list_files_filter_relevant'><code>dracarys::Wf$list_files_filter_relevant()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="tidy_files"><a href='../../dracarys/html/Wf.html#method-Wf-tidy_files'><code>dracarys::Wf$tidy_files()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="write"><a href='../../dracarys/html/Wf.html#method-Wf-write'><code>dracarys::Wf$write()</code></a></span></li>
 </ul>
@@ -131,71 +131,12 @@ Print details about the Workflow.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-list_files_filter_relevant"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-list_files_filter_relevant}{}}}
-\subsection{Method \code{list_files_filter_relevant()}}{
-List dracarys files under given path
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$list_files_filter_relevant(
-  max_files = 1000,
-  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-  ...
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{max_files}}{Max number of files to list (for gds/s3 only).}
-
-\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
-
-\item{\code{...}}{Passed on to the \code{gds_list_files_filter_relevant} or
-the \code{s3_list_files_filter_relevant} function.}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-download_files"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-download_files}{}}}
-\subsection{Method \code{download_files()}}{
-Download files from GDS/S3 to local filesystem.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$download_files(
-  outdir,
-  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-  max_files = 1000,
-  dryrun = FALSE,
-  recursive = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{outdir}}{Path to output directory.}
-
-\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
-
-\item{\code{max_files}}{Max number of files to list.}
-
-\item{\code{dryrun}}{If TRUE, just list the files that will be downloaded (don't
-download them).}
-
-\item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified
-GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_pcgrjson"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_pcgrjson}{}}}
-\subsection{Method \code{read_pcgrjson()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_pcgr_json"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_pcgr_json}{}}}
+\subsection{Method \code{read_pcgr_json()}}{
 Read \code{pcgr.json.gz} file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_pcgrjson(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_pcgr_json(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -207,12 +148,12 @@ Read \code{pcgr.json.gz} file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_hrddragentsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrddragentsv}{}}}
-\subsection{Method \code{read_hrddragentsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_hrd_dragen"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrd_dragen}{}}}
+\subsection{Method \code{read_hrd_dragen()}}{
 Read \code{dragen.tsv.gz} cancer report hrd file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrddragentsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrd_dragen(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -224,12 +165,12 @@ Read \code{dragen.tsv.gz} cancer report hrd file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_hrdchordtsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrdchordtsv}{}}}
-\subsection{Method \code{read_hrdchordtsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_hrd_chord"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrd_chord}{}}}
+\subsection{Method \code{read_hrd_chord()}}{
 Read \code{chord.tsv.gz} cancer report hrd file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrdchordtsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrd_chord(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -241,12 +182,12 @@ Read \code{chord.tsv.gz} cancer report hrd file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_hrdetecttsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrdetecttsv}{}}}
-\subsection{Method \code{read_hrdetecttsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_hrd_hrdetect"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_hrd_hrdetect}{}}}
+\subsection{Method \code{read_hrd_hrdetect()}}{
 Read \code{hrdetect.tsv.gz} cancer report hrd file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrdetecttsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_hrd_hrdetect(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -275,12 +216,12 @@ Read signature cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_sigssnv2015tsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigssnv2015tsv}{}}}
-\subsection{Method \code{read_sigssnv2015tsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_sigs_snv2015"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigs_snv2015}{}}}
+\subsection{Method \code{read_sigs_snv2015()}}{
 Read \code{snv_2015.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigssnv2015tsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigs_snv2015(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -292,12 +233,12 @@ Read \code{snv_2015.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_sigssnv2020tsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigssnv2020tsv}{}}}
-\subsection{Method \code{read_sigssnv2020tsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_sigs_snv2020"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigs_snv2020}{}}}
+\subsection{Method \code{read_sigs_snv2020()}}{
 Read \code{snv_2020.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigssnv2020tsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigs_snv2020(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -309,12 +250,12 @@ Read \code{snv_2020.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_sigsdbstsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigsdbstsv}{}}}
-\subsection{Method \code{read_sigsdbstsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_sigs_dbs"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigs_dbs}{}}}
+\subsection{Method \code{read_sigs_dbs()}}{
 Read \code{dbs.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigsdbstsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigs_dbs(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -326,12 +267,12 @@ Read \code{dbs.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_sigsindeltsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigsindeltsv}{}}}
-\subsection{Method \code{read_sigsindeltsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_sigs_indel"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_sigs_indel}{}}}
+\subsection{Method \code{read_sigs_indel()}}{
 Read \code{indel.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigsindeltsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_sigs_indel(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -343,12 +284,12 @@ Read \code{indel.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_sash-read_qcsummarytsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_sash-read_qcsummarytsv}{}}}
-\subsection{Method \code{read_qcsummarytsv()}}{
+\if{html}{\out{<a id="method-Wf_sash-read_qcsum"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_sash-read_qcsum}{}}}
+\subsection{Method \code{read_qcsum()}}{
 Read \code{qc_summary.tsv.gz} cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_qcsummarytsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_sash$read_qcsum(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{

From 281a084d6be2e9a0c2790a2be1823840712045ef Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 14 Sep 2024 12:06:52 +1000
Subject: [PATCH 5/8] umccrise: hardcode file paths

---
 R/umccrise.R       |  68 ++++++---------------
 man/Wf_umccrise.Rd | 143 +++++++++++++--------------------------------
 2 files changed, 61 insertions(+), 150 deletions(-)

diff --git a/R/umccrise.R b/R/umccrise.R
index 2ba73e6..efcd5fe 100644
--- a/R/umccrise.R
+++ b/R/umccrise.R
@@ -64,17 +64,19 @@ Wf_umccrise <- R6::R6Class(
     #' @param SampleID_tumor The SampleID of the tumor sample (needed for path lookup).
     initialize = function(path = NULL, SubjectID = NULL, SampleID_tumor = NULL) {
       wname <- "umccrise"
+      pref <- glue("{SubjectID}__{SampleID_tumor}")
+      crep <- "cancer_report_tables"
       regexes <- tibble::tribble(
         ~regex, ~fun,
-        "-chord\\.tsv\\.gz$", "chordtsv",
-        "-hrdetect\\.tsv\\.gz$", "hrdetecttsv",
-        "-snv_2015\\.tsv\\.gz$", "sigssnv2015tsv",
-        "-snv_2020\\.tsv\\.gz$", "sigssnv2020tsv",
-        "-dbs\\.tsv\\.gz$", "sigsdbstsv",
-        "-indel\\.tsv\\.gz$", "sigsindeltsv",
-        "-qc_summary\\.tsv\\.gz$", "qcsummarytsv",
-        "multiqc_conpair\\.txt$", "conpairmultiqc",
-        "-somatic\\.pcgr\\.json\\.gz$", "pcgrjson"
+        glue("{pref}/{crep}/hrd/{pref}-chord\\.tsv\\.gz$"), "hrd_chord",
+        glue("{pref}/{crep}/hrd/{pref}-hrdetect\\.tsv\\.gz$"), "hrd_hrdetect",
+        glue("{pref}/{crep}/sigs/{pref}-snv_2015\\.tsv\\.gz$"), "sigs_snv2015",
+        glue("{pref}/{crep}/sigs/{pref}-snv_2020\\.tsv\\.gz$"), "sigs_snv2020",
+        glue("{pref}/{crep}/sigs/{pref}-dbs\\.tsv\\.gz$"), "sigs_dbs",
+        glue("{pref}/{crep}/sigs/{pref}-indel\\.tsv\\.gz$"), "sigs_indel",
+        glue("{pref}/{crep}/{pref}-qc_summary\\.tsv\\.gz$"), "qcsum",
+        glue("{pref}/{pref}-multiqc_report_data/multiqc_conpair\\.txt$"), "conpairmultiqc",
+        glue("work/{pref}/pcgr/{pref}-somatic\\.pcgr\\.json\\.gz$"), "pcgr_json"
       ) |>
         dplyr::mutate(fun = paste0("read_", .data$fun))
 
@@ -96,41 +98,9 @@ Wf_umccrise <- R6::R6Class(
       print(res)
       invisible(self)
     },
-    #' @description List dracarys files under given path
-    #' @param max_files Max number of files to list (for gds/s3 only).
-    #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
-    #' @param ... Passed on to the `gds_list_files_filter_relevant` or
-    #' the `s3_list_files_filter_relevant` function.
-    list_files_filter_relevant = function(max_files = 1000,
-                                          ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) {
-      path <- self$path
-      dir_final <- file.path(path, glue("{self$SubjectID}__{self$SampleID_tumor}"))
-      dir_work <- file.path(path, "work", glue("{self$SubjectID}__{self$SampleID_tumor}"))
-      dir_work_pcgr <- file.path(dir_work, "pcgr") # for pcgr json
-      f1 <- super$list_files_filter_relevant(path = dir_final, max_files = 300, ica_token = ica_token)
-      f2 <- super$list_files_filter_relevant(path = dir_work_pcgr, max_files = 50, ica_token = ica_token)
-      f_all <- dplyr::bind_rows(f1, f2)
-      return(f_all)
-    },
-    #' @description Download files from GDS/S3 to local filesystem.
-    #' @param outdir Path to output directory.
-    #' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
-    #' @param max_files Max number of files to list.
-    #' @param dryrun If TRUE, just list the files that will be downloaded (don't
-    #' download them).
-    #' @param recursive Should files be returned recursively _in and under_ the specified
-    #' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API).
-    download_files = function(outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-                              max_files = 1000, dryrun = FALSE, recursive = NULL) {
-      super$download_files(
-        outdir = outdir, ica_token = ica_token, max_files = max_files,
-        dryrun = dryrun, recursive = recursive,
-        list_filter_fun = self$list_files_filter_relevant
-      )
-    },
     #' @description Read `pcgr.json.gz` file.
     #' @param x Path to file.
-    read_pcgrjson = function(x) {
+    read_pcgr_json = function(x) {
       j <- read_jsongz_jsonlite(x)
       tmb <-
         j[["content"]][["tmb"]][["variant_statistic"]] %||%
@@ -150,7 +120,7 @@ Wf_umccrise <- R6::R6Class(
     },
     #' @description Read `chord.tsv.gz` cancer report file.
     #' @param x Path to file.
-    read_chordtsv = function(x) {
+    read_hrd_chord = function(x) {
       ct <- readr::cols_only(
         p_hrd = "d",
         hr_status = "c",
@@ -162,7 +132,7 @@ Wf_umccrise <- R6::R6Class(
     },
     #' @description Read `hrdetect.tsv.gz` cancer report file.
     #' @param x Path to file.
-    read_hrdetecttsv = function(x) {
+    read_hrd_hrdetect = function(x) {
       ct <- readr::cols(
         .default = "d",
         sample = "c"
@@ -181,27 +151,27 @@ Wf_umccrise <- R6::R6Class(
     },
     #' @description Read `snv_2015.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigssnv2015tsv = function(x) {
+    read_sigs_snv2015 = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `snv_2020.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigssnv2020tsv = function(x) {
+    read_sigs_snv2020 = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `dbs.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigsdbstsv = function(x) {
+    read_sigs_dbs = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `indel.tsv.gz` sigs cancer report file.
     #' @param x Path to file.
-    read_sigsindeltsv = function(x) {
+    read_sigs_indel = function(x) {
       self$read_sigstsv(x)
     },
     #' @description Read `qc_summary.tsv.gz` cancer report file.
     #' @param x Path to file.
-    read_qcsummarytsv = function(x) {
+    read_qcsum = function(x) {
       d <- read_tsvgz(x, col_types = readr::cols(.default = "c"))
       d |>
         dplyr::select("variable", "value") |>
diff --git a/man/Wf_umccrise.Rd b/man/Wf_umccrise.Rd
index 7753970..131a62e 100644
--- a/man/Wf_umccrise.Rd
+++ b/man/Wf_umccrise.Rd
@@ -69,17 +69,15 @@ d_write <- um2$write(
 \itemize{
 \item \href{#method-Wf_umccrise-new}{\code{Wf_umccrise$new()}}
 \item \href{#method-Wf_umccrise-print}{\code{Wf_umccrise$print()}}
-\item \href{#method-Wf_umccrise-list_files_filter_relevant}{\code{Wf_umccrise$list_files_filter_relevant()}}
-\item \href{#method-Wf_umccrise-download_files}{\code{Wf_umccrise$download_files()}}
-\item \href{#method-Wf_umccrise-read_pcgrjson}{\code{Wf_umccrise$read_pcgrjson()}}
-\item \href{#method-Wf_umccrise-read_chordtsv}{\code{Wf_umccrise$read_chordtsv()}}
-\item \href{#method-Wf_umccrise-read_hrdetecttsv}{\code{Wf_umccrise$read_hrdetecttsv()}}
+\item \href{#method-Wf_umccrise-read_pcgr_json}{\code{Wf_umccrise$read_pcgr_json()}}
+\item \href{#method-Wf_umccrise-read_hrd_chord}{\code{Wf_umccrise$read_hrd_chord()}}
+\item \href{#method-Wf_umccrise-read_hrd_hrdetect}{\code{Wf_umccrise$read_hrd_hrdetect()}}
 \item \href{#method-Wf_umccrise-read_sigstsv}{\code{Wf_umccrise$read_sigstsv()}}
-\item \href{#method-Wf_umccrise-read_sigssnv2015tsv}{\code{Wf_umccrise$read_sigssnv2015tsv()}}
-\item \href{#method-Wf_umccrise-read_sigssnv2020tsv}{\code{Wf_umccrise$read_sigssnv2020tsv()}}
-\item \href{#method-Wf_umccrise-read_sigsdbstsv}{\code{Wf_umccrise$read_sigsdbstsv()}}
-\item \href{#method-Wf_umccrise-read_sigsindeltsv}{\code{Wf_umccrise$read_sigsindeltsv()}}
-\item \href{#method-Wf_umccrise-read_qcsummarytsv}{\code{Wf_umccrise$read_qcsummarytsv()}}
+\item \href{#method-Wf_umccrise-read_sigs_snv2015}{\code{Wf_umccrise$read_sigs_snv2015()}}
+\item \href{#method-Wf_umccrise-read_sigs_snv2020}{\code{Wf_umccrise$read_sigs_snv2020()}}
+\item \href{#method-Wf_umccrise-read_sigs_dbs}{\code{Wf_umccrise$read_sigs_dbs()}}
+\item \href{#method-Wf_umccrise-read_sigs_indel}{\code{Wf_umccrise$read_sigs_indel()}}
+\item \href{#method-Wf_umccrise-read_qcsum}{\code{Wf_umccrise$read_qcsum()}}
 \item \href{#method-Wf_umccrise-read_conpairmultiqc}{\code{Wf_umccrise$read_conpairmultiqc()}}
 \item \href{#method-Wf_umccrise-clone}{\code{Wf_umccrise$clone()}}
 }
@@ -87,7 +85,9 @@ d_write <- um2$write(
 \if{html}{\out{
 <details open><summary>Inherited methods</summary>
 <ul>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="download_files"><a href='../../dracarys/html/Wf.html#method-Wf-download_files'><code>dracarys::Wf$download_files()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files"><a href='../../dracarys/html/Wf.html#method-Wf-list_files'><code>dracarys::Wf$list_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files_filter_relevant"><a href='../../dracarys/html/Wf.html#method-Wf-list_files_filter_relevant'><code>dracarys::Wf$list_files_filter_relevant()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="tidy_files"><a href='../../dracarys/html/Wf.html#method-Wf-tidy_files'><code>dracarys::Wf$tidy_files()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="write"><a href='../../dracarys/html/Wf.html#method-Wf-write'><code>dracarys::Wf$write()</code></a></span></li>
 </ul>
@@ -133,71 +133,12 @@ Print details about the Workflow.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-list_files_filter_relevant"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-list_files_filter_relevant}{}}}
-\subsection{Method \code{list_files_filter_relevant()}}{
-List dracarys files under given path
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$list_files_filter_relevant(
-  max_files = 1000,
-  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-  ...
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{max_files}}{Max number of files to list (for gds/s3 only).}
-
-\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
-
-\item{\code{...}}{Passed on to the \code{gds_list_files_filter_relevant} or
-the \code{s3_list_files_filter_relevant} function.}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-download_files"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-download_files}{}}}
-\subsection{Method \code{download_files()}}{
-Download files from GDS/S3 to local filesystem.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$download_files(
-  outdir,
-  ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
-  max_files = 1000,
-  dryrun = FALSE,
-  recursive = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{outdir}}{Path to output directory.}
-
-\item{\code{ica_token}}{ICA access token (def: $ICA_ACCESS_TOKEN env var).}
-
-\item{\code{max_files}}{Max number of files to list.}
-
-\item{\code{dryrun}}{If TRUE, just list the files that will be downloaded (don't
-download them).}
-
-\item{\code{recursive}}{Should files be returned recursively \emph{in and under} the specified
-GDS directory, or \emph{only directly in} the specified GDS directory (def: TRUE via ICA API).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_pcgrjson"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_pcgrjson}{}}}
-\subsection{Method \code{read_pcgrjson()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_pcgr_json"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_pcgr_json}{}}}
+\subsection{Method \code{read_pcgr_json()}}{
 Read \code{pcgr.json.gz} file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_pcgrjson(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_pcgr_json(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -209,12 +150,12 @@ Read \code{pcgr.json.gz} file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_chordtsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_chordtsv}{}}}
-\subsection{Method \code{read_chordtsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_hrd_chord"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_hrd_chord}{}}}
+\subsection{Method \code{read_hrd_chord()}}{
 Read \code{chord.tsv.gz} cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_chordtsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_hrd_chord(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -226,12 +167,12 @@ Read \code{chord.tsv.gz} cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_hrdetecttsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_hrdetecttsv}{}}}
-\subsection{Method \code{read_hrdetecttsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_hrd_hrdetect"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_hrd_hrdetect}{}}}
+\subsection{Method \code{read_hrd_hrdetect()}}{
 Read \code{hrdetect.tsv.gz} cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_hrdetecttsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_hrd_hrdetect(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -260,12 +201,12 @@ Read signature cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_sigssnv2015tsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigssnv2015tsv}{}}}
-\subsection{Method \code{read_sigssnv2015tsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_sigs_snv2015"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigs_snv2015}{}}}
+\subsection{Method \code{read_sigs_snv2015()}}{
 Read \code{snv_2015.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigssnv2015tsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigs_snv2015(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -277,12 +218,12 @@ Read \code{snv_2015.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_sigssnv2020tsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigssnv2020tsv}{}}}
-\subsection{Method \code{read_sigssnv2020tsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_sigs_snv2020"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigs_snv2020}{}}}
+\subsection{Method \code{read_sigs_snv2020()}}{
 Read \code{snv_2020.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigssnv2020tsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigs_snv2020(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -294,12 +235,12 @@ Read \code{snv_2020.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_sigsdbstsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigsdbstsv}{}}}
-\subsection{Method \code{read_sigsdbstsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_sigs_dbs"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigs_dbs}{}}}
+\subsection{Method \code{read_sigs_dbs()}}{
 Read \code{dbs.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigsdbstsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigs_dbs(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -311,12 +252,12 @@ Read \code{dbs.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_sigsindeltsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigsindeltsv}{}}}
-\subsection{Method \code{read_sigsindeltsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_sigs_indel"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_sigs_indel}{}}}
+\subsection{Method \code{read_sigs_indel()}}{
 Read \code{indel.tsv.gz} sigs cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigsindeltsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_sigs_indel(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -328,12 +269,12 @@ Read \code{indel.tsv.gz} sigs cancer report file.
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-Wf_umccrise-read_qcsummarytsv"></a>}}
-\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_qcsummarytsv}{}}}
-\subsection{Method \code{read_qcsummarytsv()}}{
+\if{html}{\out{<a id="method-Wf_umccrise-read_qcsum"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_umccrise-read_qcsum}{}}}
+\subsection{Method \code{read_qcsum()}}{
 Read \code{qc_summary.tsv.gz} cancer report file.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_qcsummarytsv(x)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Wf_umccrise$read_qcsum(x)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{

From 0ccf417736c4470943098e60bf3b2c2b24bdcebf Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 14 Sep 2024 15:55:15 +1000
Subject: [PATCH 6/8] sash: download and tidy

---
 .Rbuildignore                                 |  3 +-
 inst/rmd/umccr_workflows/sash/.gitignore      |  3 +
 inst/rmd/umccr_workflows/sash/dl_and_tidy.R   | 79 +++++++++++++++++++
 .../umccr_workflows/umccrise/dl_and_tidy.R    |  6 +-
 4 files changed, 87 insertions(+), 4 deletions(-)
 create mode 100644 inst/rmd/umccr_workflows/sash/.gitignore
 create mode 100755 inst/rmd/umccr_workflows/sash/dl_and_tidy.R

diff --git a/.Rbuildignore b/.Rbuildignore
index 5c38deb..c5136c8 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -27,5 +27,6 @@ inst/rmd/umccr_portal/html
 inst/rmd/umccr_workflows/alignment_qc/nogit
 inst/rmd/umccr_workflows/bcl_convert/html
 inst/rmd/umccr_workflows/interop/html
-inst/rmd/umccr_workflows/umccrise/html
+inst/rmd/umccr_workflows/sash/nogit
+inst/rmd/umccr_workflows/umccrise/nogit
 inst/sandbox
diff --git a/inst/rmd/umccr_workflows/sash/.gitignore b/inst/rmd/umccr_workflows/sash/.gitignore
new file mode 100644
index 0000000..0182e8f
--- /dev/null
+++ b/inst/rmd/umccr_workflows/sash/.gitignore
@@ -0,0 +1,3 @@
+nogit
+
+/.quarto/
diff --git a/inst/rmd/umccr_workflows/sash/dl_and_tidy.R b/inst/rmd/umccr_workflows/sash/dl_and_tidy.R
new file mode 100755
index 0000000..48a5a3a
--- /dev/null
+++ b/inst/rmd/umccr_workflows/sash/dl_and_tidy.R
@@ -0,0 +1,79 @@
+#!/usr/bin/env Rscript
+
+{
+  require(dplyr)
+  require(assertthat, include.only = "assert_that")
+  require(dracarys, include.only = "Wf_sash_download_tidy_write")
+  require(glue, include.only = "glue")
+  require(here, include.only = "here")
+  require(rportal, include.only = c("portaldb_query_workflow"))
+  require(tidyr, include.only = "separate_wider_delim")
+}
+
+query_workflow_sash <- function(start_date, end_date) {
+  q1 <- glue(
+    "WHERE \"type_name\" = 'sash' ",
+    "AND \"start\" >= date(\'{start_date}\') ",
+    "AND \"end\" <= date(\'{end_date}\') ",
+    "ORDER BY \"start\" DESC;"
+  )
+  rportal::portaldb_query_workflow(q1)
+}
+
+query_limsrow_libids <- function(libids) {
+  assertthat::assert_that(!is.null(libids), all(grepl("^L", libids)))
+  libids <- unique(libids) |>
+    paste(collapse = "|")
+  q1 <- glue("WHERE REGEXP_LIKE(\"library_id\", '{libids}');")
+  rportal::portaldb_query_limsrow(q1)
+}
+
+# first read in the workflows table, extract metadata, then join with lims
+start_date <- "2024-08-29"
+end_date <- "2024-09-07"
+meta_raw <- query_workflow_sash(start_date, end_date)
+meta <- meta_raw |>
+  rportal::meta_sash()
+lims_raw <- query_limsrow_libids(meta$LibraryID_tumor)
+lims <- lims_raw |>
+  tidyr::separate_wider_delim(
+    library_id,
+    delim = "_", names = c("library_id", "topup_or_rerun"), too_few = "align_start"
+  ) |>
+  select(
+    subject_id, library_id, sample_id, sample_name,
+    external_subject_id, external_sample_id,
+    project_name, project_owner,
+    source, quality, workflow
+  ) |>
+  distinct()
+table(lims$library_id %in% meta$LibraryID_tumor) # double-check
+
+meta_lims <- meta |>
+  left_join(lims, by = c("LibraryID_tumor" = "library_id")) |>
+  mutate(rownum = row_number()) |>
+  select(
+    rownum, wfr_id, version, end_status, start, end, portal_run_id, SubjectID, LibraryID_tumor, LibraryID_normal,
+    SampleID_tumor, SampleID_normal, s3_outdir_sash, external_subject_id, external_sample_id,
+    project_owner, project_name, source, quality, workflow
+  )
+meta_lims |>
+  saveRDS(here(glue("inst/rmd/umccr_workflows/sash/nogit/meta/{start_date}_{end_date}.rds")))
+
+d <- meta_lims |>
+  rowwise() |>
+  mutate(
+    indir = .data$s3_outdir_sash,
+    outdir = file.path(sub("s3://", "", .data$indir)),
+    outdir = file.path(normalizePath("~/s3"), .data$outdir),
+    res = list(
+      dracarys::Wf_sash_download_tidy_write(
+        path = .data$indir, SubjectID = .data$SubjectID, SampleID_tumor = .data$SampleID_tumor,
+        outdir = .data$outdir, max_files = 300, dryrun = FALSE
+      )
+    )
+  ) |>
+  ungroup()
+
+d |>
+  saveRDS(here(glue("inst/rmd/umccr_workflows/sash/nogit/results_{start_date}_{end_date}.rds")))
diff --git a/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R b/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R
index 3b5ede3..fb17721 100755
--- a/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R
+++ b/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R
@@ -12,9 +12,9 @@
 
 query_workflow_umccrise <- function(start_date, end_date) {
   q1 <- glue(
-    "WHERE \"type_name\" = 'umccrise'",
-    "AND  \"start\" >= date(\'{start_date}\')",
-    "AND  \"end\" <= date(\'{end_date}\')",
+    "WHERE \"type_name\" = 'umccrise' ",
+    "AND \"start\" >= date(\'{start_date}\') ",
+    "AND \"end\" <= date(\'{end_date}\') ",
     "ORDER BY \"start\" DESC;"
   )
   rportal::portaldb_query_workflow(q1)

From d39f620a3ac9e637a4d9133d2e273661e33ee043 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sun, 15 Sep 2024 15:47:11 +1000
Subject: [PATCH 7/8] sash: add summary report

---
 inst/rmd/umccr_workflows/sash/dl_and_tidy.R   |   5 +-
 inst/rmd/umccr_workflows/sash/render.sh       |   9 +
 .../rmd/umccr_workflows/sash/summary_sash.qmd | 378 ++++++++++++++++++
 3 files changed, 390 insertions(+), 2 deletions(-)
 create mode 100644 inst/rmd/umccr_workflows/sash/render.sh
 create mode 100644 inst/rmd/umccr_workflows/sash/summary_sash.qmd

diff --git a/inst/rmd/umccr_workflows/sash/dl_and_tidy.R b/inst/rmd/umccr_workflows/sash/dl_and_tidy.R
index 48a5a3a..9fcfcb1 100755
--- a/inst/rmd/umccr_workflows/sash/dl_and_tidy.R
+++ b/inst/rmd/umccr_workflows/sash/dl_and_tidy.R
@@ -63,13 +63,14 @@ meta_lims |>
 d <- meta_lims |>
   rowwise() |>
   mutate(
-    indir = .data$s3_outdir_sash,
+    # indir = .data$s3_outdir_sash,
-    outdir = file.path(sub("s3://", "", .data$indir)),
+    outdir = file.path(sub("s3://", "", .data$s3_outdir_sash)), # `indir` is only defined further down
     outdir = file.path(normalizePath("~/s3"), .data$outdir),
+    indir = outdir, # for when debugging locally
     res = list(
       dracarys::Wf_sash_download_tidy_write(
         path = .data$indir, SubjectID = .data$SubjectID, SampleID_tumor = .data$SampleID_tumor,
-        outdir = .data$outdir, max_files = 300, dryrun = FALSE
+        outdir = .data$outdir, max_files = 1000, dryrun = FALSE
       )
     )
   ) |>
diff --git a/inst/rmd/umccr_workflows/sash/render.sh b/inst/rmd/umccr_workflows/sash/render.sh
new file mode 100644
index 0000000..e1c6efe
--- /dev/null
+++ b/inst/rmd/umccr_workflows/sash/render.sh
@@ -0,0 +1,9 @@
+date_start="2024-08-29"
+date_end="2024-09-07"
+out="sash_${date_start}_${date_end}.html"
+
+quarto render summary_sash.qmd \
+    -P date_start:${date_start} \
+    -P date_end:${date_end} \
+    -o ${out} \
+    --output-dir nogit/html
diff --git a/inst/rmd/umccr_workflows/sash/summary_sash.qmd b/inst/rmd/umccr_workflows/sash/summary_sash.qmd
new file mode 100644
index 0000000..24b0903
--- /dev/null
+++ b/inst/rmd/umccr_workflows/sash/summary_sash.qmd
@@ -0,0 +1,378 @@
+---
+title: "{{< meta params.title >}}"
+subtitle: "Period: `r paste(params$date_start, ' to ', params$date_end)`"
+author: "UMCCR - Genomics Platform Group"
+date: now
+date-format: "YYYY-MM-DD HH:mm Z"
+execute:
+  echo: false
+format:
+  html:
+    toc: true
+    toc-expand: 1
+    toc-title: Contents
+    toc-location: body
+    highlight-style: github
+    number-sections: false
+    link-external-icon: true
+    link-external-newwindow: true
+    embed-resources: true
+    code-copy: true
+    code-link: true
+    code-fold: true
+    code-block-border-left: true
+    smooth-scroll: true
+    grid:
+      body-width: 1300px
+params:
+  title: "UMCCR sash Workflow Summary"
+  date_start: "XXXX-XX-XX"
+  date_end: "XXXX-XX-XX"
+---
+
+```{r}
+#| label: pkg_load
+#| message: false
+{
+  require(dplyr) # import all dplyr funcs
+  require(readr, include.only = c("read_rds"))
+  require(purrr, include.only = c("map"))
+  require(tidyr, include.only = c("unnest_wider"))
+  require(dracarys, include.only = c("session_info_kable"))
+  require(glue, include.only = "glue")
+  require(here, include.only = "here")
+  require(knitr, include.only = "kable")
+  require(reactable, include.only = "reactable")
+  require(ggplot2, include.only = c("ggplot", "aes"))
+  require(lubridate, include.only = c("as_datetime"))
+  require(plotly, include.only = c("ggplotly"))
+  require(patchwork, include.only = c("plot_layout"))
+}
+set.seed(42)
+```
+
+```{r}
+#| label: load_data
+date_start <- params$date_start
+date_end <- params$date_end
+nogit <- "inst/rmd/umccr_workflows/sash/nogit"
+d_raw <- here(glue("{nogit}/results_{date_start}_{date_end}.rds")) |>
+  readr::read_rds() |>
+  arrange(desc(SubjectID), desc(LibraryID_tumor)) |>
+  mutate(rownum = row_number()) |>
+  relocate(rownum)
+myriad <- paste0("SBJ0", c("0695", "0847", "0920", "2397", "2456", "2743", "3186", "3242", "4187", "4221"))
+```
+
+```{r}
+#| label: funcs
+
+# pal <- colorRamp(c("white", "lightgreen"))
+# rgb(pal(0.5), maxColorValue = 255)
+
+tab_view <- function(x, id, ...) {
+  htmltools::browsable(
+    htmltools::tagList(
+      htmltools::tags$button(
+        htmltools::tagList(fontawesome::fa("download"), "CSV"),
+        onclick = glue("Reactable.downloadDataCSV('{id}', '{id}.csv')")
+      ),
+      x |>
+        reactable::reactable(
+          bordered = TRUE,
+          compact = TRUE,
+          filterable = TRUE,
+          fullWidth = TRUE,
+          height = 800,
+          highlight = TRUE,
+          pagination = TRUE,
+          showPagination = TRUE,
+          defaultPageSize = nrow(x),
+          showPageSizeOptions = TRUE,
+          pageSizeOptions = c(20, 50, nrow(x)),
+          resizable = TRUE,
+          searchable = TRUE,
+          sortable = TRUE,
+          striped = TRUE,
+          wrap = FALSE,
+          elementId = id,
+          theme = reactable::reactableTheme(
+            borderColor = "#dfe2e5",
+            stripedColor = "#f6f8fa",
+            highlightColor = "#f0f5f9",
+            style = list(
+              fontFamily = "Monaco"
+            )
+          ),
+          ...
+        )
+    )
+  )
+}
+```
+
+## Metadata
+
+```{r}
+#| label: metadata
+meta <- d_raw |>
+  select(
+    rownum, portal_run_id, SubjectID, LibraryID_tumor, SampleID_tumor, external_subject_id, external_sample_id,
+    project_owner, project_name, source, quality, workflow
+  )
+tab_view(meta, id = "metadata")
+```
+
+## Results
+
+```{r}
+#| label: process
+# one row per file type - not all samples have sigs_dbs
+d <- d_raw |>
+  tidyr::unnest_longer(res, indices_to = "filetype")
+# main_cols <- c("rownum", "portal_run_id", "SubjectID", "LibraryID_tumor")
+main_cols <- c("portal_run_id")
+```
+
+```{r}
+#| label: qcsum
+qcsum <- d |>
+  filter(filetype == "qcsum") |>
+  select(all_of(main_cols), res) |>
+  unnest_wider(res)
+```
+
+```{r}
+#| label: pcgr
+pcgr <- d |>
+  filter(filetype == "pcgr_json") |>
+  select(all_of(main_cols), res) |>
+  unnest_wider(res) |>
+  rename(
+    msi_fraction_indels_pcgr = "fracIndels",
+    msi_pcgr = "predicted_class",
+    tmb_pcgr = "tmb_estimate",
+    n_tmb_pcgr = "n_tmb"
+  ) |>
+  mutate(msi_pcgr = sub(" \\(.*\\)", "", msi_pcgr))
+```
+
+```{r}
+#| label: hrd
+hrd_chord <- d |>
+  filter(filetype == "hrd_chord") |>
+  unnest_wider(res) |>
+  select(all_of(main_cols), hrd_chord = "p_hrd")
+hrd_hrdetect <- d |>
+  filter(filetype == "hrd_hrdetect") |>
+  unnest_wider(res) |>
+  select(all_of(main_cols), hrd_hrdetect = "Probability")
+hrd_dragen <- d |>
+  filter(filetype == "hrd_dragen") |>
+  select(all_of(main_cols), res) |>
+  unnest_wider(res) |>
+  select(all_of(main_cols), hrd_dragen = "HRD")
+hrd_all <- hrd_dragen |>
+  left_join(hrd_chord, by = "portal_run_id") |>
+  left_join(hrd_hrdetect, by = "portal_run_id") |>
+  select(portal_run_id, hrd_dragen, hrd_chord, hrd_hrdetect)
+```
+
+```{r}
+#| label: sigs
+sigs_snv2015 <- d |>
+  filter(filetype == "sigs_snv2015") |>
+  select(all_of(main_cols), res) |>
+  tidyr::unnest_wider(res) |>
+  tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
+sigs_snv2020 <- d |>
+  filter(filetype == "sigs_snv2020") |>
+  select(all_of(main_cols), res) |>
+  tidyr::unnest_wider(res) |>
+  tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
+sigs_dbs <- d |>
+  filter(filetype == "sigs_dbs") |>
+  select(all_of(main_cols), res) |>
+  tidyr::unnest_wider(res) |>
+  tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
+sigs_indel <- d |>
+  filter(filetype == "sigs_indel") |>
+  select(all_of(main_cols), res) |>
+  tidyr::unnest_wider(res) |>
+  tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
+dsig <- bind_rows(
+  list(
+    snv2015 = sigs_snv2015, snv2020 = sigs_snv2020, dbs = sigs_dbs, indel = sigs_indel
+  ),
+  .id = "Sig_group"
+)
+
+# keep top two ranked sigs
+dsig_filt <- dsig |>
+  group_by(Sig_group, portal_run_id) |>
+  mutate(tot_sig_vars = sum(Contribution)) |>
+  arrange(Rank) |>
+  slice_head(n = 2) |>
+  # some sigs have same Rank so use explicit sig_rank
+  mutate(sig_rank = row_number()) |>
+  ungroup() |>
+  mutate(
+    sig_summary = glue("{Signature} ({RelFreq} = {Contribution} / {tot_sig_vars})")
+  ) |>
+  select(Sig_group, portal_run_id, sig_rank, sig_summary) |>
+  tidyr::pivot_wider(names_from = sig_rank, values_from = sig_summary, names_prefix = "rank") |>
+  mutate(sig_top2 = paste(rank1, rank2, sep = ", ")) |>
+  select(Sig_group, portal_run_id, sig_top2) |>
+  tidyr::pivot_wider(names_from = Sig_group, values_from = sig_top2) |>
+  select(portal_run_id, snv2015, snv2020, dbs, indel)
+```
+
+```{r}
+#| label: qc_all
+dall <- d_raw |>
+  select(
+    rownum,
+    date_analysed = "start", portal_run_id,
+    SubjectID, LibraryID_tumor, SampleID_tumor,
+    external_subject_id, external_sample_id,
+    project_owner, project_name, source, quality, workflow
+  ) |>
+  left_join(hrd_all, by = "portal_run_id") |>
+  left_join(qcsum, by = "portal_run_id") |>
+  left_join(pcgr, by = "portal_run_id")
+```
+
+### Summary Metrics
+
+```{r}
+#| label: summary_metrics
+tab_view(dall, "summary_metrics")
+```
+
+### HRD Plot
+
+```{r}
+#| label: hrd_plot
+#| fig-width: 15
+#| fig-height: 15
+
+pdat <- dall |>
+  mutate(sbj = glue("{SubjectID}_{LibraryID_tumor}")) |>
+  select(sbj, dragen = hrd_dragen, chord = hrd_chord, hrdetect = hrd_hrdetect)
+p1 <- pdat |>
+  ggplot2::ggplot(aes(x = chord, y = hrdetect)) +
+  ggplot2::geom_point(colour = "#00bfc4") +
+  ggplot2::theme_bw()
+psub1 <- plotly::ggplotly(p1)
+p2 <- pdat |>
+  ggplot2::ggplot(aes(x = chord, y = dragen)) +
+  ggplot2::geom_point(colour = "#f8766d") +
+  ggplot2::theme_bw()
+psub2 <- plotly::ggplotly(p2)
+p3 <- pdat |>
+  ggplot2::ggplot(aes(x = hrdetect, y = dragen)) +
+  ggplot2::geom_point(colour = "#7cae00") +
+  ggplot2::theme_bw()
+psub3 <- plotly::ggplotly(p3)
+# p_all <- p1 + p2 + p3 + plot_layout(ncol = 1)
+plotly::subplot(psub1, psub2, psub3, nrows = 3, titleX = TRUE, titleY = TRUE)
+```
+
+### Signatures
+
+#### All (SNV, Indel, DBS)
+
+Don't show.
+
+```{r eval=FALSE}
+#| label: sig_results_all
+dsig |>
+  left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor),
+    by = "portal_run_id"
+  ) |>
+  select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |>
+  tab_view("sig_results_all")
+```
+
+#### Top 2
+
+```{r}
+#| label: sig_results_top2
+dsig_filt |>
+  left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor),
+    by = "portal_run_id"
+  ) |>
+  select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |>
+  arrange(rownum) |>
+  tab_view("sig_results_top2")
+```
+
+#### Top 3 SNV2015
+
+```{r}
+#| label: sig_results
+#| fig-width: 15
+#| fig-height: 65
+
+sig_order2015 <- paste0("Sig", 1:30)
+# sig_order2020 <- paste0(
+#  "SBS",
+#  c(
+#    1:6,
+#    paste0(7, c("a", "b", "c", "d")),
+#    8:9,
+#    paste0(10, c("a", "b", "c", "d")),
+#    11:16,
+#    paste0(17, c("a", "b")),
+#    18:60,
+#    84:94
+#  )
+# )
+
+p2_prep <- dsig |>
+  filter(
+    Sig_group == "snv2015",
+    Rank %in% c(1:3)
+  ) |>
+  left_join(dall |> select(portal_run_id, date_analysed, SubjectID, LibraryID_tumor), by = "portal_run_id") |>
+  mutate(sbj = as.character(glue("{SubjectID}_{LibraryID_tumor}"))) |>
+  select(date_analysed, sbj, Sig_group, Rank, Signature, Contribution, RelFreq) |>
+  mutate(Signature = factor(Signature, levels = sig_order2015))
+p2 <- p2_prep |>
+  ggplot2::ggplot(aes(x = Contribution, y = sbj, fill = Signature, text = sbj)) +
+  ggplot2::geom_bar(position = "fill", stat = "identity") +
+  ggplot2::theme_bw(base_size = 7)
+
+plotly::ggplotly(p2, tooltip = c("x", "text", "fill"))
+```
+
+## Metadata Summary
+
+::: {.panel-tabset .nav-pills}
+
+### Project Name/Owner
+
+```{r}
+#| label: project_owner_name
+dall |>
+  count(project_name, project_owner) |>
+  knitr::kable()
+```
+
+### Source / Quality
+
+```{r}
+#| label: source_quality
+count(dall, source, quality) |> knitr::kable()
+```
+
+### Workflow
+
+```{r}
+#| label: workflow_summary
+count(dall, workflow) |> knitr::kable()
+```
+
+:::
+
+</div>

From b143b5424d3806e9ba4c01680e5adcceb5a92e69 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sun, 15 Sep 2024 22:55:20 +1000
Subject: [PATCH 8/8] sash: fix reactable; include myriad

---
 .../rmd/umccr_workflows/sash/summary_sash.qmd | 58 ++++++++++++-------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/inst/rmd/umccr_workflows/sash/summary_sash.qmd b/inst/rmd/umccr_workflows/sash/summary_sash.qmd
index 24b0903..ec231d1 100644
--- a/inst/rmd/umccr_workflows/sash/summary_sash.qmd
+++ b/inst/rmd/umccr_workflows/sash/summary_sash.qmd
@@ -26,8 +26,8 @@ format:
       body-width: 1300px
 params:
   title: "UMCCR sash Workflow Summary"
-  date_start: "XXXX-XX-XX"
-  date_end: "XXXX-XX-XX"
+  date_start: "2024-08-29"
+  date_end: "2024-09-07"
 ---
 
 ```{r}
@@ -56,20 +56,21 @@ set.seed(42)
 date_start <- params$date_start
 date_end <- params$date_end
 nogit <- "inst/rmd/umccr_workflows/sash/nogit"
+myriad <- paste0("SBJ0", c("0695", "0847", "0920", "2397", "2456", "2743", "3186", "3242", "4187", "4221"))
 d_raw <- here(glue("{nogit}/results_{date_start}_{date_end}.rds")) |>
   readr::read_rds() |>
   arrange(desc(SubjectID), desc(LibraryID_tumor)) |>
-  mutate(rownum = row_number()) |>
-  relocate(rownum)
-myriad <- paste0("SBJ0", c("0695", "0847", "0920", "2397", "2456", "2743", "3186", "3242", "4187", "4221"))
+  mutate(
+    rownum = row_number(),
+    is_myriad = SubjectID %in% myriad
+  ) |>
+  relocate(rownum) |>
+  relocate(is_myriad, .after = rownum)
 ```
 
 ```{r}
 #| label: funcs
 
-# pal <- colorRamp(c("white", "lightgreen"))
-# rgb(pal(0.5), maxColorValue = 255)
-
 tab_view <- function(x, id, ...) {
   htmltools::browsable(
     htmltools::tagList(
@@ -82,7 +83,7 @@ tab_view <- function(x, id, ...) {
           bordered = TRUE,
           compact = TRUE,
           filterable = TRUE,
-          fullWidth = TRUE,
+          # fullWidth = TRUE,
           height = 800,
           highlight = TRUE,
           pagination = TRUE,
@@ -96,6 +97,18 @@ tab_view <- function(x, id, ...) {
           striped = TRUE,
           wrap = FALSE,
           elementId = id,
+          columns = list(
+            SubjectID = reactable::colDef(
+              sticky = "left",
+              # Add a right border style to visually distinguish the sticky column
+              style = list(borderRight = "1px solid #eee"),
+              headerStyle = list(borderRight = "1px solid #eee")
+            )
+          ),
+          defaultColDef = reactable::colDef(
+            minWidth = 170,
+            headerStyle = list(background = "#f7f7f8")
+          ),
           theme = reactable::reactableTheme(
             borderColor = "#dfe2e5",
             stripedColor = "#f6f8fa",
@@ -117,7 +130,7 @@ tab_view <- function(x, id, ...) {
 #| label: metadata
 meta <- d_raw |>
   select(
-    rownum, portal_run_id, SubjectID, LibraryID_tumor, SampleID_tumor, external_subject_id, external_sample_id,
+    rownum, is_myriad, portal_run_id, SubjectID, LibraryID_tumor, SampleID_tumor, external_subject_id, external_sample_id,
     project_owner, project_name, source, quality, workflow
   )
 tab_view(meta, id = "metadata")
@@ -232,6 +245,7 @@ dsig_filt <- dsig |>
 dall <- d_raw |>
   select(
     rownum,
+    is_myriad,
     date_analysed = "start", portal_run_id,
     SubjectID, LibraryID_tumor, SampleID_tumor,
     external_subject_id, external_sample_id,
@@ -251,46 +265,46 @@ tab_view(dall, "summary_metrics")
 
 ### HRD Plot
 
+Showing 2-way relationships between DRAGEN, CHORD and HRDetect.
+
 ```{r}
 #| label: hrd_plot
 #| fig-width: 15
-#| fig-height: 15
+#| fig-height: 22
 
 pdat <- dall |>
   mutate(sbj = glue("{SubjectID}_{LibraryID_tumor}")) |>
   select(sbj, dragen = hrd_dragen, chord = hrd_chord, hrdetect = hrd_hrdetect)
 p1 <- pdat |>
-  ggplot2::ggplot(aes(x = chord, y = hrdetect)) +
+  ggplot2::ggplot(aes(x = chord, y = hrdetect, label = sbj)) +
   ggplot2::geom_point(colour = "#00bfc4") +
   ggplot2::theme_bw()
 psub1 <- plotly::ggplotly(p1)
 p2 <- pdat |>
-  ggplot2::ggplot(aes(x = chord, y = dragen)) +
+  ggplot2::ggplot(aes(x = chord, y = dragen, label = sbj)) +
   ggplot2::geom_point(colour = "#f8766d") +
   ggplot2::theme_bw()
 psub2 <- plotly::ggplotly(p2)
 p3 <- pdat |>
-  ggplot2::ggplot(aes(x = hrdetect, y = dragen)) +
+  ggplot2::ggplot(aes(x = hrdetect, y = dragen, label = sbj)) +
   ggplot2::geom_point(colour = "#7cae00") +
   ggplot2::theme_bw()
 psub3 <- plotly::ggplotly(p3)
 # p_all <- p1 + p2 + p3 + plot_layout(ncol = 1)
-plotly::subplot(psub1, psub2, psub3, nrows = 3, titleX = TRUE, titleY = TRUE)
+plotly::subplot(psub1, psub2, psub3, nrows = 3, titleX = TRUE, titleY = TRUE, margin = c(0.02, 0.02, 0.04, 0.04))
 ```
 
 ### Signatures
 
 #### All (SNV, Indel, DBS)
 
-Don't show.
-
-```{r eval=FALSE}
+```{r}
 #| label: sig_results_all
 dsig |>
-  left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor),
+  left_join(meta |> select(rownum, is_myriad, portal_run_id, SubjectID, LibraryID_tumor),
     by = "portal_run_id"
   ) |>
-  select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |>
+  select(rownum, is_myriad, portal_run_id, SubjectID, LibraryID_tumor, everything()) |>
   tab_view("sig_results_all")
 ```
 
@@ -299,10 +313,10 @@ dsig |>
 ```{r}
 #| label: sig_results_top2
 dsig_filt |>
-  left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor),
+  left_join(meta |> select(rownum, is_myriad, portal_run_id, SubjectID, LibraryID_tumor),
     by = "portal_run_id"
   ) |>
-  select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |>
+  select(rownum, is_myriad, portal_run_id, SubjectID, LibraryID_tumor, everything()) |>
   arrange(rownum) |>
   tab_view("sig_results_top2")
 ```