From 85803fb5755124dddae282f36b44b716b133e53d Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner028.sumner2.jax.org>
Date: Thu, 18 Jul 2024 15:13:49 -0400
Subject: [PATCH 01/26] initial commit: add cnv_array branch to main.nf

---
 main.nf | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/main.nf b/main.nf
index 31549e6..f1a6f0c 100644
--- a/main.nf
+++ b/main.nf
@@ -48,6 +48,9 @@ else if (params.workflow == "gbrs"){
 else if (params.workflow == "amplicon"){
   include {AMPLICON} from './workflows/amplicon'
 }
+else if (params.workflow == "cnv_array"){
+  include {CNV} from './workflows/cnv_array'
+}
 else {
   // if workflow name is not supported: 
   exit 1, "ERROR: No valid pipeline called. '--workflow ${params.workflow}' is not a valid workflow name."

From e39d4f1bdb4b528c3bfd5fcd2b63e042bed1ee49 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner028.sumner2.jax.org>
Date: Thu, 18 Jul 2024 15:49:32 -0400
Subject: [PATCH 02/26] initial commit

---
 bin/help/cnv.nf  | 13 +++++++++++++
 bin/log/cnv.nf   | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 workflows/cnv.nf | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 bin/help/cnv.nf
 create mode 100644 bin/log/cnv.nf
 create mode 100644 workflows/cnv.nf

diff --git a/bin/help/cnv.nf b/bin/help/cnv.nf
new file mode 100644
index 0000000..45e9c16
--- /dev/null
+++ b/bin/help/cnv.nf
@@ -0,0 +1,13 @@
+def help() {
+    println '''
+Parameter | Default | Description
+
+--idat_folder | /<PATH> | The directory containing IDAT files.
+--output_dir | /<PATH> | The directory to store the output files.
+--bpm_file | /<PATH> | The path to the BPM file.
+--egt_file | /<PATH> | The path to the EGT file.
+-w | /<PATH> | The directory for intermediary files and Nextflow processes. This directory can become quite large. Ensure ample storage.
+--help | false | Print this help message and exit.
+'''
+}
+
diff --git a/bin/log/cnv.nf b/bin/log/cnv.nf
new file mode 100644
index 0000000..156c1e3
--- /dev/null
+++ b/bin/log/cnv.nf
@@ -0,0 +1,46 @@
+import Logos
+
+logo = new Logo()
+println '\n'
+println logo.show()
+
+def param_log(){
+
+if (!params.idat_folder) {
+  error "'--idat_folder': is not provided, it is a required parameter."
+}
+
+if (!params.output_dir) {
+  error "'--output_dir': is not provided, it is a required parameter."
+}
+
+if (!params.bpm_file) {
+  error "'--bpm_file': is not provided, it is a required parameter."
+}
+
+if (!params.egt_file) {
+  error "'--egt_file': is not provided, it is a required parameter."
+}
+
+log.info """
+IAAP_CLI PARAMETER LOG
+
+--comment: ${params.comment}
+
+Results Published to: ${params.output_dir}
+______________________________________________________
+--idat_folder               ${params.idat_folder}
+--output_dir                ${params.output_dir}
+--bpm_file                  ${params.bpm_file}
+--egt_file                  ${params.egt_file}
+-w                          ${workDir}
+--keep_intermediate         ${params.keep_intermediate}
+-c                          ${params.config}
+
+Project Directory: ${projectDir}
+
+Command line call: 
+${workflow.commandLine}
+______________________________________________________
+"""
+}
\ No newline at end of file
diff --git a/workflows/cnv.nf b/workflows/cnv.nf
new file mode 100644
index 0000000..d2b9a0d
--- /dev/null
+++ b/workflows/cnv.nf
@@ -0,0 +1,44 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl=2
+
+
+// import modules
+// include {help} from "${projectDir}/bin/help/cnv.nf"
+// include {param_log} from "${projectDir}/bin/log/cnv.nf"
+
+// Parameter validation
+if (!params.idat_folder || !params.output_dir || !params.bpm_file || !params.egt_file) {
+    exit 1, "All parameters (idat_folder, output_dir, bpm_file, egt_file) are required."
+}
+
+// main workflow
+process IAAP_CLI {
+
+    // container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
+    // errorStrategy 'finish'
+
+    input:
+    path idat_folder from params.idat_folder
+    path output_dir from params.output_dir
+    path bpm_file from params.bpm_file
+    path egt_file from params.egt_file
+
+    script:
+    """
+    mkdir -p $output_dir
+    chmod a+w $output_dir
+
+    echo "Running IAAP_CLI with BPM file: $bpm_file and EGT file: $egt_file" > $output_dir/iaap_cli.log
+
+    /usr/local/bin/iaap-cli/iaap-cli gencall \
+        $bpm_file \
+        $egt_file \
+        $output_dir \
+        --idat-folder $idat_folder \
+        --output-gtc >> $output_dir/iaap_cli.log 2>&1
+    """
+}
+
+workflow {
+    IAAP_CLI()
+}
\ No newline at end of file

From a0cb6180c618412e9ed23c2de0c000eaaf2a5178 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner121.sumner2.jax.org>
Date: Mon, 22 Jul 2024 11:54:05 -0400
Subject: [PATCH 03/26] adding log files and main.nf

---
 bin/help/cnv.nf  | 13 -----------
 main.nf          |  5 +++++
 workflows/cnv.nf | 58 +++++++++++++++++-------------------------------
 3 files changed, 25 insertions(+), 51 deletions(-)
 delete mode 100644 bin/help/cnv.nf

diff --git a/bin/help/cnv.nf b/bin/help/cnv.nf
deleted file mode 100644
index 45e9c16..0000000
--- a/bin/help/cnv.nf
+++ /dev/null
@@ -1,13 +0,0 @@
-def help() {
-    println '''
-Parameter | Default | Description
-
---idat_folder | /<PATH> | The directory containing IDAT files.
---output_dir | /<PATH> | The directory to store the output files.
---bpm_file | /<PATH> | The path to the BPM file.
---egt_file | /<PATH> | The path to the EGT file.
--w | /<PATH> | The directory for intermediary files and Nextflow processes. This directory can become quite large. Ensure ample storage.
---help | false | Print this help message and exit.
-'''
-}
-
diff --git a/main.nf b/main.nf
index f1a6f0c..f8b2143 100644
--- a/main.nf
+++ b/main.nf
@@ -103,4 +103,9 @@ workflow{
   if (params.workflow == "amplicon"){
     AMPLICON()
   }
+  if (params.workflow == "cnv_array"){
+
+    CNV_ARRAY()
+
+  }
 }
diff --git a/workflows/cnv.nf b/workflows/cnv.nf
index d2b9a0d..ec53547 100644
--- a/workflows/cnv.nf
+++ b/workflows/cnv.nf
@@ -1,44 +1,26 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl=2
 
-
-// import modules
-// include {help} from "${projectDir}/bin/help/cnv.nf"
-// include {param_log} from "${projectDir}/bin/log/cnv.nf"
-
-// Parameter validation
-if (!params.idat_folder || !params.output_dir || !params.bpm_file || !params.egt_file) {
-    exit 1, "All parameters (idat_folder, output_dir, bpm_file, egt_file) are required."
+// Import modules
+include { IAAP_CLI } from "${projectDir}/modules/illumina/iaap_cli"
+include { help } from "${projectDir}/bin/help/cnv.nf"
+include { param_log } from "${projectDir}/bin/log/cnv.nf"
+
+// Help if needed
+if (params.help) {
+    help()
+    exit 0
 }
-
-// main workflow
-process IAAP_CLI {
-
-    // container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
-    // errorStrategy 'finish'
-
-    input:
-    path idat_folder from params.idat_folder
-    path output_dir from params.output_dir
-    path bpm_file from params.bpm_file
-    path egt_file from params.egt_file
-
-    script:
-    """
-    mkdir -p $output_dir
-    chmod a+w $output_dir
-
-    echo "Running IAAP_CLI with BPM file: $bpm_file and EGT file: $egt_file" > $output_dir/iaap_cli.log
-
-    /usr/local/bin/iaap-cli/iaap-cli gencall \
-        $bpm_file \
-        $egt_file \
-        $output_dir \
-        --idat-folder $idat_folder \
-        --output-gtc >> $output_dir/iaap_cli.log 2>&1
-    """
+// Log parameter info
+param_log()
+// Parameter validation
+if (!params.idat_folder || !params.bpm_file || !params.egt_file) {
+    exit 1, "All parameters (idat_folder, bpm_file, egt_file) are required."
 }
-
-workflow {
-    IAAP_CLI()
+errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
+// Main workflow
+workflow CNV_ARRAY {
+    IAAP_CLI(
+        idat_folder: params.idat_folder
+    )
 }
\ No newline at end of file

From 3e53bec2e3d41f560ad0a70c0990081d3da45f97 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner121.sumner2.jax.org>
Date: Mon, 22 Jul 2024 11:58:00 -0400
Subject: [PATCH 04/26] config files and log files

---
 bin/help/cnv_array.nf        | 13 +++++++
 config/cnv_array.config      | 75 ++++++++++++++++++++++++++++++++++++
 modules/illumina/iaap_cli.nf | 39 +++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 bin/help/cnv_array.nf
 create mode 100644 config/cnv_array.config
 create mode 100644 modules/illumina/iaap_cli.nf

diff --git a/bin/help/cnv_array.nf b/bin/help/cnv_array.nf
new file mode 100644
index 0000000..45e9c16
--- /dev/null
+++ b/bin/help/cnv_array.nf
@@ -0,0 +1,13 @@
+def help() {
+    println '''
+Parameter | Default | Description
+
+--idat_folder | /<PATH> | The directory containing IDAT files.
+--output_dir | /<PATH> | The directory to store the output files.
+--bpm_file | /<PATH> | The path to the BPM file.
+--egt_file | /<PATH> | The path to the EGT file.
+-w | /<PATH> | The directory for intermediary files and Nextflow processes. This directory can become quite large. Ensure ample storage.
+--help | false | Print this help message and exit.
+'''
+}
+
diff --git a/config/cnv_array.config b/config/cnv_array.config
new file mode 100644
index 0000000..0bc3be5
--- /dev/null
+++ b/config/cnv_array.config
@@ -0,0 +1,75 @@
+//====================  Nextflow/Container Config  ==========
+
+manifest {
+    name = "iaap_cli"
+    description = 'Pipeline for processing of IAAP CLI samples.'
+    author = 'Your Name, Your Organization'
+    version = "0.1.0"
+}
+
+params {
+
+    // Shared params
+    idat_folder = null
+    bpm_file = null
+    egt_file = null
+    output_dir = './'  // Default to current directory
+    organize_by = 'idat'  // Organize by idat folder
+    pubdir = './results'  // Default publication directory
+
+    // Tool-specific params
+    iaap_cli_version = 'v2'
+    container = "quay.io/jaxcompsci/gtc2vcf_with_tools:${params.iaap_cli_version}"
+    mem_threshold = 60.GB
+    low_memory = 8.GB
+    high_memory = 24.GB
+    low_time = '03:00:00'
+    high_time = '12:00:00'
+}
+
+process {
+    withName: IAAP_CLI {
+        cpus = 4
+        memory { params.idat_folder.size() < params.mem_threshold ? params.low_memory : params.high_memory }
+        time { params.idat_folder.size() < params.mem_threshold ? params.low_time : params.high_time }
+        container = params.container
+        errorStrategy {
+            (task.exitStatus == 140) ? {
+                log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"
+                return 'finish'
+            }.call() : 'finish'
+        }
+    }
+}
+
+// Default configuration
+executor {
+    name = 'local'
+    cpus = 4
+    memory = '8 GB'
+}
+
+docker {
+    enabled = true
+}
+
+report {
+    enabled = true
+    file = "${params.pubdir}/report.html"
+}
+
+timeline {
+    enabled = true
+    file = "${params.pubdir}/timeline.html"
+}
+
+trace {
+    enabled = true
+    file = "${params.pubdir}/trace.txt"
+}
+
+params {
+    // Log parameter information
+    def param_log = { log.info("Parameter: ${it.key} = ${it.value}") }
+    params.each(param_log)
+}
\ No newline at end of file
diff --git a/modules/illumina/iaap_cli.nf b/modules/illumina/iaap_cli.nf
new file mode 100644
index 0000000..c4927f8
--- /dev/null
+++ b/modules/illumina/iaap_cli.nf
@@ -0,0 +1,39 @@
+process IAAP_CLI {
+  tag "$idat_folder"
+  cpus = 4
+  memory { idat_folder.size() < 60.GB ? 8.GB : 24.GB }
+  time { idat_folder.size() < 60.GB ? '03:00:00' : '12:00:00' }
+  errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.mem} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
+  
+  container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
+
+  publishDir "${params.pubdir}/${params.organize_by=='idat' ? "$idat_folder" + '/results' : 'iaap_cli'}", pattern:"*.log", mode:'copy'
+
+  input:
+  path idat_folder from params.idat_folder
+  path output_dir from params.output_dir
+  path bpm_file from params.bpm_file
+  path egt_file from params.egt_file
+
+  output:
+  path "$output_dir/iaap_cli.log", emit: iaap_cli_log
+
+  script:
+  """
+  mkdir -p $output_dir
+  chmod a+w $output_dir
+
+  echo "Running IAAP_CLI with BPM file: $bpm_file and EGT file: $egt_file" > $output_dir/iaap_cli.log
+
+  /usr/local/bin/iaap-cli/iaap-cli gencall \
+      $bpm_file \
+      $egt_file \
+      $output_dir \
+      --idat-folder $idat_folder \
+      --output-gtc >> $output_dir/iaap_cli.log 2>&1
+  """
+}
+
+workflow {
+  IAAP_CLI()
+}
\ No newline at end of file

From 9a02b9f72e4d6db36e6a93d752d2f27a9d037119 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner121.sumner2.jax.org>
Date: Mon, 22 Jul 2024 12:08:33 -0400
Subject: [PATCH 05/26] config changes

---
 config/cnv_array.config | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/config/cnv_array.config b/config/cnv_array.config
index 0bc3be5..b928349 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -3,18 +3,17 @@
 manifest {
     name = "iaap_cli"
     description = 'Pipeline for processing of IAAP CLI samples.'
-    author = 'Your Name, Your Organization'
+    author = 'Tejas Temker, Copyright Jackson Laboratory 2024'
     version = "0.1.0"
 }
 
 params {
 
     // Shared params
-    idat_folder = null
+    idat_folder = '.idat'
     bpm_file = null
     egt_file = null
     output_dir = './'  // Default to current directory
-    organize_by = 'idat'  // Organize by idat folder
     pubdir = './results'  // Default publication directory
 
     // Tool-specific params

From b2b59dc8bb6ac19865badabae72bcacd2ba59734 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Mon, 22 Jul 2024 14:40:07 -0400
Subject: [PATCH 06/26] csv input added, iaap_cli working

---
 bin/help/cnv_array.nf               |  2 -
 bin/log/{cnv.nf => cnv_array.nf}    | 13 +----
 bin/shared/extract_cnv_array_csv.nf | 83 +++++++++++++++++++++++++++++
 config/cnv_array.config             | 71 ++----------------------
 main.nf                             |  4 +-
 modules/illumina/iaap_cli.nf        | 41 ++++++--------
 nextflow.config                     | 41 +++++++-------
 workflows/cnv.nf                    | 26 ---------
 workflows/cnv_array.nf              | 33 ++++++++++++
 9 files changed, 161 insertions(+), 153 deletions(-)
 rename bin/log/{cnv.nf => cnv_array.nf} (69%)
 create mode 100644 bin/shared/extract_cnv_array_csv.nf
 delete mode 100644 workflows/cnv.nf
 create mode 100644 workflows/cnv_array.nf

diff --git a/bin/help/cnv_array.nf b/bin/help/cnv_array.nf
index 45e9c16..1345dd2 100644
--- a/bin/help/cnv_array.nf
+++ b/bin/help/cnv_array.nf
@@ -2,8 +2,6 @@ def help() {
     println '''
 Parameter | Default | Description
 
---idat_folder | /<PATH> | The directory containing IDAT files.
---output_dir | /<PATH> | The directory to store the output files.
 --bpm_file | /<PATH> | The path to the BPM file.
 --egt_file | /<PATH> | The path to the EGT file.
 -w | /<PATH> | The directory for intermediary files and Nextflow processes. This directory can become quite large. Ensure ample storage.
diff --git a/bin/log/cnv.nf b/bin/log/cnv_array.nf
similarity index 69%
rename from bin/log/cnv.nf
rename to bin/log/cnv_array.nf
index 156c1e3..0184cf3 100644
--- a/bin/log/cnv.nf
+++ b/bin/log/cnv_array.nf
@@ -6,14 +6,6 @@ println logo.show()
 
 def param_log(){
 
-if (!params.idat_folder) {
-  error "'--idat_folder': is not provided, it is a required parameter."
-}
-
-if (!params.output_dir) {
-  error "'--output_dir': is not provided, it is a required parameter."
-}
-
 if (!params.bpm_file) {
   error "'--bpm_file': is not provided, it is a required parameter."
 }
@@ -27,10 +19,9 @@ IAAP_CLI PARAMETER LOG
 
 --comment: ${params.comment}
 
-Results Published to: ${params.output_dir}
+Results Published to: ${params.pubdir}
 ______________________________________________________
---idat_folder               ${params.idat_folder}
---output_dir                ${params.output_dir}
+--csv_input                 ${params.csv_input}
 --bpm_file                  ${params.bpm_file}
 --egt_file                  ${params.egt_file}
 -w                          ${workDir}
diff --git a/bin/shared/extract_cnv_array_csv.nf b/bin/shared/extract_cnv_array_csv.nf
new file mode 100644
index 0000000..0411a57
--- /dev/null
+++ b/bin/shared/extract_cnv_array_csv.nf
@@ -0,0 +1,83 @@
+// Function to extract information (meta data + file(s)) from csv file(s)
+// https://github.com/nf-core/sarek/blob/master/workflows/sarek.nf#L1084
+
+ANSI_RED = "\u001B[31m";
+ANSI_RESET = "\u001B[0m";
+
+def extract_csv(csv_file) {
+    // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so.
+    file(csv_file).withReader('UTF-8') { reader ->
+        def line, numberOfLinesInSampleSheet = 0;
+        while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++}
+        if (numberOfLinesInSampleSheet < 2) {
+            System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+            System.err.println(ANSI_RED + "Samplesheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." + ANSI_RESET)
+            System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+            System.exit(1)
+        }
+    }
+    // Validate every record, then emit one [sampleID, meta, idat_red, idat_green] tuple per CSV row.
+    Channel.from(csv_file).splitCsv(header: true)
+        .map{ row ->
+            if (!row.sampleID || !row.idat_red || !row.idat_green){
+                System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+                System.err.println(ANSI_RED + "Missing field in csv file header. The csv file must have fields: 'sampleID', 'idat_red', 'idat_green'." + ANSI_RESET)
+                System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
+                System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+                System.exit(1)
+            }
+            [row.sampleID.toString(), row]
+        }.groupTuple()
+        .map{ meta, rows ->
+            def size = rows.size() // `def` keeps this closure-local instead of a shared script-level binding
+            [rows, size]
+        }.transpose()
+        .map{ row, numLanes -> //from here do the usual thing for csv parsing
+        // `row` is a single CSV record; `numLanes` is the number of records sharing its sampleID
+        // (computed by the groupTuple/transpose steps above; not used below).
+        // Metadata to identify samplesheet
+        def meta = [:]
+
+        if (row.sampleID) meta.sampleID = row.sampleID.toString()
+
+        if (row.gender != "XY" && row.gender != "XX" && row.gender != ""){
+            System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+            System.err.println(ANSI_RED + "Gender must be 'XX', 'XY' or empty. " + row.gender + " was provided, and isn't valid." + ANSI_RESET)
+            System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
+            System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+            System.exit(1)
+        }
+
+        if (row.gender == "") {
+            meta.gender = 'NA'
+        } else {
+            meta.gender = row.gender.toString()
+        }
+    
+        // join meta to idat, and check file existence. 
+        try {
+            file(row.idat_red, checkIfExists: true)
+        }
+        catch (Exception e) {
+            System.err.println(ANSI_RED + "---------------------------------------------" + ANSI_RESET)
+            System.err.println(ANSI_RED + "The file: " + row.idat_red + " does not exist. Use absolute paths, and check for correctness." + ANSI_RESET)
+            System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
+            System.err.println(ANSI_RED + "---------------------------------------------" + ANSI_RESET)
+            System.exit(1)
+        }
+        try {
+            file(row.idat_green, checkIfExists: true)
+        }
+        catch (Exception e) {
+            System.err.println(ANSI_RED + "---------------------------------------------" + ANSI_RESET)
+            System.err.println(ANSI_RED + "The file: " + row.idat_green + " does not exist. Use absolute paths, and check for correctness." + ANSI_RESET)
+            System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
+            System.err.println(ANSI_RED + "---------------------------------------------" + ANSI_RESET)
+            System.exit(1)
+        }
+
+        return [meta.sampleID, meta, row.idat_red, row.idat_green]
+
+
+    }
+}
diff --git a/config/cnv_array.config b/config/cnv_array.config
index b928349..26122e2 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -1,74 +1,13 @@
 //====================  Nextflow/Container Config  ==========
 
 manifest {
-    name = "iaap_cli"
-    description = 'Pipeline for processing of IAAP CLI samples.'
-    author = 'Tejas Temker, Copyright Jackson Laboratory 2024'
+    name = "cnv_array"
+    description = 'Pipeline for processing Copy Number Variation from Illumina Genotype Array.'
+    author = 'Tejas Temker, Michael Lloyd, Copyright Jackson Laboratory 2024'
     version = "0.1.0"
 }
 
 params {
-
-    // Shared params
-    idat_folder = '.idat'
-    bpm_file = null
-    egt_file = null
-    output_dir = './'  // Default to current directory
-    pubdir = './results'  // Default publication directory
-
-    // Tool-specific params
-    iaap_cli_version = 'v2'
-    container = "quay.io/jaxcompsci/gtc2vcf_with_tools:${params.iaap_cli_version}"
-    mem_threshold = 60.GB
-    low_memory = 8.GB
-    high_memory = 24.GB
-    low_time = '03:00:00'
-    high_time = '12:00:00'
-}
-
-process {
-    withName: IAAP_CLI {
-        cpus = 4
-        memory { params.idat_folder.size() < params.mem_threshold ? params.low_memory : params.high_memory }
-        time { params.idat_folder.size() < params.mem_threshold ? params.low_time : params.high_time }
-        container = params.container
-        errorStrategy {
-            (task.exitStatus == 140) ? {
-                log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"
-                return 'finish'
-            }.call() : 'finish'
-        }
-    }
-}
-
-// Default configuration
-executor {
-    name = 'local'
-    cpus = 4
-    memory = '8 GB'
-}
-
-docker {
-    enabled = true
+    bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
+    egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
 }
-
-report {
-    enabled = true
-    file = "${params.pubdir}/report.html"
-}
-
-timeline {
-    enabled = true
-    file = "${params.pubdir}/timeline.html"
-}
-
-trace {
-    enabled = true
-    file = "${params.pubdir}/trace.txt"
-}
-
-params {
-    // Log parameter information
-    def param_log = { log.info("Parameter: ${it.key} = ${it.value}") }
-    params.each(param_log)
-}
\ No newline at end of file
diff --git a/main.nf b/main.nf
index f8b2143..73a303a 100644
--- a/main.nf
+++ b/main.nf
@@ -49,7 +49,7 @@ else if (params.workflow == "amplicon"){
   include {AMPLICON} from './workflows/amplicon'
 }
 else if (params.workflow == "cnv_array"){
-  include {CNV} from './workflows/cnv_array'
+  include {CNV_ARRAY} from './workflows/cnv_array'
 }
 else {
   // if workflow name is not supported: 
@@ -104,8 +104,6 @@ workflow{
     AMPLICON()
   }
   if (params.workflow == "cnv_array"){
-
     CNV_ARRAY()
-
   }
 }
diff --git a/modules/illumina/iaap_cli.nf b/modules/illumina/iaap_cli.nf
index c4927f8..ad57ed5 100644
--- a/modules/illumina/iaap_cli.nf
+++ b/modules/illumina/iaap_cli.nf
@@ -1,39 +1,32 @@
 process IAAP_CLI {
-  tag "$idat_folder"
+  
+  tag "$sampleID"
+  
   cpus = 4
-  memory { idat_folder.size() < 60.GB ? 8.GB : 24.GB }
-  time { idat_folder.size() < 60.GB ? '03:00:00' : '12:00:00' }
-  errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.mem} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
+  memory 24.GB
+  time '01:30:00'
+
+  errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
   
   container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
 
-  publishDir "${params.pubdir}/${params.organize_by=='idat' ? "$idat_folder" + '/results' : 'iaap_cli'}", pattern:"*.log", mode:'copy'
+  publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'iaap_cli' }", pattern: "*.gtc", mode:'copy'
+  publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'iaap_cli' }", pattern: "*.log", mode:'copy'
 
   input:
-  path idat_folder from params.idat_folder
-  path output_dir from params.output_dir
-  path bpm_file from params.bpm_file
-  path egt_file from params.egt_file
+  tuple val(sampleID), val(meta), path(red_idat), path(green_idat)
 
   output:
-  path "$output_dir/iaap_cli.log", emit: iaap_cli_log
+  tuple val(sampleID), val(meta), path("*.gtc"), emit: gtc
+  path "iaap_cli.log", emit: iaap_cli_log
 
   script:
   """
-  mkdir -p $output_dir
-  chmod a+w $output_dir
-
-  echo "Running IAAP_CLI with BPM file: $bpm_file and EGT file: $egt_file" > $output_dir/iaap_cli.log
-
   /usr/local/bin/iaap-cli/iaap-cli gencall \
-      $bpm_file \
-      $egt_file \
-      $output_dir \
-      --idat-folder $idat_folder \
-      --output-gtc >> $output_dir/iaap_cli.log 2>&1
+      ${params.bpm_file} \
+      ${params.egt_file} \
+      ./ \
+      --idat-folder ./ \
+      --output-gtc >> iaap_cli.log 2>&1
   """
 }
-
-workflow {
-  IAAP_CLI()
-}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 3fdcb69..dcb0c13 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,29 +1,27 @@
-/*___________________________________________________
-
-             Nextflow DSL2 Main Config
-
-  Authors: Anuj Srivastava, Carolyn Paisie, Barry Guglielmo, Michael Lloyd, Brian Sanderson, Sai Lek, Harshpreet Chandok, Peter Fields
-       Copyright of Jackson Laboratories 2022
-
-_____________________________________________________*/
-
 params {
-    // Select workflow
-    workflow = 'rnaseq'
+    // set workflow
+    pipeline = 'Not_Specified'
+    workflow = params.pipeline
+
+    // define reference_cache directory
+    reference_cache='/projects/omics_share'
 
-    // select config from config folder to use
+    // select config from config folder to use based on workflow
     config = "config/${params.workflow}.config"
 
     // set publish directory for data to save (easier to follow)
-    pubdir = "../${workflow}"
+    pubdir = "/flashscratch/${USER}"
+
+    profile = 'sumner2'
 
     // organize output:
     // by sample folders (with many analysis in one sample folder) or by
     // analysis folder (with many samples in one folder per analysis)
-
     organize_by = 'sample' // analysis
     keep_intermediate = false // true
-    
+    fastq2 = true // default is PE for workflows
+    tmpdir = "/flashscratch/${USER}"  // generic param
+
     // get help
     help = null
 
@@ -31,13 +29,12 @@ params {
     comment = ''
 }
 
-// specific config for the pipeline
-
 
 try {
     includeConfig params.config
 } catch (Exception e) {
-    System.err.println("ERROR: Could not load ${params.config} check that you are using a valid pipeline name")
+    System.err.println("ERROR: Could not load ${params.config} check that you are using a valid workflow name")
+    System.exit(1)
 }
 
 // work directory is important as it will be large, plan accordingly
@@ -47,13 +44,15 @@ manifest {
     name = "The Jackson Laboratory Computational Sciences Nextflow based analysis pipelines"
     homePage = "https://github.com/TheJacksonLaboratory/cs-nf-pipelines"
     mainScript = "main.nf"
-    nextflowVersion = "!>=20.10.0"
-    version = "0.4.1"
+    nextflowVersion = "!>=22.04.3"
+    version = "PIVOT"
+    author = 'Michael Lloyd, Brian Sanderson, Barry Guglielmo, Sai Lek, Peter Fields, Harshpreet Chandok, Carolyn Paisie, Gabriel Rech, Ardian Ferraj, Tejas Temker, Anuj Srivastava. Copyright Jackson Laboratory 2024'
 }
 
+
 profiles {
     sumner       { includeConfig "config/profiles/sumner.config" }
-    sumner2       { includeConfig "config/profiles/sumner2.config" }
+    sumner2      { includeConfig "config/profiles/sumner2.config" }
     elion        { includeConfig "config/profiles/elion.config" }
 }
 
diff --git a/workflows/cnv.nf b/workflows/cnv.nf
deleted file mode 100644
index ec53547..0000000
--- a/workflows/cnv.nf
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl=2
-
-// Import modules
-include { IAAP_CLI } from "${projectDir}/modules/illumina/iaap_cli"
-include { help } from "${projectDir}/bin/help/cnv.nf"
-include { param_log } from "${projectDir}/bin/log/cnv.nf"
-
-// Help if needed
-if (params.help) {
-    help()
-    exit 0
-}
-// Log parameter info
-param_log()
-// Parameter validation
-if (!params.idat_folder || !params.bpm_file || !params.egt_file) {
-    exit 1, "All parameters (idat_folder, bpm_file, egt_file) are required."
-}
-errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
-// Main workflow
-workflow CNV_ARRAY {
-    IAAP_CLI(
-        idat_folder: params.idat_folder
-    )
-}
\ No newline at end of file
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
new file mode 100644
index 0000000..4ae7dc4
--- /dev/null
+++ b/workflows/cnv_array.nf
@@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl=2
+
+// Import modules
+include {help} from "${projectDir}/bin/help/cnv_array.nf"
+include {param_log} from "${projectDir}/bin/log/cnv_array.nf"
+include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
+include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli"
+
+// Help if needed
+if (params.help) {
+    help()
+    exit 0
+}
+
+// Log parameter info
+param_log()
+// Parameter validation
+if (!params.bpm_file || !params.egt_file) {
+    exit 1, "All parameters (idat_folder, bpm_file, egt_file) are required."
+}
+
+if (params.csv_input) {
+    ch_input = extract_csv(file(params.csv_input, checkIfExists: true))
+} else {
+    exit 1, "Workflow requires a CSV manifest. See `--help` for information."   
+}
+
+// Main workflow
+workflow CNV_ARRAY {
+    IAAP_CLI(ch_input)
+    IAAP_CLI.out.gtc.view()
+}

From 2a7e256e22670513e893e3706b823ea5b5ca2580 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner-log4.sumner2.jax.org>
Date: Wed, 24 Jul 2024 11:47:41 -0400
Subject: [PATCH 07/26] BCFtools workflow

---
 bin/help/cnv_array.nf   | 17 +++++++++++++++++
 bin/log/cnv_array.nf    | 39 ++++++++++++++++++++++++++++++++++++++-
 config/cnv_array.config |  5 +++++
 workflows/cnv_array.nf  | 20 ++++++++++++++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/bin/help/cnv_array.nf b/bin/help/cnv_array.nf
index 1345dd2..55da977 100644
--- a/bin/help/cnv_array.nf
+++ b/bin/help/cnv_array.nf
@@ -6,6 +6,23 @@ Parameter | Default | Description
 --egt_file | /<PATH> | The path to the EGT file.
 -w | /<PATH> | The directory for intermediary files and Nextflow processes. This directory can become quite large. Ensure ample storage.
 --help | false | Print this help message and exit.
+
++gtc2vcf --no-version -Ou \
+--bpm | /<PATH> | The path to the BPM file 
+--csv | /<PATH> | The path to csv file 
+--egt | /<PATH> | The patht to egt file 
+--gtcs | /<PATH> | The path to gtgc output 
+--fasta-ref | /<PATH> | The path to reference 
+--extra | /<PATH> | The path to output directory    
+
+bcftools sort -Ou -T ./bcftools. | \
+bcftools norm --no-version -Ob -c x -f ${fasta} | \
+tee bcftools_convert.bcf | \
+    
+bcftools index --force --output bcftools_convert.bcf.csi
+bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
+    """
+
 '''
 }
 
diff --git a/bin/log/cnv_array.nf b/bin/log/cnv_array.nf
index 0184cf3..27194f7 100644
--- a/bin/log/cnv_array.nf
+++ b/bin/log/cnv_array.nf
@@ -14,6 +14,18 @@ if (!params.egt_file) {
   error "'--egt_file': is not provided, it is a required parameter."
 }
 
+if (!params.csv_file) {
+  error "'--csv_file': is not provided, it is a required parameter."
+}
+
+if (!params.fasta_file) {
+  error "'--fasta_file': is not provided, it is a required parameter."
+}
+
+if (!params.tsv_file) {
+  error "'--tsv_file': is not provided, it is a required parameter."
+}
+
 log.info """
 IAAP_CLI PARAMETER LOG
 
@@ -34,4 +46,29 @@ Command line call:
 ${workflow.commandLine}
 ______________________________________________________
 """
-}
\ No newline at end of file
+}
+
+
+log.info """
+BCFTOOLS_GTC2VCF PARAMETER LOG
+
+--comment: ${params.comment}
+
+Results Published to: ${params.pubdir}
+______________________________________________________
+--bpm_file                  ${params.bpm_file}
+--csv_file                  ${params.csv_file}
+--egt_file                  ${params.egt_file}
+--fasta_file                ${params.fasta_file}
+--tsv_file                  ${params.tsv_file}
+-w                          ${workDir}
+--keep_intermediate         ${params.keep_intermediate}
+-c                          ${params.config}
+
+Project Directory: ${projectDir}
+
+Command line call: 
+${workflow.commandLine}
+______________________________________________________
+"""
+}
diff --git a/config/cnv_array.config b/config/cnv_array.config
index 26122e2..2fc65ef 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -10,4 +10,9 @@ manifest {
 params {
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
+
+    csv_file = 'path/to/default.csv'
+    fasta_file = 'path/to/default.fasta'
+    tsv_file = 'path/to/default.tsv'
+    pubdir = 'path/to/output'
 }
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 4ae7dc4..46a2ad2 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -6,6 +6,7 @@ include {help} from "${projectDir}/bin/help/cnv_array.nf"
 include {param_log} from "${projectDir}/bin/log/cnv_array.nf"
 include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
 include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli"
+include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools_gtc2vcf.nf"
 
 // Help if needed
 if (params.help) {
@@ -30,4 +31,23 @@ if (params.csv_input) {
 workflow CNV_ARRAY {
     IAAP_CLI(ch_input)
     IAAP_CLI.out.gtc.view()
+
+    // Define paths for BCFTOOLS_GTC2VCF inputs
+    bpm_file = file(params.bpm_file)
+    csv_file = file(params.csv_file)
+    egt_file = file(params.egt_file)
+    gtcs_dir = IAAP_CLI.out.gtc
+    fasta_file = file(params.fasta_file)
+    tsv_file = file(params.tsv_file)
+
+    // Call BCFTOOLS_GTC2VCF process
+    BCFTOOLS_GTC2VCF(bpm_file, csv_file, egt_file, gtcs_dir, fasta_file, tsv_file)
+
+    // result.view { files -> 
+    //     println "BCF: ${files[0]}"
+    //     println "CSI: ${files[1]}"
+    //     println "VCF: ${files[2]}"
+    //     println "TSV: ${files[3]}"
+    // }
+    BCFTOOLS_GTC2VCF.out.gtc.view()
 }

From 49e7a63ab6c050c2b6867f54db575576872596aa Mon Sep 17 00:00:00 2001
From: Tejas Temker <tejas.temker@jax.org>
Date: Wed, 24 Jul 2024 16:15:35 +0000
Subject: [PATCH 08/26] cnv_array.nf edited online with Bitbucket

---
 workflows/cnv_array.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 46a2ad2..b234524 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -6,7 +6,7 @@ include {help} from "${projectDir}/bin/help/cnv_array.nf"
 include {param_log} from "${projectDir}/bin/log/cnv_array.nf"
 include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
 include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli"
-include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools_gtc2vcf.nf"
+include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools/bcftools_gtct2vcf.nf"
 
 // Help if needed
 if (params.help) {

From edbbb1c7657de67076206831d7c5bf151dd108e1 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner-log4.sumner2.jax.org>
Date: Wed, 24 Jul 2024 12:22:03 -0400
Subject: [PATCH 09/26] adding bcftools_gtct2vcf.nf file

---
 modules/bcftools/bcftools_gtct2vcf.nf | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 modules/bcftools/bcftools_gtct2vcf.nf

diff --git a/modules/bcftools/bcftools_gtct2vcf.nf b/modules/bcftools/bcftools_gtct2vcf.nf
new file mode 100644
index 0000000..70c37df
--- /dev/null
+++ b/modules/bcftools/bcftools_gtct2vcf.nf
@@ -0,0 +1,34 @@
+// bcftools_gtc2vcf.nf
+
+process BCFTOOLS_GTC2VCF {
+      
+    cpus = 4
+    memory 24.GB
+    time '01:30:00'
+    errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
+
+    container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
+    publishDir "${params.pubdir}", mode: 'copy'
+
+    input:
+    tuple path(bpm), path(csv), path(egt), path(gtcs_dir), path(fasta), path(tsv)
+
+    output:
+    tuple path('bcftools_convert.bcf'), path('bcftools_convert.bcf.csi'), path('bcftools_convert.vcf'), path('bcftools_convert.tsv')
+
+    script:
+    """
+    bcftools +gtc2vcf --no-version -Ou \
+    --bpm ${bpm} \
+    --csv ${csv} \
+    --egt ${egt} \
+    --gtcs ${gtcs_dir} \
+    --fasta-ref ${fasta} \
+    --extra ${tsv} | \
+    bcftools sort -Ou -T ./bcftools. | \
+    bcftools norm --no-version -Ob -c x -f ${fasta} | \
+    tee bcftools_convert.bcf | \
+    bcftools index --force --output bcftools_convert.bcf.csi
+    bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
+    """
+}
\ No newline at end of file

From 6a276cc40feccb538fd355ce1323ff42a703dea1 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner044.sumner2.jax.org>
Date: Fri, 26 Jul 2024 12:55:03 -0400
Subject: [PATCH 10/26] gtc2vcf files

---
 bin/help/cnv_array.nf                 |  41 +++++----
 bin/log/cnv_array.nf                  | 118 ++++++++++----------------
 config/cnv_array.config               |  11 +--
 modules/bcftools/bcftools_gtct2vcf.nf |  18 +++-
 workflows/cnv_array.nf                |  25 ++----
 5 files changed, 87 insertions(+), 126 deletions(-)

diff --git a/bin/help/cnv_array.nf b/bin/help/cnv_array.nf
index 55da977..2024b45 100644
--- a/bin/help/cnv_array.nf
+++ b/bin/help/cnv_array.nf
@@ -1,28 +1,25 @@
 def help() {
     println '''
-Parameter | Default | Description
+Parameter        | Default | Description
+-----------------|---------|---------------------------------------------------------------------------
+--bpm_file       | /<PATH> | The path to the BPM file.
+--egt_file       | /<PATH> | The path to the EGT file.
+-w               | /<PATH> | The directory for intermediary files and Nextflow processes. Ensure ample storage.
+--help           | false   | Print this help message and exit.
 
---bpm_file | /<PATH> | The path to the BPM file.
---egt_file | /<PATH> | The path to the EGT file.
--w | /<PATH> | The directory for intermediary files and Nextflow processes. This directory can become quite large. Ensure ample storage.
---help | false | Print this help message and exit.
-
-+gtc2vcf --no-version -Ou \
---bpm | /<PATH> | The path to the BPM file 
---csv | /<PATH> | The path to csv file 
---egt | /<PATH> | The patht to egt file 
---gtcs | /<PATH> | The path to gtgc output 
---fasta-ref | /<PATH> | The path to reference 
---extra | /<PATH> | The path to output directory    
-
-bcftools sort -Ou -T ./bcftools. | \
-bcftools norm --no-version -Ob -c x -f ${fasta} | \
-tee bcftools_convert.bcf | \
-    
-bcftools index --force --output bcftools_convert.bcf.csi
-bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
-    """
+--bpm            | /<PATH> | The path to the BPM file.
+--csv            | /<PATH> | The path to the CSV file.
+--egt            | /<PATH> | The path to the EGT file.
+--gtcs           | /<PATH> | The path to GTC output.
+--fasta-ref      | /<PATH> | The path to the reference FASTA file.
+--extra          | /<PATH> | The path to the output directory.
 
+// Example usage of BCFTOOLS:
+// ---------------------------
+// bcftools sort -Ou -T ./bcftools. | \\
+// bcftools norm --no-version -Ob -c x -f <fasta> | \\
+// tee bcftools_convert.bcf | \\
+// bcftools index --force --output bcftools_convert.bcf.csi
+// bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
 '''
 }
-
diff --git a/bin/log/cnv_array.nf b/bin/log/cnv_array.nf
index 27194f7..2f9dfbf 100644
--- a/bin/log/cnv_array.nf
+++ b/bin/log/cnv_array.nf
@@ -1,74 +1,46 @@
-import Logos
-
-logo = new Logo()
-println '\n'
-println logo.show()
-
-def param_log(){
-
-if (!params.bpm_file) {
-  error "'--bpm_file': is not provided, it is a required parameter."
-}
-
-if (!params.egt_file) {
-  error "'--egt_file': is not provided, it is a required parameter."
-}
-
-if (!params.csv_file) {
-  error "'--csv_file': is not provided, it is a required parameter."
-}
-
-if (!params.fasta_file) {
-  error "'--fasta_file': is not provided, it is a required parameter."
-}
-
-if (!params.tsv_file) {
-  error "'--tsv_file': is not provided, it is a required parameter."
-}
-
-log.info """
-IAAP_CLI PARAMETER LOG
-
---comment: ${params.comment}
-
-Results Published to: ${params.pubdir}
-______________________________________________________
---csv_input                 ${params.csv_input}
---bpm_file                  ${params.bpm_file}
---egt_file                  ${params.egt_file}
--w                          ${workDir}
---keep_intermediate         ${params.keep_intermediate}
--c                          ${params.config}
-
-Project Directory: ${projectDir}
-
-Command line call: 
-${workflow.commandLine}
-______________________________________________________
-"""
-}
-
-
-log.info """
-BCFTOOLS_GTC2VCF PARAMETER LOG
-
---comment: ${params.comment}
-
-Results Published to: ${params.pubdir}
-______________________________________________________
---bpm_file                  ${params.bpm_file}
---csv_file                  ${params.csv_file}
---egt_file                  ${params.egt_file}
---fasta_file                ${params.fasta_file}
---tsv_file                  ${params.tsv_file}
--w                          ${workDir}
---keep_intermediate         ${params.keep_intermediate}
--c                          ${params.config}
-
-Project Directory: ${projectDir}
-
-Command line call: 
-${workflow.commandLine}
-______________________________________________________
-"""
+def param_log() {
+    // Check required parameters
+    if (!params.bpm_file) {
+        error "'--bpm_file': is not provided, it is a required parameter."
+    }
+
+    if (!params.egt_file) {
+        error "'--egt_file': is not provided, it is a required parameter."
+    }
+
+    if (!params.csv_file) {
+        error "'--csv_file': is not provided, it is a required parameter."
+    }
+
+    if (!params.fasta_file) {
+        error "'--fasta_file': is not provided, it is a required parameter."
+    }
+
+    if (!params.tsv_file) {
+        error "'--tsv_file': is not provided, it is a required parameter."
+    }
+
+    // Log parameter information
+    log.info """
+    CNV_ARRAY PARAMETER LOG
+
+    --comment: ${params.comment ?: 'N/A'}
+
+    Results Published to: ${params.pubdir ?: 'N/A'}
+    ______________________________________________________
+    --idat_folder               ${params.idat_folder ?: 'N/A'}
+    --bpm_file                  ${params.bpm_file}
+    --egt_file                  ${params.egt_file}
+    --csv_file                  ${params.csv_file}
+    --fasta_file                ${params.fasta_file}
+    --tsv_file                  ${params.tsv_file}
+    -w                          ${workDir}
+    --keep_intermediate         ${params.keep_intermediate ?: 'N/A'}
+    -c                          ${params.config ?: 'N/A'}
+    
+    Project Directory: ${projectDir}
+    Command line call: 
+    ${workflow.commandLine}
+    ______________________________________________________
+    """
 }
diff --git a/config/cnv_array.config b/config/cnv_array.config
index 2fc65ef..7bd7d28 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -1,5 +1,3 @@
-//====================  Nextflow/Container Config  ==========
-
 manifest {
     name = "cnv_array"
     description = 'Pipeline for processing Copy Number Variation from Illumina Genotype Array.'
@@ -10,9 +8,8 @@ manifest {
 params {
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
-
-    csv_file = 'path/to/default.csv'
-    fasta_file = 'path/to/default.fasta'
-    tsv_file = 'path/to/default.tsv'
-    pubdir = 'path/to/output'
+    idat_folder = '/home/temket/cnv_workflow/data/raw_idat'
+    gtc_file = '/flashscratch/lloydm/CNV_test/example_sample_input.csv'
+    ref_fa = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
+    tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
 }
diff --git a/modules/bcftools/bcftools_gtct2vcf.nf b/modules/bcftools/bcftools_gtct2vcf.nf
index 70c37df..a0aecf4 100644
--- a/modules/bcftools/bcftools_gtct2vcf.nf
+++ b/modules/bcftools/bcftools_gtct2vcf.nf
@@ -1,7 +1,17 @@
-// bcftools_gtc2vcf.nf
+def prepare_bcftools_inputs(ch_input) {
+    bpm_file = file(params.bpm_file)
+    csv_file = file(params.csv_input)
+    egt_file = file(params.egt_file)
+    gtcs_dir = IAAP_CLI.out.gtc
+    fasta_file = file(params.fasta_file)
+    tsv_file = file(params.tsv_file)
 
+    return tuple(bpm_file, csv_file, egt_file, gtcs_dir, fasta_file, tsv_file)
+}
+
+
+// Define BCFTOOLS_GTC2VCF process
 process BCFTOOLS_GTC2VCF {
-      
     cpus = 4
     memory 24.GB
     time '01:30:00'
@@ -11,7 +21,7 @@ process BCFTOOLS_GTC2VCF {
     publishDir "${params.pubdir}", mode: 'copy'
 
     input:
-    tuple path(bpm), path(csv), path(egt), path(gtcs_dir), path(fasta), path(tsv)
+    tuple path(bpm_file), path(csv_file), path(egt_file), path(gtcs_dir), path(fasta_file), path(tsv_file) from prepare_bcftools_inputs
 
     output:
     tuple path('bcftools_convert.bcf'), path('bcftools_convert.bcf.csi'), path('bcftools_convert.vcf'), path('bcftools_convert.tsv')
@@ -31,4 +41,4 @@ process BCFTOOLS_GTC2VCF {
     bcftools index --force --output bcftools_convert.bcf.csi
     bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
     """
-}
\ No newline at end of file
+}
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index b234524..177872d 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -5,7 +5,7 @@ nextflow.enable.dsl=2
 include {help} from "${projectDir}/bin/help/cnv_array.nf"
 include {param_log} from "${projectDir}/bin/log/cnv_array.nf"
 include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
-include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli"
+include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli.nf"
 include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools/bcftools_gtct2vcf.nf"
 
 // Help if needed
@@ -27,27 +27,12 @@ if (params.csv_input) {
     exit 1, "Workflow requires a CSV manifest. See `--help` for information."   
 }
 
+// Extract CSV input
+ch_input = extract_csv(file(params.csv_input, checkIfExists: true))
+
 // Main workflow
 workflow CNV_ARRAY {
     IAAP_CLI(ch_input)
     IAAP_CLI.out.gtc.view()
-
-    // Define paths for BCFTOOLS_GTC2VCF inputs
-    bpm_file = file(params.bpm_file)
-    csv_file = file(params.csv_file)
-    egt_file = file(params.egt_file)
-    gtcs_dir = IAAP_CLI.out.gtc
-    fasta_file = file(params.fasta_file)
-    tsv_file = file(params.tsv_file)
-
-    // Call BCFTOOLS_GTC2VCF process
-    BCFTOOLS_GTC2VCF(bpm_file, csv_file, egt_file, gtcs_dir, fasta_file, tsv_file)
-
-    // result.view { files -> 
-    //     println "BCF: ${files[0]}"
-    //     println "CSI: ${files[1]}"
-    //     println "VCF: ${files[2]}"
-    //     println "TSV: ${files[3]}"
-    // }
-    BCFTOOLS_GTC2VCF.out.gtc.view()
+    BCFTOOLS_GTC2VCF(prepare_bcftools_inputs)
 }

From cf89bbfc0e7cdf9efe9e243e21607c9e33d9b5c8 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Fri, 26 Jul 2024 14:03:21 -0400
Subject: [PATCH 11/26] working through gtc2vcf

---
 bin/log/cnv_array.nf                  | 12 ++++----
 config/cnv_array.config               |  4 +--
 modules/bcftools/bcftools_gtct2vcf.nf | 43 ++++++++++-----------------
 modules/illumina/iaap_cli.nf          |  2 +-
 workflows/cnv_array.nf                |  4 +--
 5 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/bin/log/cnv_array.nf b/bin/log/cnv_array.nf
index 2f9dfbf..75d2990 100644
--- a/bin/log/cnv_array.nf
+++ b/bin/log/cnv_array.nf
@@ -8,12 +8,12 @@ def param_log() {
         error "'--egt_file': is not provided, it is a required parameter."
     }
 
-    if (!params.csv_file) {
-        error "'--csv_file': is not provided, it is a required parameter."
+    if (!params.gtc_csv) {
+        error "'--gtc_csv': is not provided, it is a required parameter."
     }
 
-    if (!params.fasta_file) {
-        error "'--fasta_file': is not provided, it is a required parameter."
+    if (!params.ref_fa) {
+        error "'--ref_fa': is not provided, it is a required parameter."
     }
 
     if (!params.tsv_file) {
@@ -31,8 +31,8 @@ def param_log() {
     --idat_folder               ${params.idat_folder ?: 'N/A'}
     --bpm_file                  ${params.bpm_file}
     --egt_file                  ${params.egt_file}
-    --csv_file                  ${params.csv_file}
-    --fasta_file                ${params.fasta_file}
+    --gtc_csv                   ${params.gtc_csv}
+    --ref_fa                    ${params.ref_fa}
     --tsv_file                  ${params.tsv_file}
     -w                          ${workDir}
     --keep_intermediate         ${params.keep_intermediate ?: 'N/A'}
diff --git a/config/cnv_array.config b/config/cnv_array.config
index 7bd7d28..eb03404 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -9,7 +9,7 @@ params {
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
     idat_folder = '/home/temket/cnv_workflow/data/raw_idat'
-    gtc_file = '/flashscratch/lloydm/CNV_test/example_sample_input.csv'
-    ref_fa = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
+    gtc_csv = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
+    ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
 }
diff --git a/modules/bcftools/bcftools_gtct2vcf.nf b/modules/bcftools/bcftools_gtct2vcf.nf
index a0aecf4..f047a36 100644
--- a/modules/bcftools/bcftools_gtct2vcf.nf
+++ b/modules/bcftools/bcftools_gtct2vcf.nf
@@ -1,44 +1,33 @@
-def prepare_bcftools_inputs(ch_input) {
-    bpm_file = file(params.bpm_file)
-    csv_file = file(params.csv_input)
-    egt_file = file(params.egt_file)
-    gtcs_dir = IAAP_CLI.out.gtc
-    fasta_file = file(params.fasta_file)
-    tsv_file = file(params.tsv_file)
-
-    return tuple(bpm_file, csv_file, egt_file, gtcs_dir, fasta_file, tsv_file)
-}
-
-
 // Define BCFTOOLS_GTC2VCF process
 process BCFTOOLS_GTC2VCF {
-    cpus = 4
+    cpus = 1
     memory 24.GB
     time '01:30:00'
     errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
 
     container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
-    publishDir "${params.pubdir}", mode: 'copy'
+    publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'bcftools' }", mode: 'copy'
 
     input:
-    tuple path(bpm_file), path(csv_file), path(egt_file), path(gtcs_dir), path(fasta_file), path(tsv_file) from prepare_bcftools_inputs
+    tuple val(sampleID), val(meta), path(gtc)
 
     output:
-    tuple path('bcftools_convert.bcf'), path('bcftools_convert.bcf.csi'), path('bcftools_convert.vcf'), path('bcftools_convert.tsv')
-
+    tuple val(sampleID), val(meta), path('*_convert.bcf'), path('*_convert.bcf.csi'), path('*_convert.vcf'), path('*_convert.tsv'), emit: gtc2vcf
+    // tuple val(sampleID), val(meta), path('*.BAF'), path('*.LRR'), emit: baf_lrr
+    
     script:
     """
     bcftools +gtc2vcf --no-version -Ou \
-    --bpm ${bpm} \
-    --csv ${csv} \
-    --egt ${egt} \
-    --gtcs ${gtcs_dir} \
-    --fasta-ref ${fasta} \
-    --extra ${tsv} | \
+    --bpm ${params.bpm_file} \
+    --csv ${params.gtc_csv} \
+    --egt ${params.egt_file} \
+    --gtcs ./ \
+    --fasta-ref ${params.ref_fa} \
+    --extra ${sampleID}_convert.tsv | \
     bcftools sort -Ou -T ./bcftools. | \
-    bcftools norm --no-version -Ob -c x -f ${fasta} | \
-    tee bcftools_convert.bcf | \
-    bcftools index --force --output bcftools_convert.bcf.csi
-    bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
+    bcftools norm --no-version -Ob -c x -f ${params.ref_fa} | \
+    tee ${sampleID}_convert.bcf | \
+    bcftools index --force --output ${sampleID}_convert.bcf.csi
+    bcftools convert -O v -o ${sampleID}_convert.vcf ${sampleID}_convert.bcf
     """
 }
diff --git a/modules/illumina/iaap_cli.nf b/modules/illumina/iaap_cli.nf
index ad57ed5..8bea6aa 100644
--- a/modules/illumina/iaap_cli.nf
+++ b/modules/illumina/iaap_cli.nf
@@ -2,7 +2,7 @@ process IAAP_CLI {
   
   tag "$sampleID"
   
-  cpus = 4
+  cpus = 1
   memory 24.GB
   time '01:30:00'
 
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 177872d..ec996a5 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -33,6 +33,6 @@ ch_input = extract_csv(file(params.csv_input, checkIfExists: true))
 // Main workflow
 workflow CNV_ARRAY {
     IAAP_CLI(ch_input)
-    IAAP_CLI.out.gtc.view()
-    BCFTOOLS_GTC2VCF(prepare_bcftools_inputs)
+    BCFTOOLS_GTC2VCF(IAAP_CLI.out.gtc)
+    BCFTOOLS_GTC2VCF.out.gtc2vcf.view()
 }

From 5acfdc0a614b806a27c2629e87d1ba19994fe39b Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner061.sumner2.jax.org>
Date: Tue, 30 Jul 2024 09:47:27 -0400
Subject: [PATCH 12/26] wire bcftools query module into cnv_array workflow

---
 bin/help/cnv_array.nf                 | 7 -------
 config/cnv_array.config               | 5 +++--
 modules/bcftools/bcftools_gtct2vcf.nf | 2 +-
 workflows/cnv_array.nf                | 5 +++++
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/bin/help/cnv_array.nf b/bin/help/cnv_array.nf
index 2024b45..5eef671 100644
--- a/bin/help/cnv_array.nf
+++ b/bin/help/cnv_array.nf
@@ -14,12 +14,5 @@ Parameter        | Default | Description
 --fasta-ref      | /<PATH> | The path to the reference FASTA file.
 --extra          | /<PATH> | The path to the output directory.
 
-// Example usage of BCFTOOLS:
-// ---------------------------
-// bcftools sort -Ou -T ./bcftools. | \\
-// bcftools norm --no-version -Ob -c x -f <fasta> | \\
-// tee bcftools_convert.bcf | \\
-// bcftools index --force --output bcftools_convert.bcf.csi
-// bcftools convert -O v -o bcftools_convert.vcf bcftools_convert.bcf
 '''
 }
diff --git a/config/cnv_array.config b/config/cnv_array.config
index eb03404..7ebb716 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -6,10 +6,11 @@ manifest {
 }
 
 params {
+    gtc_csv = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
-    idat_folder = '/home/temket/cnv_workflow/data/raw_idat'
-    gtc_csv = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
+    idat_folder = '/home/temket/cnv_workflow/data/raw_idat/'
     ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
+
 }
diff --git a/modules/bcftools/bcftools_gtct2vcf.nf b/modules/bcftools/bcftools_gtct2vcf.nf
index f047a36..ece9c2c 100644
--- a/modules/bcftools/bcftools_gtct2vcf.nf
+++ b/modules/bcftools/bcftools_gtct2vcf.nf
@@ -13,7 +13,7 @@ process BCFTOOLS_GTC2VCF {
 
     output:
     tuple val(sampleID), val(meta), path('*_convert.bcf'), path('*_convert.bcf.csi'), path('*_convert.vcf'), path('*_convert.tsv'), emit: gtc2vcf
-    // tuple val(sampleID), val(meta), path('*.BAF'), path('*.LRR'), emit: baf_lrr
+
     
     script:
     """
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index ec996a5..1f23504 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -7,6 +7,8 @@ include {param_log} from "${projectDir}/bin/log/cnv_array.nf"
 include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
 include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli.nf"
 include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools/bcftools_gtct2vcf.nf"
+include {BCFTOOLS_QUERY_ASCAT} from "${projectDir}/modules/bcftools/bcftools_query_ascat.nf"
+
 
 // Help if needed
 if (params.help) {
@@ -35,4 +37,7 @@ workflow CNV_ARRAY {
     IAAP_CLI(ch_input)
     BCFTOOLS_GTC2VCF(IAAP_CLI.out.gtc)
     BCFTOOLS_GTC2VCF.out.gtc2vcf.view()
+    BCFTOOLS_QUERY_ASCAT(BCFTOOLS_GTC2VCF.out.gtc2vcf)
+        BCFTOOLS_QUERY_ASCAT.out.bcftools_query.view()
+
 }

From bab489c4c53b2efe9fc3f75ec151cfe0a48f1266 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner061.sumner2.jax.org>
Date: Tue, 30 Jul 2024 10:12:35 -0400
Subject: [PATCH 13/26] add bcftools_query_ascat module (BAF/LRR extraction)

---
 modules/bcftools/bcftools_query_ascat.nf | 25 ++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 modules/bcftools/bcftools_query_ascat.nf

diff --git a/modules/bcftools/bcftools_query_ascat.nf b/modules/bcftools/bcftools_query_ascat.nf
new file mode 100644
index 0000000..426af84
--- /dev/null
+++ b/modules/bcftools/bcftools_query_ascat.nf
@@ -0,0 +1,25 @@
+process BCFTOOLS_QUERY_ASCAT {
+    cpus 1
+    memory 8.GB
+    time '01:00:00'
+    errorStrategy 'finish'
+
+    container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
+    publishDir "${params.pubdir}/${params.organize_by == 'sample' ? sampleID : 'bcftools'}", mode: 'copy'
+
+    input:
+    tuple val(sampleID), val(meta), path(bcf), path(csi), path(vcf), path(tsv)
+
+    output:
+    tuple val(sampleID), val(meta), path('*_convert.BAF'), path('*_convert.LRR'), emit: bcftools_query
+
+    script:
+    """
+    (bcftools query -l ${sampleID}_convert.bcf | awk 'BEGIN{printf("\\tCHROM\\tPOS");} {printf("\\t%s",\$1);} END{printf("\\n");}'  &&  bcftools query -f '%ID\\t%CHROM\\t%POS[\\t%BAF]\\n' ${sampleID}_convert.bcf) > ${sampleID}_convert.BAF
+    
+    (bcftools query -l ${sampleID}_convert.bcf | awk 'BEGIN{printf("\\tCHROM\\tPOS");} {printf("\\t%s",\$1);} END{printf("\\n");}'  &&  bcftools query -f '%ID\\t%CHROM\\t%POS[\\t%LRR]\\n' ${sampleID}_convert.bcf) > ${sampleID}_convert.LRR
+    
+    sed -i s/chr// ${sampleID}_convert.BAF
+    sed -i s/chr// ${sampleID}_convert.LRR
+    """
+}

From 0d583e9181ef93c181853e0575c077929472cef0 Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner-log3.sumner2.jax.org>
Date: Tue, 30 Jul 2024 15:52:54 -0400
Subject: [PATCH 14/26] commit ascat module

---
 config/cnv_array.config                  | 5 ++++-
 modules/bcftools/bcftools_query_ascat.nf | 1 +
 modules/illumina/iaap_cli.nf             | 1 +
 workflows/cnv_array.nf                   | 8 ++++++--
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/config/cnv_array.config b/config/cnv_array.config
index 7ebb716..09ab81b 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -7,10 +7,13 @@ manifest {
 
 params {
     gtc_csv = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
+    
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
     idat_folder = '/home/temket/cnv_workflow/data/raw_idat/'
     ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
-
+    snp_platform = 'IlluminaCytoSNP'
+    GC_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
+    RT_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg19.txt'
 }
diff --git a/modules/bcftools/bcftools_query_ascat.nf b/modules/bcftools/bcftools_query_ascat.nf
index 426af84..8916726 100644
--- a/modules/bcftools/bcftools_query_ascat.nf
+++ b/modules/bcftools/bcftools_query_ascat.nf
@@ -12,6 +12,7 @@ process BCFTOOLS_QUERY_ASCAT {
 
     output:
     tuple val(sampleID), val(meta), path('*_convert.BAF'), path('*_convert.LRR'), emit: bcftools_query
+    tuple path('*_convert.BAF'), path('*_convert.LRR'), emit: bafnlrr
 
     script:
     """
diff --git a/modules/illumina/iaap_cli.nf b/modules/illumina/iaap_cli.nf
index 8bea6aa..c1f46b7 100644
--- a/modules/illumina/iaap_cli.nf
+++ b/modules/illumina/iaap_cli.nf
@@ -18,6 +18,7 @@ process IAAP_CLI {
 
   output:
   tuple val(sampleID), val(meta), path("*.gtc"), emit: gtc
+  tuple val(sampleID), val(meta), emit: ascat2r
   path "iaap_cli.log", emit: iaap_cli_log
 
   script:
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 1f23504..28354e7 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -8,6 +8,7 @@ include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
 include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli.nf"
 include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools/bcftools_gtct2vcf.nf"
 include {BCFTOOLS_QUERY_ASCAT} from "${projectDir}/modules/bcftools/bcftools_query_ascat.nf"
+include {ASCAT} from "${projectDir}/modules/r/ASCAT.nf"
 
 
 // Help if needed
@@ -28,6 +29,8 @@ if (params.csv_input) {
 } else {
     exit 1, "Workflow requires a CSV manifest. See `--help` for information."   
 }
+GC_file = file(params.gc_file, checkIfExists: true)
+RT_file = file(params.rt_file, checkIfExists: true)
 
 // Extract CSV input
 ch_input = extract_csv(file(params.csv_input, checkIfExists: true))
@@ -38,6 +41,7 @@ workflow CNV_ARRAY {
     BCFTOOLS_GTC2VCF(IAAP_CLI.out.gtc)
     BCFTOOLS_GTC2VCF.out.gtc2vcf.view()
     BCFTOOLS_QUERY_ASCAT(BCFTOOLS_GTC2VCF.out.gtc2vcf)
-        BCFTOOLS_QUERY_ASCAT.out.bcftools_query.view()
-
+    BCFTOOLS_QUERY_ASCAT.out.bcftools_query.view()
+    ASCAT(IAAP_CLI.out.ascat2r,BCFTOOLS_QUERY_ASCAT.out.bafnlrr, params.platform,GC_file,RT_file)
+    ASCAT.out.ascat.view()
 }

From b8cf928730f6c2de26c6df88e0cbe39e7983088d Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner-log3.sumner2.jax.org>
Date: Tue, 30 Jul 2024 15:55:53 -0400
Subject: [PATCH 15/26] ASCAT

---
 bin/cnv_array/ASCAT_run.R | 103 ++++++++++++++++++++++++++++++++++++++
 modules/r/ASCAT.nf        |  37 ++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 bin/cnv_array/ASCAT_run.R
 create mode 100644 modules/r/ASCAT.nf

diff --git a/bin/cnv_array/ASCAT_run.R b/bin/cnv_array/ASCAT_run.R
new file mode 100644
index 0000000..85f7c29
--- /dev/null
+++ b/bin/cnv_array/ASCAT_run.R
@@ -0,0 +1,103 @@
+suppressMessages(library(ASCAT))
+
+### ASCAT Run ######
+
+# Note: this script expects to be run on single-sample BAF/LRR files (one sample per ASCAT run).
+
+args=(commandArgs(TRUE))
+
+sampleID = args[1]
+LRR_file = args[2]
+BAF_file = args[3]
+gender   = args[4]
+platform = args[5]
+GC_file  = args[6]
+RT_file  = args[7]
+
+######
+
+# Expected SNP POS file: 
+# Probe Set ID    Chromosome      Physical Position
+# CN_473963       1       61736
+# CN_473964       1       61808
+
+## the above can be taken from the BAF file. The BAF file contains positions for all valid SNPs. 
+SNPpos <- read.table(BAF_file, sep = "\t", header = TRUE)[ ,1:3]
+colnames(SNPpos) <- c('Probe_Set_ID', 'Chromosome', 'Physical_Position')
+
+##
+
+ascat.bc = ascat.loadData(Tumor_LogR_file = LRR_file, Tumor_BAF_file = BAF_file, gender = gender, genomeVersion = "hg38")
+
+ascat.bc$samples[1] <- sampleID
+colnames(ascat.bc[["Tumor_LogR"]]) <- sampleID
+colnames(ascat.bc[["Tumor_BAF"]]) <- sampleID
+
+ascat.plotRawData(ascat.bc, img.prefix = "Before_correction_")
+
+ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = GC_file, replictimingfile = RT_file)
+
+ascat.plotRawData(ascat.bc, img.prefix = "After_correction_")
+
+gg = ascat.predictGermlineGenotypes(ascat.bc, platform = platform)
+
+ascat.bc = ascat.aspcf(ascat.bc, ascat.gg = gg)
+
+ascat.plotSegmentedData(ascat.bc)
+
+ascat.output = ascat.runAscat(ascat.bc, write_segments = T)
+
+##
+
+QC = ascat.metrics(ascat.bc, ascat.output)
+
+write.table(as.data.frame(QC), file = paste0(sampleID, "_sample.QC.txt"), sep="\t", quote=F, row.names=F, col.names=T)
+
+save(ascat.bc, ascat.output, QC, file = paste0(sampleID, "_ASCAT_objects.Rdata"))
+
+##
+
+if ( length(ascat.output$failedarrays) == 0 ) {
+  
+  num_probes <- vector(mode="numeric", length=nrow(ascat.output$segments_raw))
+  for (i in 1:nrow(ascat.output$segments_raw)) {
+    L1 = which(SNPpos$Chromosome == ascat.output$segments_raw$chr[i] & SNPpos$Physical_Position == ascat.output$segments_raw$startpos[i])
+    L2 = which(SNPpos$Chromosome ==  ascat.output$segments_raw$chr[i] & SNPpos$Physical_Position == ascat.output$segments_raw$endpos[i])
+    num_probes[i] = L2[length(L2)] - L1[1] + 1
+  }
+  seg_raw = cbind(ascat.output$segments_raw,num_probes)
+  
+  num_probes <- vector(mode="numeric", length=nrow(ascat.output$segments))
+  for (i in 1:nrow(ascat.output$segments)) {
+    
+    #print(i)
+    L1 = which(SNPpos$Chromosome == ascat.output$segments$chr[i] & SNPpos$Physical_Position == ascat.output$segments$startpos[i])
+    L2 = which(SNPpos$Chromosome ==  ascat.output$segments$chr[i] & SNPpos$Physical_Position == ascat.output$segments$endpos[i])
+    num_probes[i] = L2[length(L2)] - L1[1] + 1
+    
+  }
+  seg = cbind(ascat.output$segments,num_probes)
+  
+  seg_raw_dfs <- split(seg_raw, seg_raw$sample)
+  seg_dfs <- split(seg, seg$sample)
+  
+  for (samp in names(seg_raw_dfs)){
+    write.table(seg_raw_dfs[[samp]], file = paste0(samp, ".segments_raw.txt"), sep="\t", quote=F, row.names=F)
+    write.table(seg_dfs[[samp]], file = paste0(samp, ".segments.txt"), sep="\t", quote=F, row.names=F)
+    write.table(as.data.frame(ascat.output$aberrantcellfraction)[row.names(as.data.frame(ascat.output$aberrantcellfraction)) %in% samp,], file=paste(samp,".aberrantcellfraction.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=F)
+    write.table(as.data.frame(ascat.output$ploidy)[row.names(as.data.frame(ascat.output$ploidy)) %in% samp,], file=paste(samp,".ploidy.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=F)
+  }
+  
+} else {
+  
+  write.table(as.data.frame(ascat.output$failedarrays), file="ASCAT.failedarrays.txt", sep="\t", quote=F, row.names=F, col.names=F)
+  
+}
+
+if ( !is.null(ascat.output$nonaberrantarrays) ) {
+  
+  write.table(as.data.frame(ascat.output$nonaberrantarrays), file="ASCAT.nonaberrantarrays.txt", sep="\t", quote=F, row.names=F, col.names=F)
+  
+}
+
+sessionInfo()
\ No newline at end of file
diff --git a/modules/r/ASCAT.nf b/modules/r/ASCAT.nf
new file mode 100644
index 0000000..449f1ef
--- /dev/null
+++ b/modules/r/ASCAT.nf
@@ -0,0 +1,37 @@
+process ASCAT {
+    tag "$sampleID"
+
+    cpus 1
+    memory 24.GB
+    time '01:30:00'
+    errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
+
+
+    container 'quay.io/biocontainers/ascat:3.1.1--r43hdfd78af_1'
+
+    input:
+         val(sampleID),path(LRR),path(BAF),val(gender),val(platform),path(GC_file),path(RT_file)
+
+    output:
+        tuple val(sampleID),
+             path("${sampleID}_sample.QC.txt"),
+             path("${sampleID}_ASCAT_objects.Rdata"),
+             path("${sampleID}.segments_raw.txt"),
+             path("${sampleID}.segments.txt"),
+             path("${sampleID}.aberrantcellfraction.txt"),
+             path("${sampleID}.ploidy.txt"),
+             path("ASCAT.failedarrays.txt", optional: true),
+             path("ASCAT.nonaberrantarrays.txt", optional: true), emit: ascat
+
+    script:
+        """
+        Rscript ${projectDir}/bin/cnv_array/ASCAT_run.R \
+            ${sampleID} \
+            ${LRR} \
+            ${BAF} \
+            ${meta.gender} \
+            ${params.snp_platform} \
+            ${params.GC_file} \
+            ${params.RT_file}
+        """
+}
\ No newline at end of file

From 6c90a3547c50999794bcbfc240e035b365150b62 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Tue, 30 Jul 2024 16:19:00 -0400
Subject: [PATCH 16/26] ascat script error yet to be resolved

---
 config/cnv_array.config                  | 4 ++--
 modules/bcftools/bcftools_query_ascat.nf | 3 +--
 modules/r/ASCAT.nf                       | 8 ++++----
 workflows/cnv_array.nf                   | 4 +---
 4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/config/cnv_array.config b/config/cnv_array.config
index 09ab81b..3612960 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -14,6 +14,6 @@ params {
     ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
     snp_platform = 'IlluminaCytoSNP'
-    GC_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
-    RT_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg19.txt'
+    gc_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
+    rt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg19.txt'
 }
diff --git a/modules/bcftools/bcftools_query_ascat.nf b/modules/bcftools/bcftools_query_ascat.nf
index 8916726..4981fde 100644
--- a/modules/bcftools/bcftools_query_ascat.nf
+++ b/modules/bcftools/bcftools_query_ascat.nf
@@ -11,8 +11,7 @@ process BCFTOOLS_QUERY_ASCAT {
     tuple val(sampleID), val(meta), path(bcf), path(csi), path(vcf), path(tsv)
 
     output:
-    tuple val(sampleID), val(meta), path('*_convert.BAF'), path('*_convert.LRR'), emit: bcftools_query
-    tuple path('*_convert.BAF'), path('*_convert.LRR'), emit: bafnlrr
+    tuple val(sampleID), val(meta), path('*_convert.BAF'), path('*_convert.LRR'), emit: baf_lrr
 
     script:
     """
diff --git a/modules/r/ASCAT.nf b/modules/r/ASCAT.nf
index 449f1ef..fca6bd9 100644
--- a/modules/r/ASCAT.nf
+++ b/modules/r/ASCAT.nf
@@ -10,7 +10,7 @@ process ASCAT {
     container 'quay.io/biocontainers/ascat:3.1.1--r43hdfd78af_1'
 
     input:
-         val(sampleID),path(LRR),path(BAF),val(gender),val(platform),path(GC_file),path(RT_file)
+        tuple val(sampleID), val(meta), path(LRR), path(BAF)
 
     output:
         tuple val(sampleID),
@@ -31,7 +31,7 @@ process ASCAT {
             ${BAF} \
             ${meta.gender} \
             ${params.snp_platform} \
-            ${params.GC_file} \
-            ${params.RT_file}
+            ${params.gc_file} \
+            ${params.rt_file}
         """
-}
\ No newline at end of file
+}
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 28354e7..1156cf1 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -39,9 +39,7 @@ ch_input = extract_csv(file(params.csv_input, checkIfExists: true))
 workflow CNV_ARRAY {
     IAAP_CLI(ch_input)
     BCFTOOLS_GTC2VCF(IAAP_CLI.out.gtc)
-    BCFTOOLS_GTC2VCF.out.gtc2vcf.view()
     BCFTOOLS_QUERY_ASCAT(BCFTOOLS_GTC2VCF.out.gtc2vcf)
-    BCFTOOLS_QUERY_ASCAT.out.bcftools_query.view()
-    ASCAT(IAAP_CLI.out.ascat2r,BCFTOOLS_QUERY_ASCAT.out.bafnlrr, params.platform,GC_file,RT_file)
+    ASCAT(BCFTOOLS_QUERY_ASCAT.out.baf_lrr)
     ASCAT.out.ascat.view()
 }

From 232f294be3e24d697ff968dec8b915bc994b2428 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Wed, 31 Jul 2024 11:07:29 -0400
Subject: [PATCH 17/26] ascat update

---
 config/cnv_array.config                  |  2 +-
 modules/bcftools/bcftools_gtct2vcf.nf    |  4 +++-
 modules/bcftools/bcftools_query_ascat.nf |  3 +++
 modules/r/ASCAT.nf                       | 16 ++++++----------
 workflows/cnv_array.nf                   |  2 +-
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/config/cnv_array.config b/config/cnv_array.config
index 3612960..4b2cfb3 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -15,5 +15,5 @@ params {
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
     snp_platform = 'IlluminaCytoSNP'
     gc_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
-    rt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg19.txt'
+    rt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt'
 }
diff --git a/modules/bcftools/bcftools_gtct2vcf.nf b/modules/bcftools/bcftools_gtct2vcf.nf
index ece9c2c..a2c8155 100644
--- a/modules/bcftools/bcftools_gtct2vcf.nf
+++ b/modules/bcftools/bcftools_gtct2vcf.nf
@@ -1,5 +1,7 @@
-// Define BCFTOOLS_GTC2VCF process
 process BCFTOOLS_GTC2VCF {
+    
+    tag "$sampleID"
+
     cpus = 1
     memory 24.GB
     time '01:30:00'
diff --git a/modules/bcftools/bcftools_query_ascat.nf b/modules/bcftools/bcftools_query_ascat.nf
index 4981fde..8d44824 100644
--- a/modules/bcftools/bcftools_query_ascat.nf
+++ b/modules/bcftools/bcftools_query_ascat.nf
@@ -1,4 +1,7 @@
 process BCFTOOLS_QUERY_ASCAT {
+    
+    tag "$sampleID"
+
     cpus 1
     memory 8.GB
     time '01:00:00'
diff --git a/modules/r/ASCAT.nf b/modules/r/ASCAT.nf
index fca6bd9..a9399aa 100644
--- a/modules/r/ASCAT.nf
+++ b/modules/r/ASCAT.nf
@@ -6,22 +6,18 @@ process ASCAT {
     time '01:30:00'
     errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
 
+    container 'quay.io/jaxcompsci/ascat:v3.1.3'
 
-    container 'quay.io/biocontainers/ascat:3.1.1--r43hdfd78af_1'
+    publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'ascat' }", mode: 'copy'
 
     input:
         tuple val(sampleID), val(meta), path(LRR), path(BAF)
 
     output:
-        tuple val(sampleID),
-             path("${sampleID}_sample.QC.txt"),
-             path("${sampleID}_ASCAT_objects.Rdata"),
-             path("${sampleID}.segments_raw.txt"),
-             path("${sampleID}.segments.txt"),
-             path("${sampleID}.aberrantcellfraction.txt"),
-             path("${sampleID}.ploidy.txt"),
-             path("ASCAT.failedarrays.txt", optional: true),
-             path("ASCAT.nonaberrantarrays.txt", optional: true), emit: ascat
+        tuple val(sampleID), val(meta), path("*.txt"), emit: all_txt
+        tuple val(sampleID), val(meta), path("*.png"), emit: all_png
+        tuple val(sampleID), val(meta), path("*.Rdata"), emit: ascat_rdata
+        tuple val(sampleID), val(meta), path("*segments_raw.txt"), path("*.ploidy.txt"), emit: seg_ploidy
 
     script:
         """
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 1156cf1..31bce92 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -41,5 +41,5 @@ workflow CNV_ARRAY {
     BCFTOOLS_GTC2VCF(IAAP_CLI.out.gtc)
     BCFTOOLS_QUERY_ASCAT(BCFTOOLS_GTC2VCF.out.gtc2vcf)
     ASCAT(BCFTOOLS_QUERY_ASCAT.out.baf_lrr)
-    ASCAT.out.ascat.view()
+    ASCAT.out.seg_ploidy.view()
 }

From 85195700bce4bde78eecb9be89e7dfd8b692cb33 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Wed, 31 Jul 2024 11:29:58 -0400
Subject: [PATCH 18/26] ascat working

---
 bin/cnv_array/ASCAT_run.R | 4 ++--
 modules/r/ASCAT.nf        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bin/cnv_array/ASCAT_run.R b/bin/cnv_array/ASCAT_run.R
index 85f7c29..bbbf444 100644
--- a/bin/cnv_array/ASCAT_run.R
+++ b/bin/cnv_array/ASCAT_run.R
@@ -7,8 +7,8 @@ suppressMessages(library(ASCAT))
 args=(commandArgs(TRUE))
 
 sampleID = args[1]
-LRR_file = args[2]
-BAF_file = args[3]
+BAF_file = args[2]
+LRR_file = args[3]
 gender   = args[4]
 platform = args[5]
 GC_file  = args[6]
diff --git a/modules/r/ASCAT.nf b/modules/r/ASCAT.nf
index a9399aa..4725ade 100644
--- a/modules/r/ASCAT.nf
+++ b/modules/r/ASCAT.nf
@@ -11,7 +11,7 @@ process ASCAT {
     publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'ascat' }", mode: 'copy'
 
     input:
-        tuple val(sampleID), val(meta), path(LRR), path(BAF)
+        tuple val(sampleID), val(meta), path(BAF), path(LRR)
 
     output:
         tuple val(sampleID), val(meta), path("*.txt"), emit: all_txt
@@ -23,8 +23,8 @@ process ASCAT {
         """
         Rscript ${projectDir}/bin/cnv_array/ASCAT_run.R \
             ${sampleID} \
-            ${LRR} \
             ${BAF} \
+            ${LRR} \
             ${meta.gender} \
             ${params.snp_platform} \
             ${params.gc_file} \

From 109ad21c940e3aa609bfecb423b572d2d3fafaff Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner015.sumner2.jax.org>
Date: Thu, 1 Aug 2024 15:44:26 -0400
Subject: [PATCH 19/26] Annotations module files

---
 config/cnv_array.config                     |  6 +++--
 modules/utility_modules/ascat_annotation.nf | 25 +++++++++++++++++++++
 workflows/cnv_array.nf                      |  5 ++++-
 3 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 modules/utility_modules/ascat_annotation.nf

diff --git a/config/cnv_array.config b/config/cnv_array.config
index 4b2cfb3..ee2b95d 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -14,6 +14,8 @@ params {
     ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
     snp_platform = 'IlluminaCytoSNP'
-    gc_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
-    rt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt'
+    GC_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
+    RT_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt'
+    chrArm = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/GRCh38_chromosome_arm.txt'
+    cnvGeneFile = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/biomaRt_GRCh38_ensemblv102_CNVgeneAnnotations_primaryChroms.txt'
 }
diff --git a/modules/utility_modules/ascat_annotation.nf b/modules/utility_modules/ascat_annotation.nf
new file mode 100644
index 0000000..289c1eb
--- /dev/null
+++ b/modules/utility_modules/ascat_annotation.nf
@@ -0,0 +1,25 @@
+process ASCAT_ANNOTATION {
+
+    tag "$sampleID"
+
+    cpus = 1
+    memory = 24.GB
+    time = '01:30:00'
+    errorStrategy = { (task.exitStatus == 140) ? { log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish' }.call() : 'finish' }
+
+    container 'quay.io/biocontainers/ascat:3.1.1--r43hdfd78af_1'
+    publishDir "${params.pubdir}/${params.organize_by == 'sample' ? sampleID : 'ascat_annotation'}", mode: 'copy'
+
+    input:
+    tuple val(sampleID), val(meta), path(segments_raw), path(ploidy)
+
+    output:
+    tuple val(sampleID), val(meta), path("${sampleID}.segments_raw.extend.txt"), path("${sampleID}.*"), emit: ascat_annotated
+
+    script:
+    """
+    perl \${projectDir}/bin/cnv_array/${sampleID}.segment_raw_extend.pl ${segments_raw} ${ploidy} ${params.chrArm} ${meta}
+    perl \${projectDir}/bin/cnv_array/annotate_ensembl_genes.pl ${sampleID}.segments_raw.extend.txt ${params.cnvGeneFile}
+    R CMD BATCH --slave "--args ${sampleID}.segments_raw.extend.txt ${sampleID} ./ " \${projectDir}/seg_plot.R
+    """
+}
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 31bce92..4151078 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -9,6 +9,7 @@ include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli.nf"
 include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools/bcftools_gtct2vcf.nf"
 include {BCFTOOLS_QUERY_ASCAT} from "${projectDir}/modules/bcftools/bcftools_query_ascat.nf"
 include {ASCAT} from "${projectDir}/modules/r/ASCAT.nf"
+include {ASCAT_ANNOTATION} from "${projectDir}/modules/utility_modules/ascat_annotation.nf"
 
 
 // Help if needed
@@ -29,6 +30,7 @@ if (params.csv_input) {
 } else {
     exit 1, "Workflow requires a CSV manifest. See `--help` for information."   
 }
+
 GC_file = file(params.gc_file, checkIfExists: true)
 RT_file = file(params.rt_file, checkIfExists: true)
 
@@ -41,5 +43,6 @@ workflow CNV_ARRAY {
     BCFTOOLS_GTC2VCF(IAAP_CLI.out.gtc)
     BCFTOOLS_QUERY_ASCAT(BCFTOOLS_GTC2VCF.out.gtc2vcf)
     ASCAT(BCFTOOLS_QUERY_ASCAT.out.baf_lrr)
-    ASCAT.out.seg_ploidy.view()
+    ASCAT_ANNOTATION(ASCAT.out.seg_ploidy)
+    ASCAT_ANNOTATION.out.ascat_annotated.view()
 }

From 103925867fa8a5d109493b179e62a1e0fdf442b3 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Fri, 2 Aug 2024 12:06:25 -0400
Subject: [PATCH 20/26] cnv array working

---
 bin/cnv_array/ASCAT_run.R                   |   4 +
 bin/cnv_array/annotate_ensembl_genes.pl     | 132 +++++++++
 bin/cnv_array/seg_plot.R                    | 117 ++++++++
 bin/cnv_array/segment_raw_extend.pl         | 288 ++++++++++++++++++++
 config/cnv_array.config                     |   6 +-
 modules/utility_modules/ascat_annotation.nf |  14 +-
 workflows/cnv_array.nf                      |   1 -
 7 files changed, 552 insertions(+), 10 deletions(-)
 create mode 100644 bin/cnv_array/annotate_ensembl_genes.pl
 create mode 100644 bin/cnv_array/seg_plot.R
 create mode 100644 bin/cnv_array/segment_raw_extend.pl

diff --git a/bin/cnv_array/ASCAT_run.R b/bin/cnv_array/ASCAT_run.R
index bbbf444..6911201 100644
--- a/bin/cnv_array/ASCAT_run.R
+++ b/bin/cnv_array/ASCAT_run.R
@@ -27,6 +27,10 @@ colnames(SNPpos) <- c('Probe_Set_ID', 'Chromosome', 'Physical_Position')
 
 ##
 
+if (gender == 'NA') {
+  gender = 'XY'
+}
+
 ascat.bc = ascat.loadData(Tumor_LogR_file = LRR_file, Tumor_BAF_file = BAF_file, gender = gender, genomeVersion = "hg38")
 
 ascat.bc$samples[1] <- sampleID
diff --git a/bin/cnv_array/annotate_ensembl_genes.pl b/bin/cnv_array/annotate_ensembl_genes.pl
new file mode 100644
index 0000000..7b96f7b
--- /dev/null
+++ b/bin/cnv_array/annotate_ensembl_genes.pl
@@ -0,0 +1,132 @@
+#!/usr/bin/perl -w
+use POSIX;
+use File::Basename;
+
+# This script annotates ensembl genes with copy number and breakpoints
+# Usage: perl annotate_ensembl_genes.pl <sample>.segments_raw.extend.txt <gene_annotation_file>
+
+if ($#ARGV != 1) {
+	print "This scripts requires: <file_cn> <file_gene> \n";
+	exit(-1);
+}
+
+$file_cn = $ARGV[0];		
+$file_gene = $ARGV[1];
+
+$file_output = basename($file_cn,".txt").".ensgene_cnvbreak.txt";
+open(OUTFILE, ">$file_output");
+
+open(GENEFILE, "$file_gene") or die "can't open $file_gene: $!";
+$gene = <GENEFILE>;
+chomp($gene);
+
+open(CNFILE, "$file_cn") or die "can't open $file_cn: $!";
+@data = <CNFILE>;
+close(CNFILE);
+chomp(@data);
+
+#print OUTFILE "$tmp\tstartext\tendext\tstartext_desc\tendext_desc\tCN_raw\tLOH\tparm_fraction\tqarm_fraction\tploidy\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\n";
+print OUTFILE "$gene\tnum_cnv_seg\tseg_desc\tploidy\tnMajor\tnMinor\tnAraw\tnBraw\tCN_raw\tLOH\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\tnMajor_max\tnMinor_max\tnAraw_max\tnBraw_max\tCN_raw_max\tLOH_max\tcopydiff_2_max\tcopydiff_ploidy_max\tlogratio_2_max\tlogratio_ploidy_max\n";
+
+while ($gene = <GENEFILE>) {
+    
+    chomp($gene);
+    @line = split(/\t/, $gene);
+    $chr = $line[2];
+    $start = $line[3];
+    $end = $line[4];
+    
+    #$cnraw1=999;
+    $numseg=0;
+    $region="";
+    %segline = ();
+    @n = ();
+    
+    for ($j=1; $j<=$#data; $j++) {
+        @segment = split(/\t/, $data[$j]);
+        
+        $chr_cn = $segment[1];
+        $pos1 = $segment[2];
+        $pos2 = $segment[3];
+        $pos1ext = $segment[9];
+        $pos2ext = $segment[10];
+        $left = $segment[11];
+        $right = $segment[12];
+        $cnraw = $segment[13];
+        
+        if (($chr_cn eq $chr) && ($start <= $pos2ext) && ($end >= $pos1ext)) { #overlap
+            #$numseg++;
+            push(@n, $cnraw);
+            $segline{$cnraw} = [ @segment ];
+            
+            #check if overlap with regions with no call
+            if (($start <= $pos1) && ($end >= $pos1ext)) {
+                $region = $region.$left.";";
+            }
+            if (($start <= $pos2ext) && ($end >= $pos2)) {
+                $region = $region.$right.";";
+            }
+            
+            #if ($cnraw < $cnraw1) {
+            #    $cnraw1 = $cnraw;
+            #    $count = $j;
+            #}
+        }
+    }
+    
+    if ($region eq "") {
+        $region = "NA";
+    }
+    
+    if ($#n >= 0) {
+        
+        $numseg = $#n +1;
+        @sortn = sort{ $a <=> $b } @n;
+    
+        $nA = $segline{$sortn[0]}[4];
+        $nB = $segline{$sortn[0]}[5];
+        $rawA = $segline{$sortn[0]}[6];
+        $rawB = $segline{$sortn[0]}[7];
+        $cnraw = $segline{$sortn[0]}[13];
+        $loh = $segline{$sortn[0]}[14];
+        $ploidy= $segline{$sortn[0]}[17];
+        $copydiff1 = $segline{$sortn[0]}[18];
+        $copydiff2 = $segline{$sortn[0]}[19];
+        $logratio1 = $segline{$sortn[0]}[20];
+        $logratio2 = $segline{$sortn[0]}[21];
+    
+        $outline = "$gene\t$numseg\t$region\t$ploidy\t$nA\t$nB\t$rawA\t$rawB\t$cnraw\t$loh\t$copydiff1\t$copydiff2\t$logratio1\t$logratio2\t";
+    
+        if ($numseg > 1 ) {
+            $nA = $segline{$sortn[$#sortn]}[4];
+            $nB = $segline{$sortn[$#sortn]}[5];
+            $rawA = $segline{$sortn[$#sortn]}[6];
+            $rawB = $segline{$sortn[$#sortn]}[7];
+            $cnraw = $segline{$sortn[$#sortn]}[13];
+            $loh = $segline{$sortn[$#sortn]}[14];
+            $copydiff1 = $segline{$sortn[$#sortn]}[18];
+            $copydiff2 = $segline{$sortn[$#sortn]}[19];
+            $logratio1 = $segline{$sortn[$#sortn]}[20];
+            $logratio2 = $segline{$sortn[$#sortn]}[21];
+        }
+        else {
+            $nA = "NA";
+            $nB = "NA";
+            $rawA = "NA";
+            $rawB = "NA";
+            $cnraw = "NA";
+            $loh = "NA";
+            $copydiff1 = "NA";
+            $copydiff2 = "NA";
+            $logratio1 = "NA";
+            $logratio2 = "NA";
+
+        }
+    
+        $outline = $outline."$nA\t$nB\t$rawA\t$rawB\t$cnraw\t$loh\t$copydiff1\t$copydiff2\t$logratio1\t$logratio2";
+        print OUTFILE "$outline\n";
+    }
+}
+
+close (GENEFILE);
+close (OUTFILE);
diff --git a/bin/cnv_array/seg_plot.R b/bin/cnv_array/seg_plot.R
new file mode 100644
index 0000000..f2bb5a2
--- /dev/null
+++ b/bin/cnv_array/seg_plot.R
@@ -0,0 +1,117 @@
+# Updated to include Grace's September 2019 tweaks.
+
+options(scipen = 999)
+
+args=(commandArgs(TRUE))
+
+# filename is name of the *segments_raw.extend.txt file
+filename <- args[1]
+
+sampleID <- args[2]
+
+# outdir is the dir where png result will be written ... use "./" for current dir
+outdir <- args[3]
+
+
+CNS <-read.table(filename,header=T,sep="\t")
+
+gender <- 'female'
+sex <- 'female'
+
+if (sex == "female") {
+  CNS=CNS[CNS$chr!="Y",]
+}
+
+#title of plot
+ploidy=round(CNS$ploidy[1], digits=2)
+plottitle=paste( gsub("_","  ", sampleID), "   ploidy=",ploidy,sep="")
+
+chromo <- unique(CNS$chr)
+chromo
+xx=0
+y = c()
+start=CNS$startpos
+end=CNS$endpos
+
+for (x in chromo) {
+  start[CNS$chr == x]=start[CNS$chr == x]+xx
+  end[CNS$chr == x]=end[CNS$chr == x]+xx
+  tmp = CNS$endpos[CNS$chr == x]
+  xx=tail(tmp,1)+xx
+  y <- c(y, xx)
+}
+
+png(paste(outdir,sampleID,"_segmentsgenomeplot.copydiffploidy.png",sep=""), width=1300,height=600)
+
+par(mar = c(5, 5, 8, 4))
+
+val=CNS$copydiff_ploidy
+
+# in the following stmt, family="serif" changes font to times-roman; cex.main=1.8 scales up the title font size 
+# formerly also used: ylim=c(-7,20), 
+plot( c(start,end), c(val,val), col="white", main=plottitle, xlab="Chromosome", ylab="Delta from Ploidy", 
+      ylim=c(-8, max( c(val,val) ) ))
+
+for (i in 1:length(start)) {
+  if (CNS$LOH[i]==1) {
+    polygon(c(start[i],end[i],end[i],start[i]),c(min(-7),min(-7),max(-6),max(-6)),col="lightsteelblue",border="lightsteelblue",lwd=2)
+  }
+  
+}
+
+segments(start,val,end,val,col="tomato",lwd=5)
+abline(v=y,col="grey")
+posy=c(-8)
+i=1
+l=0
+for (x in chromo) {
+  posx=(l+y[i])/2
+  text(posx,posy,x,cex=1.2,srt=45)
+  l=y[i]
+  i=i+1
+}
+abline(h=0,col="black",lty=2,lwd=1.5)
+
+# formerly also used: inset=c(0,-0.1), 
+legend("topright", inset=-0.1, c("Difference from sample ploidy   ", "LOH"), xpd=TRUE, horiz=T, 
+       bty="n", lty=c(1,1), lwd=6, col=c("tomato", "lightsteelblue"), cex=1.5 )
+
+dev.off()
+
+
+png(paste(outdir,sampleID,"_segmentsgenomeplot.CNraw_loh.png",sep=""), width=1300,height=600)
+
+par(mar = c(5, 5, 8, 4))
+
+val=CNS$CN_raw
+d=max(val)/100
+plot(c(start,end),c(val,val),col="white",ylab="CN, CN Major, CN Minor",main=plottitle,ylim=c(-d,max(val)+4*d),xaxt='n',xlab="chromosomes")
+
+for (i in 1:length(start)) {
+  if (CNS$LOH[i]==1) {
+    polygon(c(start[i],end[i],end[i],start[i]),c(min(val),min(val),max(val),max(val)),col="lightsteelblue",border="lightsteelblue",lwd=2)
+  }
+  
+}
+
+val=CNS$nBraw-d
+segments(start,val,end,val,col="blue",lwd=4)
+val=CNS$nAraw
+segments(start,val,end,val,col="red",lwd=4)
+val=CNS$CN_raw+d
+segments(start,val,end,val,col="purple",lwd=4)
+abline(v=y,col="grey")
+posy=max(val)+2*d
+i=1
+l=0
+for (x in chromo) {
+  posx=(l+y[i])/2
+  text(posx,posy,x,cex=1,srt=45)
+  l=y[i]
+  i=i+1
+}
+abline(h=CNS$ploidy[1],col="black",lty=2,lwd=1.5)
+
+legend("topright", c("CN Total", "CN Major", "CN Minor", "LOH"),xpd=TRUE,horiz=T, inset=c(0,-0.1), bty = "n", lty=c(1,1,1,1), lwd=6, col = c("purple", "red", "blue", "lightsteelblue"), cex = 1)
+dev.off()
+
diff --git a/bin/cnv_array/segment_raw_extend.pl b/bin/cnv_array/segment_raw_extend.pl
new file mode 100644
index 0000000..04cd570
--- /dev/null
+++ b/bin/cnv_array/segment_raw_extend.pl
@@ -0,0 +1,288 @@
+#!/usr/bin/perl -w
+use POSIX;
+use File::Basename;
+
+# This script adds to segment file the arm fraction, LOH and CN diff and log ratio relative to 2 and ploidy
+# The segments are extended
+# Output: <basename of file_cn>.extend.txt in the current directory; tmp.txt is used as scratch space.
+# perl segment_raw_extend.pl *segments_raw.txt *ploidy.txt hg38_chromosome_arm.txt [male, female, unknown]
+
+if ($#ARGV != 3) {
+	print "This scripts requires: <file_cn> <file_ploidy> <file_arm> <gender> \n";
+	exit(-1);
+}
+
+$file_cn = $ARGV[0];
+$file_ploidy = $ARGV[1];
+$file_arm = $ARGV[2];
+$gender = $ARGV[3];
+
+$file_output = basename($file_cn,".txt").".extend.txt";
+
+$ploidy = `cat $file_ploidy`;
+chomp($ploidy);
+
+# $cn_factor scales the expected copy number on X/Y: 1 for female/unknown, 0.5 for male.
+# Any other value is rejected up front: an undefined $cn_factor would divide by zero on X/Y rows.
+if (($gender eq "female") || ($gender eq "unknown")) {
+    $cn_factor = 1;
+}
+elsif ($gender eq "male") {
+    $cn_factor= 0.5;
+}
+else { die "invalid gender '$gender': expected male, female or unknown\n"; }
+
+$tmp = `cat $file_arm | awk 'NR>1'`;
+@arm = split(/\n/,$tmp);
+chomp(@arm);
+
+open(CN, "<", $file_cn) or die "can't open $file_cn: $!";
+$tmp = <CN>;
+chomp($tmp);
+
+open(OUTFILE, ">", $file_output) or die "can't open $file_output: $!";
+print OUTFILE "$tmp\tstartext\tendext\tstartext_desc\tendext_desc\tCN_raw\tLOH\tparm_fraction\tqarm_fraction\tploidy\tcopydiff_2\tcopydiff_ploidy\tlogratio_2\tlogratio_ploidy\n";
+
+open(TMPFILE, ">", "tmp.txt") or die "can't open tmp.txt: $!";
+
+#merge segments
+# Consecutive rows on the same chromosome whose fields 6 and 7 (0-based) are numerically equal are
+# collapsed into one segment; the merged rows are written to tmp.txt for the second pass.
+# Presumably the fields are: sample, chr, start, end, nMajor, nMinor, nAraw, nBraw, probe count
+# (TODO confirm against the *segments_raw.txt header).
+chomp($tmp = <CN>);
+@line = split(/\t/,$tmp);
+# Echo the first data row to STDOUT.
+print "@line\n";
+
+# Running state of the segment being built, seeded from the first data row.
+($sample, $chromo, $start, $end) = @line[0..3];
+($n1, $n2, $cn1, $cn2, $num) = @line[4..8];
+
+# Echo the first row's field-8 value to STDOUT.
+print "$num\n";
+
+while (defined($tmp = <CN>)) {
+    chomp($tmp);
+    @line = split(/\t/,$tmp);
+
+    my $same_segment = ($chromo eq $line[1]) && ($cn1 == $line[6]) && ($cn2 == $line[7]);
+    unless ($same_segment) {
+        # Chromosome or copy-number state changed: flush the finished segment and
+        # restart the running state from the current row.
+        print TMPFILE join("\t", $sample, $chromo, $start, $end, $n1, $n2, $cn1, $cn2, $num), "\n";
+        ($sample, $chromo, $start, $end) = @line[0..3];
+        ($n1, $n2, $cn1, $cn2, $num) = @line[4..8];
+        next;
+    }
+
+    # Same state: stretch the segment to this row's end and add its field-8 value.
+    $end = $line[3];
+    $num += $line[8];
+}
+
+#lastline
+# The loop only writes a segment when the next one starts, so the final one is flushed here.
+print TMPFILE join("\t", $sample, $chromo, $start, $end, $n1, $n2, $cn1, $cn2, $num), "\n";
+
+# tmp.txt is read back right after this, so both handles are closed now.
+close (CN);
+close (TMPFILE);
+
+open(CN, "tmp.txt") or die "can't open tmp.txt: $!";
+@seg = <CN>;
+chomp(@seg);
+close (CN);
+$n = 0;
+# Second pass over the merged rows in @seg. Each iteration looks ahead to row j+1, so the last row is processed after the loop.
+for ($j=0; $j<$#seg; $j++) {
+    # array1 = current merged row, array2 = next row; x2 = current end (field 3), y1 = next start (field 2).
+    @array1 = split(/\t/,$seg[$j]);
+    @array2 = split(/\t/,$seg[$j+1]);
+    #$x1 = $array1[2];
+    $x2 = $array1[3];
+    $y1 = $array2[2];
+    #$y2 = $array2[3];
+    # $n is the chromosome currently being processed; it starts at 0, so the first row is treated as the start of a chromosome.
+    if ($array1[1] ne $n) { #first line for chr
+        # First segment on a chromosome: its extended start is 0 and is labelled "telomere".
+        $n = $array1[1];
+        $left = 0;
+        $left1 = "telomere";
+        # Find this chromosome in the odd-indexed arm rows (field 0 minus its first 3 characters, presumably "chr") and keep fields 1 and 2 in $a and $b.
+        for ($i=1; $i<=$#arm; $i+=2) {
+            @line = split(/\t/,$arm[$i]);
+            if ($n eq substr($line[0],3)) {
+                $a = $line[1];
+                $b = $line[2];
+            }
+        }
+        # Extended end: $b ("telomere") if the next row is another chromosome; $a ("centromere") if $a lies strictly between this end and the next start; else the floored midpoint of the gap ("no_probe").
+        if ($array2[1] ne $n) { #last line for chr
+            $right = $b;
+            $right1 = "telomere";
+        }
+        elsif (($x2 < $a) && ($y1 > $a)) {
+            $right = $a;
+            $right1 = "centromere";
+        }
+        else {
+            $right = floor(($x2 + $y1)/2);
+            $right1 = "no_probe";
+        }
+    }
+    else {
+        # Later segment on the same chromosome: start one past the previous extended end and reuse its label.
+        $left = $right + 1;
+        $left1 = $right1;
+        # The extended end is chosen by the same three rules as in the branch above.
+        if ($array2[1] ne $n) { #last line for chr
+            
+            $right = $b;
+            $right1 = "telomere";
+        }
+        elsif (($x2 < $a) && ($y1 > $a)) {
+            $right = $a;
+            $right1 = "centromere";
+        }
+        else {
+            $right = floor(($x2 + $y1)/2);
+            $right1 = "no_probe";
+        }
+    }
+    # Raw total copy number (field 6 + field 7); LOH is flagged when field 6 >= 0.5 and field 7 <= 0.1.
+    $copy = $array1[6] + $array1[7];
+    if ($array1[6] >= 0.5 && $array1[7] <= 0.1) {
+        $loh=1;
+    }
+    else {
+        $loh=0;
+    }
+    # parm_fraction: share of the matching even-indexed arm row covered by [left, right]; the middle two of the four sorted coordinates give the overlap. $overlap1 keeps its previous value if no arm row matches.
+    for ($i=0; $i<=$#arm; $i+=2) {
+        @line = split(/\t/,$arm[$i]);
+        if ($n eq substr($line[0],3)) {
+            if (($right>=$line[1]) && ($left<=$line[2])) {
+                @tmp = ($left,$right,$line[1],$line[2]);
+                @sorttmp = sort{ $a <=> $b } @tmp; # NOTE(review): sort's $a/$b are the same package globals that hold the arm coordinates above; this relies on sort leaving them unchanged afterwards - confirm
+                $overlap1=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]);
+            }
+            else {
+                $overlap1=0;
+            }
+        }
+    }
+    # qarm_fraction: same computation against the odd-indexed arm row, stored in $overlap2.
+    for ($i=1; $i<=$#arm; $i+=2) {
+        @line = split(/\t/,$arm[$i]);
+        if ($n eq substr($line[0],3)) {
+            if (($right>=$line[1]) && ($left<=$line[2])) {
+                @tmp = ($left,$right,$line[1],$line[2]);
+                @sorttmp = sort{ $a <=> $b } @tmp;
+                $overlap2=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]);
+            }
+            else {
+                $overlap2=0;
+            }
+        }
+    }
+# Differences and log2 ratios versus 2 copies and versus $ploidy; on X/Y the expected value is scaled by $cn_factor. The 0.01 added to $copy presumably guards against log(0).
+    if (($n eq "X") || ($n eq "Y")) {
+        $diff1=$copy - ($cn_factor * 2);
+        $diff2=$copy- ($cn_factor * $ploidy);
+        $logratio1 = log(($copy+0.01)/($cn_factor * 2))/log(2);
+        $logratio2 = log(($copy+0.01)/($cn_factor * $ploidy))/log(2);
+    }
+    else {
+        $diff1=$copy-2;
+        $diff2=$copy-$ploidy;
+        $logratio1 = log(($copy+0.01)/2)/log(2);
+        $logratio2 = log(($copy+0.01)/$ploidy)/log(2);
+    }
+    # Emit the merged row followed by the new columns, in the order of the header line.
+    print OUTFILE "$seg[$j]\t$left\t$right\t$left1\t$right1\t$copy\t$loh\t$overlap1\t$overlap2\t$ploidy\t$diff1\t$diff2\t$logratio1\t$logratio2\n";
+}
+
+@array1 = split(/\t/,$seg[$#seg]);
+# Final merged row: it has no successor, so its extended end is always $b and is labelled "telomere".
+if ($array1[1] ne $n) { #first line for chr
+    # The last row is the only one on its chromosome: start at 0 and look up $a/$b in the odd-indexed arm rows.
+    $n = $array1[1];
+    $left = 0;
+    $left1 = "telomere";
+
+    for ($i=1; $i<=$#arm; $i+=2) {
+        @line = split(/\t/,$arm[$i]);
+        if ($n eq substr($line[0],3)) {
+            $a = $line[1];
+            $b = $line[2];
+        }
+    }
+
+    $right = $b;
+    $right1 = "telomere";
+
+}
+else {
+    # Same chromosome as the previous row: start one past its extended end and reuse its label.
+    $left = $right + 1;
+    $left1 = $right1;
+
+    $right = $b;
+    $right1 = "telomere";
+
+}
+# Raw total copy number (field 6 + field 7); LOH is flagged when field 6 >= 0.5 and field 7 <= 0.1.
+$copy = $array1[6] + $array1[7];
+if ($array1[6] >= 0.5 && $array1[7] <= 0.1) {
+    $loh=1;
+}
+else {
+    $loh=0;
+}
+# parm_fraction: share of the matching even-indexed arm row covered by [left, right].
+for ($i=0; $i<=$#arm; $i+=2) {
+    @line = split(/\t/,$arm[$i]);
+    if ($n eq substr($line[0],3)) {
+        if (($right>=$line[1]) && ($left<=$line[2])) {
+            @tmp = ($left,$right,$line[1],$line[2]);
+            @sorttmp = sort{ $a <=> $b } @tmp;
+            $overlap1=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]);
+        }
+        else {
+            $overlap1=0;
+        }
+    }
+}
+# qarm_fraction: same computation against the odd-indexed arm row.
+for ($i=1; $i<=$#arm; $i+=2) {
+    @line = split(/\t/,$arm[$i]);
+    if ($n eq substr($line[0],3)) {
+        if (($right>=$line[1]) && ($left<=$line[2])) {
+            @tmp = ($left,$right,$line[1],$line[2]);
+            @sorttmp = sort{ $a <=> $b } @tmp;
+            $overlap2=($sorttmp[2]-$sorttmp[1])/($line[2]-$line[1]);
+        }
+        else {
+            $overlap2=0;
+        }
+    }
+}
+# Differences and log2 ratios versus 2 copies and versus $ploidy; X/Y expectations are scaled by $cn_factor.
+if (($n eq "X") || ($n eq "Y")) {
+    $diff1=$copy - ($cn_factor * 2);
+    $diff2=$copy- ($cn_factor * $ploidy);
+    $logratio1 = log(($copy+0.01)/($cn_factor * 2))/log(2);
+    $logratio2 = log(($copy+0.01)/($cn_factor * $ploidy))/log(2);
+}
+else {
+    $diff1=$copy-2;
+    $diff2=$copy-$ploidy;
+    $logratio1 = log(($copy+0.01)/2)/log(2);
+    $logratio2 = log(($copy+0.01)/$ploidy)/log(2);
+}
+# The last row is addressed explicitly instead of through the counter left over from the for loop.
+print OUTFILE $seg[$#seg], "\t$left\t$right\t$left1\t$right1\t$copy\t$loh\t$overlap1\t$overlap2\t$ploidy\t$diff1\t$diff2\t$logratio1\t$logratio2\n";
+
+# CN was already closed after tmp.txt was read; checking this close surfaces buffered write errors.
+close(OUTFILE) or die "can't close $file_output: $!";
diff --git a/config/cnv_array.config b/config/cnv_array.config
index ee2b95d..f8c82f0 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -7,15 +7,13 @@ manifest {
 
 params {
     gtc_csv = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv'
-    
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
-    idat_folder = '/home/temket/cnv_workflow/data/raw_idat/'
     ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
     tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
     snp_platform = 'IlluminaCytoSNP'
-    GC_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
-    RT_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt'
+    gc_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
+    rt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt'
     chrArm = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/GRCh38_chromosome_arm.txt'
     cnvGeneFile = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/biomaRt_GRCh38_ensemblv102_CNVgeneAnnotations_primaryChroms.txt'
 }
diff --git a/modules/utility_modules/ascat_annotation.nf b/modules/utility_modules/ascat_annotation.nf
index 289c1eb..3eda34f 100644
--- a/modules/utility_modules/ascat_annotation.nf
+++ b/modules/utility_modules/ascat_annotation.nf
@@ -7,19 +7,23 @@ process ASCAT_ANNOTATION {
     time = '01:30:00'
     errorStrategy = { (task.exitStatus == 140) ? { log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish' }.call() : 'finish' }
 
-    container 'quay.io/biocontainers/ascat:3.1.1--r43hdfd78af_1'
+    container 'quay.io/jaxcompsci/ascat:v3.1.3'
+
     publishDir "${params.pubdir}/${params.organize_by == 'sample' ? sampleID : 'ascat_annotation'}", mode: 'copy'
 
     input:
     tuple val(sampleID), val(meta), path(segments_raw), path(ploidy)
 
     output:
-    tuple val(sampleID), val(meta), path("${sampleID}.segments_raw.extend.txt"), path("${sampleID}.*"), emit: ascat_annotated
+    tuple val(sampleID), val(meta), path("*.segments_raw.extend.txt"), emit: seg_extended
+    tuple val(sampleID), val(meta), path("*.ensgene_cnvbreak.txt"), emit: ensembl_annot
+    tuple val(sampleID), val(meta), path("*.png"), emit: png
 
     script:
+    gender = meta.gender == 'XX' ? 'female' : (meta.gender == 'XY' ? 'male' : 'unknown') // only an explicit XY is passed as male; segment_raw_extend.pl also accepts 'unknown'
     """
-    perl \${projectDir}/bin/cnv_array/${sampleID}.segment_raw_extend.pl ${segments_raw} ${ploidy} ${params.chrArm} ${meta}
-    perl \${projectDir}/bin/cnv_array/annotate_ensembl_genes.pl ${sampleID}.segments_raw.extend.txt ${params.cnvGeneFile}
-    R CMD BATCH --slave "--args ${sampleID}.segments_raw.extend.txt ${sampleID} ./ " \${projectDir}/seg_plot.R
+    perl ${projectDir}/bin/cnv_array/segment_raw_extend.pl ${segments_raw} ${ploidy} ${params.chrArm} ${gender}
+    perl ${projectDir}/bin/cnv_array/annotate_ensembl_genes.pl ${sampleID}.segments_raw.extend.txt ${params.cnvGeneFile}
+    R CMD BATCH --slave "--args ${sampleID}.segments_raw.extend.txt ${sampleID} ./ " ${projectDir}/bin/cnv_array/seg_plot.R
     """
 }
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 4151078..38b087e 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -44,5 +44,4 @@ workflow CNV_ARRAY {
     BCFTOOLS_QUERY_ASCAT(BCFTOOLS_GTC2VCF.out.gtc2vcf)
     ASCAT(BCFTOOLS_QUERY_ASCAT.out.baf_lrr)
     ASCAT_ANNOTATION(ASCAT.out.seg_ploidy)
-    ASCAT_ANNOTATION.out.ascat_annotated.view()
 }

From 773d29a023a064a0fff356a082387e1022695b99 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Mon, 5 Aug 2024 14:20:22 -0400
Subject: [PATCH 21/26] added nf-test dir and config

---
 nf-test.config        | 13 +++++++++++++
 tests/nextflow.config |  5 +++++
 2 files changed, 18 insertions(+)
 create mode 100644 nf-test.config
 create mode 100644 tests/nextflow.config

diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 0000000..9d571fd
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,13 @@
+config {
+
+    testsDir "tests"
+    workDir ".nf-test"
+    configFile "tests/nextflow.config"
+    profile "sumner2"
+    stage {
+        symlink "subworkflows/"
+        symlink "modules/"
+        symlink "test/"
+        symlink "workflows/"
+    }
+}
diff --git a/tests/nextflow.config b/tests/nextflow.config
new file mode 100644
index 0000000..c99eca5
--- /dev/null
+++ b/tests/nextflow.config
@@ -0,0 +1,5 @@
+/*
+========================================================================================
+    Nextflow config file for running tests
+========================================================================================
+*/
\ No newline at end of file

From 5a45c9cb822a7dd2dae96d88b49bd53e412e58ac Mon Sep 17 00:00:00 2001
From: Tejas Temker <temket@sumner083.sumner2.jax.org>
Date: Thu, 15 Aug 2024 13:47:33 -0400
Subject: [PATCH 22/26] test file

---
 tests/cnv.nf.test | 130 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 tests/cnv.nf.test

diff --git a/tests/cnv.nf.test b/tests/cnv.nf.test
new file mode 100644
index 0000000..da36883
--- /dev/null
+++ b/tests/cnv.nf.test
@@ -0,0 +1,130 @@
+nextflow_workflow {
+
+    name "Test Workflow CNV_ARRAY"
+    script "workflows/cnv_array.nf"
+    workflow "CNV_ARRAY"
+
+    test("Full Workflow -- Required Params") {
+        tag "RequiredParams"
+        tag "primary"
+        when {
+            params {
+                csv_input = "${baseDir}/test/cnv/data/example_sample_input.csv"
+            }
+        }
+
+        then {
+            assert workflow.success
+        }
+    }
+
+    test("IAAP_CLI Process") {
+        tag "IAAP_CLI"
+        tag "process"
+        when {
+            params {
+                outdir = "tests/results"
+                idat_folder = "${baseDir}/test/cnv/idat_folder"
+                bpm_file = "${baseDir}/test/cnv/bpm_file"
+                egt_file = "${baseDir}/test/cnv/egt_file"
+                csv_input = "${baseDir}/test/cnv/csv_input"
+                gc_file = "${baseDir}/test/cnv/gc_file"
+                rt_file = "${baseDir}/test/cnv/rt_file"
+            }
+        }
+
+        then {
+            assert workflow.success
+            // assert GTC format files
+            assert file("tests/results/iaap_cli_output.csv").exists()
+        }
+    }
+
+    test("BCFTOOLS_GTC2VCF Process") {
+        tag "BCFTOOLS_GTC2VCF"
+        tag "process"
+        when {
+            params {
+                outdir = "tests/results"
+                idat_folder = "${baseDir}/test/cnv/idat_folder"
+                bpm_file = "${baseDir}/test/cnv/bpm_file"
+                egt_file = "${baseDir}/test/cnv/egt_file"
+                csv_input = "${baseDir}/test/cnv/csv_input"
+                gc_file = "${baseDir}/test/cnv/gc_file"
+                rt_file = "${baseDir}/test/cnv/rt_file"
+            }
+        }
+
+        then {
+            assert workflow.success
+            // assert the tsv, bcf, csi and vcf files 
+            assert file("tests/results/bcftools_gtct2vcf_output.vcf").exists()
+        }
+    }
+
+    test("BCFTOOLS_QUERY_ASCAT Process") {
+        tag "BCFTOOLS_QUERY_ASCAT"
+        tag "process"
+        when {
+            params {
+                outdir = "tests/results"
+                idat_folder = "${baseDir}/test/cnv/idat_folder"
+                bpm_file = "${baseDir}/test/cnv/bpm_file"
+                egt_file = "${baseDir}/test/cnv/egt_file"
+                csv_input = "${baseDir}/test/cnv/csv_input"
+                gc_file = "${baseDir}/test/cnv/gc_file"
+                rt_file = "${baseDir}/test/cnv/rt_file"
+            }
+        }
+
+        then {
+            assert workflow.success
+            // assert the files BAF and LRR 
+            assert file("tests/results/bcftools_query_ascat_output.csv").exists()
+        }
+    }
+
+    test("ASCAT Process") {
+        tag "ASCAT"
+        tag "process"
+        when {
+            params {
+                outdir = "tests/results"
+                idat_folder = "${baseDir}/test/cnv/idat_folder"
+                bpm_file = "${baseDir}/test/cnv/bpm_file"
+                egt_file = "${baseDir}/test/cnv/egt_file"
+                csv_input = "${baseDir}/test/cnv/csv_input"
+                gc_file = "${baseDir}/test/cnv/gc_file"
+                rt_file = "${baseDir}/test/cnv/rt_file"
+            }
+        }
+
+        then {
+            assert workflow.success
+            // Assert the files 
+            assert file("tests/results/ascat_output.csv").exists()
+        }
+    }
+
+    test("ASCAT_ANNOTATION Process") {
+        tag "ASCAT_ANNOTATION"
+        tag "process"
+        when {
+            params {
+                outdir = "tests/results"
+                idat_folder = "${baseDir}/test/cnv/idat_folder"
+                bpm_file = "${baseDir}/test/cnv/bpm_file"
+                egt_file = "${baseDir}/test/cnv/egt_file"
+                csv_input = "${baseDir}/test/cnv/csv_input"
+                gc_file = "${baseDir}/test/cnv/gc_file"
+                rt_file = "${baseDir}/test/cnv/rt_file"
+            }
+        }
+
+        then {
+            assert workflow.success
+            // assert the files .txt file and ploly file 
+            assert file("tests/results/ascat_annotation_output.csv").exists()
+        }
+    }
+}

From a91198c3b6fd41ef62aea3511d6d0ba0f12de3f9 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Fri, 16 Aug 2024 10:25:24 -0400
Subject: [PATCH 23/26] polish and test update

---
 .gitignore                              |   2 +
 bin/help/cnv_array.nf                   |  26 ++---
 bin/log/cnv_array.nf                    |   5 -
 bin/shared/extract_cnv_array_csv.nf     |  25 ++++-
 config/cnv_array.config                 |   1 -
 nextflow.config                         |   2 +-
 test/cnv_array/example_sample_input.csv |   4 +
 test/cnv_array/fail_example_input.csv   |   2 +
 tests/cnv.nf.test                       | 130 ----------------------
 tests/workflows/cnv_array.nf.test       | 139 ++++++++++++++++++++++++
 workflows/cnv_array.nf                  |   5 +-
 11 files changed, 183 insertions(+), 158 deletions(-)
 create mode 100644 test/cnv_array/example_sample_input.csv
 create mode 100644 test/cnv_array/fail_example_input.csv
 delete mode 100644 tests/cnv.nf.test
 create mode 100644 tests/workflows/cnv_array.nf.test

diff --git a/.gitignore b/.gitignore
index 6bfdf18..a295e6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ design.csv
 sv_input.csv
 test.csv
 test2.csv
+.nf-test/
+.nf-test*
\ No newline at end of file
diff --git a/bin/help/cnv_array.nf b/bin/help/cnv_array.nf
index 5eef671..3f27cb8 100644
--- a/bin/help/cnv_array.nf
+++ b/bin/help/cnv_array.nf
@@ -1,18 +1,18 @@
 def help() {
     println '''
-Parameter        | Default | Description
------------------|---------|---------------------------------------------------------------------------
---bpm_file       | /<PATH> | The path to the BPM file.
---egt_file       | /<PATH> | The path to the EGT file.
--w               | /<PATH> | The directory for intermediary files and Nextflow processes. Ensure ample storage.
---help           | false   | Print this help message and exit.
-
---bpm            | /<PATH> | The path to the BPM file.
---csv            | /<PATH> | The path to the CSV file.
---egt            | /<PATH> | The path to the EGT file.
---gtcs           | /<PATH> | The path to GTC output.
---fasta-ref      | /<PATH> | The path to the reference FASTA file.
---extra          | /<PATH> | The path to the output directory.
+Parameter | Default | Description
 
+--pubdir | /<PATH> | The directory that the saved outputs will be stored.
+--organize_by | sample | How to organize the output folder structure. Options: sample or analysis.
+-w | /<PATH> | The directory that all intermediary files and nextflow processes utilize. This directory can become quite large. This should be a location on /fastscratch or other directory with ample storage.
+--gtc_csv | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.csv' | Genotype Call (GTC) manifest for IDAT conversion. Provided by Illumina.
+--bpm_file | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm' | Manifest file describing the SNP or probe content on a BeadChip. Provided by Illumina.
+--egt_file | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt' | Cluster file describing the cluster positions for the Illumina genotyping array. Provided by Illumina.
+--ref_fa | '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta' | The reference fasta file. Reference FASTA build should match Illumina provided files.
+--snp_platform | 'IlluminaCytoSNP' | SNP platform supported by ASCAT. See full supported list here: https://github.com/VanLoo-lab/ascat?tab=readme-ov-file#supported-arrays-without-matched-germline
+--gc_file | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt' | ASCAT’s GC correction file, generated from scripts at https://github.com/VanLoo-lab/ascat/tree/master/LogRcorrection
+--rt_file | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt' | ASCAT’s replication timing file, generated from scripts at https://github.com/VanLoo-lab/ascat/tree/master/LogRcorrection
+--chrArm | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/GRCh38_chromosome_arm.txt' | Chromosome arm locations, used in CNV segment annotation.
+--cnvGeneFile | '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/biomaRt_GRCh38_ensemblv102_CNVgeneAnnotations_primaryChroms.txt' | Ensembl gene annotations, used in CNV segment annotation.
 '''
 }
diff --git a/bin/log/cnv_array.nf b/bin/log/cnv_array.nf
index 75d2990..4213e53 100644
--- a/bin/log/cnv_array.nf
+++ b/bin/log/cnv_array.nf
@@ -16,9 +16,6 @@ def param_log() {
         error "'--ref_fa': is not provided, it is a required parameter."
     }
 
-    if (!params.tsv_file) {
-        error "'--tsv_file': is not provided, it is a required parameter."
-    }
 
     // Log parameter information
     log.info """
@@ -28,12 +25,10 @@ def param_log() {
 
     Results Published to: ${params.pubdir ?: 'N/A'}
     ______________________________________________________
-    --idat_folder               ${params.idat_folder ?: 'N/A'}
     --bpm_file                  ${params.bpm_file}
     --egt_file                  ${params.egt_file}
     --gtc_csv                   ${params.gtc_csv}
     --ref_fa                    ${params.ref_fa}
-    --tsv_file                  ${params.tsv_file}
     -w                          ${workDir}
     --keep_intermediate         ${params.keep_intermediate ?: 'N/A'}
     -c                          ${params.config ?: 'N/A'}
diff --git a/bin/shared/extract_cnv_array_csv.nf b/bin/shared/extract_cnv_array_csv.nf
index 0411a57..68dfb6a 100644
--- a/bin/shared/extract_cnv_array_csv.nf
+++ b/bin/shared/extract_cnv_array_csv.nf
@@ -19,13 +19,13 @@ def extract_csv(csv_file) {
 
     Channel.from(csv_file).splitCsv(header: true)
         .map{ row ->
-            if (!(row.sampleID) || !(row.idat_red || !(row.idat_green))){
+            if (!(row.sampleID) || !(row.idat_red) || !(row.idat_green)) {
                 System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
                 System.err.println(ANSI_RED + "Missing field in csv file header. The csv file must have fields: 'sampleID', 'idat_red', 'idat_green'." + ANSI_RESET)
                 System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
                 System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
                 System.exit(1)
-            }
+            } 
             [row.sampleID.toString(), row]
         }.groupTuple()
         .map{ meta, rows ->
@@ -34,6 +34,24 @@ def extract_csv(csv_file) {
         }.transpose()
         .map{ row, numLanes -> //from here do the usual thing for csv parsing
 
+        // Validate IDAT file names: the base name may contain at most 2 underscores
+        // (xxx_xxx_Red.idat / xxx_xxx_Grn.idat).
+        // NOTE(review): the reason for this limit is not visible here; presumably a downstream
+        // step derives names by splitting on '_' -- confirm before relaxing it.
+        // Red is checked before green; the first offending file is reported and the run exits.
+        [row.idat_red, row.idat_green].each { idat ->
+            // Base name: everything after the last path separator.
+            def idatName = idat.substring(idat.lastIndexOf(System.getProperty("file.separator")) + 1)
+            if (idatName.count("_") > 2) {
+                System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+                System.err.println(ANSI_RED + "The file: " + idat + " contains more than 2 underscores in the name." + ANSI_RESET)
+                System.err.println(ANSI_RED + "IDAT files must have only 2 underscores (i.e., xxx_xxx_Grn.idat and xxx_xxx_Red.idat)." + ANSI_RESET)
+                System.err.println(ANSI_RED + "GEO (and others) rename files to have more than 2 (i.e., GSMxxx_xxx_xxx_Red.idat). File names must be adjusted prior to running." + ANSI_RESET)
+                System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
+                System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
+                System.exit(1)
+            }
+        }
 
         // Metadata to identify samplesheet
         def meta = [:]
@@ -42,7 +60,7 @@ def extract_csv(csv_file) {
 
         if (row.gender != "XY" && row.gender != "XX" && row.gender != ""){
             System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
-            System.err.println(ANSI_RED + "Geneder must be 'XX', 'XY' or empty. " + row.gender + " was provided, and isn't valid." + ANSI_RESET)
+            System.err.println(ANSI_RED + "Gender must be 'XX', 'XY' or empty. " + row.gender + " was provided, and isn't valid." + ANSI_RESET)
             System.err.println(ANSI_RED + "Exiting now." + ANSI_RESET)
             System.err.println(ANSI_RED + "-----------------------------------------------------------------------" + ANSI_RESET)
             System.exit(1)
@@ -78,6 +96,5 @@ def extract_csv(csv_file) {
 
         return [meta.sampleID, meta, row.idat_red, row.idat_green]
 
-
     }
 }
diff --git a/config/cnv_array.config b/config/cnv_array.config
index f8c82f0..18eebf6 100644
--- a/config/cnv_array.config
+++ b/config/cnv_array.config
@@ -10,7 +10,6 @@ params {
     bpm_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L1.bpm'
     egt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_L.egt'
     ref_fa = '/projects/compsci/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta'
-    tsv_file = '/home/temket/cnv_workflow/output/bcftools_convert.tsv'
     snp_platform = 'IlluminaCytoSNP'
     gc_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_GCcontent_validSNPloci.txt'
     rt_file = '/projects/compsci/omics_share/human/GRCh38/supporting_files/cnv_array/HumanCytoSNP-12v2-1_ReplicationTiming_SNPloci_hg38.txt'
diff --git a/nextflow.config b/nextflow.config
index dcb0c13..5bac0e2 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,7 +38,7 @@ try {
 }
 
 // work directory is important as it will be large, plan accordingly
-workDir = "/fastscratch/${USER}/${params.workflow}"
+workDir = "/flashscratch/${USER}/${params.workflow}"
 
 manifest {
     name = "The Jackson Laboratory Computational Sciences Nextflow based analysis pipelines"
diff --git a/test/cnv_array/example_sample_input.csv b/test/cnv_array/example_sample_input.csv
new file mode 100644
index 0000000..2df939c
--- /dev/null
+++ b/test/cnv_array/example_sample_input.csv
@@ -0,0 +1,4 @@
+sampleID,gender,idat_red,idat_green
+Test_Sample_XY,XY,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018R01C01_Red.idat,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018R01C01_Grn.idat
+Test_Sample_XX,XX,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018R01C01_Red.idat,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018R01C01_Grn.idat
+Test_Sample_NA,,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018R01C01_Red.idat,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018R01C01_Grn.idat
diff --git a/test/cnv_array/fail_example_input.csv b/test/cnv_array/fail_example_input.csv
new file mode 100644
index 0000000..af206bf
--- /dev/null
+++ b/test/cnv_array/fail_example_input.csv
@@ -0,0 +1,2 @@
+sampleID,gender,idat_red,idat_green
+Test_Sample_XY,XY,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018_R01C01_Red.idat,/projects/compsci/omics_share/human/GRCh38/supporting_files/benchmarking_data/CNV_ARRAY/raw_idat/GSM7177504_205848650018_R01C01_Grn.idat
\ No newline at end of file
diff --git a/tests/cnv.nf.test b/tests/cnv.nf.test
deleted file mode 100644
index da36883..0000000
--- a/tests/cnv.nf.test
+++ /dev/null
@@ -1,130 +0,0 @@
-nextflow_workflow {
-
-    name "Test Workflow CNV_ARRAY"
-    script "workflows/cnv_array.nf"
-    workflow "CNV_ARRAY"
-
-    test("Full Workflow -- Required Params") {
-        tag "RequiredParams"
-        tag "primary"
-        when {
-            params {
-                csv_input = "${baseDir}/test/cnv/data/example_sample_input.csv"
-            }
-        }
-
-        then {
-            assert workflow.success
-        }
-    }
-
-    test("IAAP_CLI Process") {
-        tag "IAAP_CLI"
-        tag "process"
-        when {
-            params {
-                outdir = "tests/results"
-                idat_folder = "${baseDir}/test/cnv/idat_folder"
-                bpm_file = "${baseDir}/test/cnv/bpm_file"
-                egt_file = "${baseDir}/test/cnv/egt_file"
-                csv_input = "${baseDir}/test/cnv/csv_input"
-                gc_file = "${baseDir}/test/cnv/gc_file"
-                rt_file = "${baseDir}/test/cnv/rt_file"
-            }
-        }
-
-        then {
-            assert workflow.success
-            // assert GTC format files
-            assert file("tests/results/iaap_cli_output.csv").exists()
-        }
-    }
-
-    test("BCFTOOLS_GTC2VCF Process") {
-        tag "BCFTOOLS_GTC2VCF"
-        tag "process"
-        when {
-            params {
-                outdir = "tests/results"
-                idat_folder = "${baseDir}/test/cnv/idat_folder"
-                bpm_file = "${baseDir}/test/cnv/bpm_file"
-                egt_file = "${baseDir}/test/cnv/egt_file"
-                csv_input = "${baseDir}/test/cnv/csv_input"
-                gc_file = "${baseDir}/test/cnv/gc_file"
-                rt_file = "${baseDir}/test/cnv/rt_file"
-            }
-        }
-
-        then {
-            assert workflow.success
-            // assert the tsv, bcf, csi and vcf files 
-            assert file("tests/results/bcftools_gtct2vcf_output.vcf").exists()
-        }
-    }
-
-    test("BCFTOOLS_QUERY_ASCAT Process") {
-        tag "BCFTOOLS_QUERY_ASCAT"
-        tag "process"
-        when {
-            params {
-                outdir = "tests/results"
-                idat_folder = "${baseDir}/test/cnv/idat_folder"
-                bpm_file = "${baseDir}/test/cnv/bpm_file"
-                egt_file = "${baseDir}/test/cnv/egt_file"
-                csv_input = "${baseDir}/test/cnv/csv_input"
-                gc_file = "${baseDir}/test/cnv/gc_file"
-                rt_file = "${baseDir}/test/cnv/rt_file"
-            }
-        }
-
-        then {
-            assert workflow.success
-            // assert the files BAF and LRR 
-            assert file("tests/results/bcftools_query_ascat_output.csv").exists()
-        }
-    }
-
-    test("ASCAT Process") {
-        tag "ASCAT"
-        tag "process"
-        when {
-            params {
-                outdir = "tests/results"
-                idat_folder = "${baseDir}/test/cnv/idat_folder"
-                bpm_file = "${baseDir}/test/cnv/bpm_file"
-                egt_file = "${baseDir}/test/cnv/egt_file"
-                csv_input = "${baseDir}/test/cnv/csv_input"
-                gc_file = "${baseDir}/test/cnv/gc_file"
-                rt_file = "${baseDir}/test/cnv/rt_file"
-            }
-        }
-
-        then {
-            assert workflow.success
-            // Assert the files 
-            assert file("tests/results/ascat_output.csv").exists()
-        }
-    }
-
-    test("ASCAT_ANNOTATION Process") {
-        tag "ASCAT_ANNOTATION"
-        tag "process"
-        when {
-            params {
-                outdir = "tests/results"
-                idat_folder = "${baseDir}/test/cnv/idat_folder"
-                bpm_file = "${baseDir}/test/cnv/bpm_file"
-                egt_file = "${baseDir}/test/cnv/egt_file"
-                csv_input = "${baseDir}/test/cnv/csv_input"
-                gc_file = "${baseDir}/test/cnv/gc_file"
-                rt_file = "${baseDir}/test/cnv/rt_file"
-            }
-        }
-
-        then {
-            assert workflow.success
-            // assert the files .txt file and ploly file 
-            assert file("tests/results/ascat_annotation_output.csv").exists()
-        }
-    }
-}
diff --git a/tests/workflows/cnv_array.nf.test b/tests/workflows/cnv_array.nf.test
new file mode 100644
index 0000000..136e05c
--- /dev/null
+++ b/tests/workflows/cnv_array.nf.test
@@ -0,0 +1,139 @@
+nextflow_workflow {
+
+    name "Test Workflow CNV_ARRAY"
+    script "workflows/cnv_array.nf"
+    workflow "CNV_ARRAY"
+
+    test("Full Workflow") {
+        tag "primary"
+        when {
+            params {
+                csv_input = "${baseDir}/test/cnv_array/example_sample_input.csv"
+                pipeline = 'cnv_array'
+            }
+        }
+
+        then {
+            assert workflow.success
+        }
+    }
+
+    test("Full Workflow -- GEO filename failure") {
+        tag "primary"
+        when {
+            params {
+                csv_input = "${baseDir}/test/cnv_array/fail_example_input.csv"
+                pipeline = 'cnv_array'
+            }
+        }
+
+        then {
+            assert workflow.failed
+        }
+    }
+    // test("IAAP_CLI Process") {
+    //     tag "IAAP_CLI"
+    //     tag "process"
+    //     when {
+    //         params {
+    //             outdir = "tests/results"
+    //             bpm_file = "${baseDir}/test/cnv_array/bpm_file"
+    //             egt_file = "${baseDir}/test/cnv_array/egt_file"
+    //             csv_input = "${baseDir}/test/cnv_array/csv_input"
+    //             gc_file = "${baseDir}/test/cnv_array/gc_file"
+    //             rt_file = "${baseDir}/test/cnv_array/rt_file"
+    //             pipeline = 'cnv_array'
+    //         }
+    //     }
+
+    //     then {
+    //         assert workflow.success
+    //         // assert GTC format files
+    //         assert file("tests/results/iaap_cli_output.csv").exists()
+    //     }
+    // }
+
+    // test("BCFTOOLS_GTC2VCF Process") {
+    //     tag "BCFTOOLS_GTC2VCF"
+    //     tag "process"
+    //     when {
+    //         params {
+    //             outdir = "tests/results"
+    //             bpm_file = "${baseDir}/test/cnv_array/bpm_file"
+    //             egt_file = "${baseDir}/test/cnv_array/egt_file"
+    //             csv_input = "${baseDir}/test/cnv_array/csv_input"
+    //             gc_file = "${baseDir}/test/cnv_array/gc_file"
+    //             rt_file = "${baseDir}/test/cnv_array/rt_file"
+    //         }
+    //     }
+
+    //     then {
+    //         assert workflow.success
+    //         // assert the tsv, bcf, csi and vcf files 
+    //         assert file("tests/results/bcftools_gtct2vcf_output.vcf").exists()
+    //     }
+    // }
+
+    // test("BCFTOOLS_QUERY_ASCAT Process") {
+    //     tag "BCFTOOLS_QUERY_ASCAT"
+    //     tag "process"
+    //     when {
+    //         params {
+    //             outdir = "tests/results"
+    //             bpm_file = "${baseDir}/test/cnv_array/bpm_file"
+    //             egt_file = "${baseDir}/test/cnv_array/egt_file"
+    //             csv_input = "${baseDir}/test/cnv_array/csv_input"
+    //             gc_file = "${baseDir}/test/cnv_array/gc_file"
+    //             rt_file = "${baseDir}/test/cnv_array/rt_file"
+    //         }
+    //     }
+
+    //     then {
+    //         assert workflow.success
+    //         // assert the BAF and LRR files
+    //         assert file("tests/results/bcftools_query_ascat_output.csv").exists()
+    //     }
+    // }
+
+    // test("ASCAT Process") {
+    //     tag "ASCAT"
+    //     tag "process"
+    //     when {
+    //         params {
+    //             outdir = "tests/results"
+    //             bpm_file = "${baseDir}/test/cnv_array/bpm_file"
+    //             egt_file = "${baseDir}/test/cnv_array/egt_file"
+    //             csv_input = "${baseDir}/test/cnv_array/csv_input"
+    //             gc_file = "${baseDir}/test/cnv_array/gc_file"
+    //             rt_file = "${baseDir}/test/cnv_array/rt_file"
+    //         }
+    //     }
+
+    //     then {
+    //         assert workflow.success
+    //         // assert the ASCAT output file
+    //         assert file("tests/results/ascat_output.csv").exists()
+    //     }
+    // }
+
+    // test("ASCAT_ANNOTATION Process") {
+    //     tag "ASCAT_ANNOTATION"
+    //     tag "process"
+    //     when {
+    //         params {
+    //             outdir = "tests/results"
+    //             bpm_file = "${baseDir}/test/cnv_array/bpm_file"
+    //             egt_file = "${baseDir}/test/cnv_array/egt_file"
+    //             csv_input = "${baseDir}/test/cnv_array/csv_input"
+    //             gc_file = "${baseDir}/test/cnv_array/gc_file"
+    //             rt_file = "${baseDir}/test/cnv_array/rt_file"
+    //         }
+    //     }
+
+    //     then {
+    //         assert workflow.success
+    //         // assert the .txt file and the plot file
+    //         assert file("tests/results/ascat_annotation_output.csv").exists()
+    //     }
+    // }
+}
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 38b087e..6cf5f17 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -22,7 +22,7 @@ if (params.help) {
 param_log()
 // Parameter validation
 if (!params.bpm_file || !params.egt_file) {
-    exit 1, "All parameters (idat_folder, bpm_file, egt_file) are required."
+    exit 1, "All parameters (bpm_file, egt_file) are required."
 }
 
 if (params.csv_input) {
@@ -34,9 +34,6 @@ if (params.csv_input) {
 GC_file = file(params.gc_file, checkIfExists: true)
 RT_file = file(params.rt_file, checkIfExists: true)
 
-// Extract CSV input
-ch_input = extract_csv(file(params.csv_input, checkIfExists: true))
-
 // Main workflow
 workflow CNV_ARRAY {
     IAAP_CLI(ch_input)

From f8b7b229e591d88ca7bb649b760c5ed91d4c6307 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Fri, 16 Aug 2024 11:05:26 -0400
Subject: [PATCH 24/26] polish

---
 modules/{utility_modules => ascat}/ascat_annotation.nf | 0
 modules/{r/ASCAT.nf => ascat/ascat_run.nf}             | 0
 workflows/cnv_array.nf                                 | 4 ++--
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename modules/{utility_modules => ascat}/ascat_annotation.nf (100%)
 rename modules/{r/ASCAT.nf => ascat/ascat_run.nf} (100%)

diff --git a/modules/utility_modules/ascat_annotation.nf b/modules/ascat/ascat_annotation.nf
similarity index 100%
rename from modules/utility_modules/ascat_annotation.nf
rename to modules/ascat/ascat_annotation.nf
diff --git a/modules/r/ASCAT.nf b/modules/ascat/ascat_run.nf
similarity index 100%
rename from modules/r/ASCAT.nf
rename to modules/ascat/ascat_run.nf
diff --git a/workflows/cnv_array.nf b/workflows/cnv_array.nf
index 6cf5f17..449ae25 100644
--- a/workflows/cnv_array.nf
+++ b/workflows/cnv_array.nf
@@ -8,8 +8,8 @@ include {extract_csv} from "${projectDir}/bin/shared/extract_cnv_array_csv.nf"
 include {IAAP_CLI} from "${projectDir}/modules/illumina/iaap_cli.nf"
 include {BCFTOOLS_GTC2VCF} from "${projectDir}/modules/bcftools/bcftools_gtct2vcf.nf"
 include {BCFTOOLS_QUERY_ASCAT} from "${projectDir}/modules/bcftools/bcftools_query_ascat.nf"
-include {ASCAT} from "${projectDir}/modules/r/ASCAT.nf"
-include {ASCAT_ANNOTATION} from "${projectDir}/modules/utility_modules/ascat_annotation.nf"
+include {ASCAT} from "${projectDir}/modules/ascat/ascat_run.nf"
+include {ASCAT_ANNOTATION} from "${projectDir}/modules/ascat/ascat_annotation.nf"
 
 
 // Help if needed

From b8327f7aab464729e4e6b65d819cb54b506608e7 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Fri, 16 Aug 2024 12:00:26 -0400
Subject: [PATCH 25/26] polish

---
 bin/log/cnv_array.nf | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/bin/log/cnv_array.nf b/bin/log/cnv_array.nf
index 4213e53..e62d013 100644
--- a/bin/log/cnv_array.nf
+++ b/bin/log/cnv_array.nf
@@ -29,10 +29,14 @@ def param_log() {
     --egt_file                  ${params.egt_file}
     --gtc_csv                   ${params.gtc_csv}
     --ref_fa                    ${params.ref_fa}
+    --snp_platform              ${params.snp_platform}
+    --gc_file                   ${params.gc_file}
+    --rt_file                   ${params.rt_file}
+    --chrArm                    ${params.chrArm}
+    --cnvGeneFile               ${params.cnvGeneFile}
     -w                          ${workDir}
-    --keep_intermediate         ${params.keep_intermediate ?: 'N/A'}
-    -c                          ${params.config ?: 'N/A'}
-    
+    -c                          ${params.config}
+
     Project Directory: ${projectDir}
     Command line call: 
     ${workflow.commandLine}

From 477709bae2e64154988b3349b72d2873062202d4 Mon Sep 17 00:00:00 2001
From: Mike Lloyd <mike.lloyd@jax.org>
Date: Fri, 16 Aug 2024 14:25:47 -0400
Subject: [PATCH 26/26] polish

---
 modules/bcftools/bcftools_gtct2vcf.nf | 7 +++----
 modules/illumina/iaap_cli.nf          | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/modules/bcftools/bcftools_gtct2vcf.nf b/modules/bcftools/bcftools_gtct2vcf.nf
index a2c8155..8db2b94 100644
--- a/modules/bcftools/bcftools_gtct2vcf.nf
+++ b/modules/bcftools/bcftools_gtct2vcf.nf
@@ -8,15 +8,14 @@ process BCFTOOLS_GTC2VCF {
     errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
 
     container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
-    publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'bcftools' }", mode: 'copy'
+    publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'bcftools' }", pattern: "*.{vcf,tsv}", mode: 'copy'
 
     input:
     tuple val(sampleID), val(meta), path(gtc)
 
     output:
-    tuple val(sampleID), val(meta), path('*_convert.bcf'), path('*_convert.bcf.csi'), path('*_convert.vcf'), path('*_convert.tsv'), emit: gtc2vcf
+    tuple val(sampleID), val(meta), path('*_convert.bcf'), path('*_convert.bcf.csi'), path('*_convert.vcf'), path('*_convert_info.tsv'), emit: gtc2vcf
 
-    
     script:
     """
     bcftools +gtc2vcf --no-version -Ou \
@@ -25,7 +24,7 @@ process BCFTOOLS_GTC2VCF {
     --egt ${params.egt_file} \
     --gtcs ./ \
     --fasta-ref ${params.ref_fa} \
-    --extra ${sampleID}_convert.tsv | \
+    --extra ${sampleID}_convert_info.tsv | \
     bcftools sort -Ou -T ./bcftools. | \
     bcftools norm --no-version -Ob -c x -f ${params.ref_fa} | \
     tee ${sampleID}_convert.bcf | \
diff --git a/modules/illumina/iaap_cli.nf b/modules/illumina/iaap_cli.nf
index c1f46b7..c882e96 100644
--- a/modules/illumina/iaap_cli.nf
+++ b/modules/illumina/iaap_cli.nf
@@ -10,8 +10,8 @@ process IAAP_CLI {
   
   container 'quay.io/jaxcompsci/gtc2vcf_with_tools:v2'
 
-  publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'iaap_cli' }", pattern: "*.gtc", mode:'copy'
-  publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'iaap_cli' }", pattern: "*.log", mode:'copy'
+  publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'iaap_cli' }", pattern: "*.gtc", mode:'copy', enabled: params.keep_intermediate
+  publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'iaap_cli' }", pattern: "*.log", mode:'copy', enabled: params.keep_intermediate
 
   input:
   tuple val(sampleID), val(meta), path(red_idat), path(green_idat)