Merge pull request #47 from rki-mf1/ci
New CI tests
Krannich479 authored May 6, 2024
2 parents 921f7c3 + caebc78 commit b947cdd
Showing 3 changed files with 19 additions and 32 deletions.
39 changes: 12 additions & 27 deletions .github/workflows/tests.yml
@@ -37,8 +37,8 @@ jobs:
nextflow -version
run-hap:
name: Run hap.nf
run-cievad:
name: Run cievad
needs: build
runs-on: "ubuntu-latest"
defaults:
@@ -66,32 +66,17 @@ jobs:
run: |
nextflow run hap.nf -profile local,conda
run-eval:
name: Run eval.nf
needs: build
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash -el {0}
steps:
- uses: actions/checkout@v4
- uses: conda-incubator/setup-miniconda@v3
with:
miniconda-version: "latest"
activate-environment: nextflow
environment-file: env/conda_nxf.yml
channels: conda-forge,bioconda,defaults
channel-priority: true
auto-activate-base: false

- name : Download reference
- name: Test callset evaluation with callset_dir
run: |
wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3
sed 's/>ENA|MN908947|MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome./>MN908947.3/g' MN908947.3 > MN908947.3.fasta
mkdir -p reference/Sars-Cov-2/Wuhan-Hu-1/
mv MN908947.3.fasta reference/Sars-Cov-2/Wuhan-Hu-1/
nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/
- name: Test callset evaluation
- name: Test callset evaluation with sample_sheet
run: |
nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/
cwd=$(pwd)
echo "index,truthset,callset" > my_samples.csv
echo "1,${cwd}/results/simulated_hap1.vcf,${cwd}/aux/ci_data/callset_1.vcf.gz" >> my_samples.csv
echo "2,${cwd}/results/simulated_hap2.vcf,${cwd}/aux/ci_data/callset_2.vcf.gz" >> my_samples.csv
echo "3,${cwd}/results/simulated_hap3.vcf,${cwd}/aux/ci_data/callset_3.vcf.gz" >> my_samples.csv
nextflow run eval.nf -profile local,conda --sample_sheet my_samples.csv
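For orientation, the sample sheet assembled by the step above contains one row per truthset/callset pair. With `${cwd}` expanded to the repository root (shown here as the placeholder `<repo-root>`, since the actual runner path varies), the resulting `my_samples.csv` would look roughly like this:

```
index,truthset,callset
1,<repo-root>/results/simulated_hap1.vcf,<repo-root>/aux/ci_data/callset_1.vcf.gz
2,<repo-root>/results/simulated_hap2.vcf,<repo-root>/aux/ci_data/callset_2.vcf.gz
3,<repo-root>/results/simulated_hap3.vcf,<repo-root>/aux/ci_data/callset_3.vcf.gz
```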
6 changes: 4 additions & 2 deletions README.md
@@ -69,11 +69,13 @@ The minimal command to evaluate the accordance between a truthset (generated dat
nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
```
where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files.
Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. Alternatively, one can provide a sample sheet (","-delimited) with the columns "index", "callset" and truthset", where "index" is an iteration from 1 to n (number of samples) and "callset"/"truthset" are paths to the respectively matching callset VCF files. The command is
Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset.
Alternatively, one can provide a sample sheet of comma-separated values (CSV file) with the columns "index", "truthset" and "callset", where "index" is an integer from 1 to n (number of samples) and "truthset"/"callset" are paths to the pairwise matching VCF files.
Callsets can optionally be _gzip_ compressed.
The command for the sample sheet input is
```
nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet>
```
Note: Callsets can optionally be _gzip_ compressed.

<details><summary>⚠️ Run commands from the root directory </summary>
Please run all commands from a terminal at the top-level folder (root directory) of this repository.
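As a concrete illustration of the `callset_<X>.vcf[.gz]` naming convention described in the README change above, a folder passed via `--callsets_dir` could be laid out like the CI test data directory (file names are illustrative; gzip compression is optional):

```
aux/ci_data/
├── callset_1.vcf.gz
├── callset_2.vcf.gz
└── callset_3.vcf.gz
```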
6 changes: 3 additions & 3 deletions eval.nf
@@ -13,13 +13,13 @@ workflow{

if (params.callsets_dir != "" && params.sample_sheet == "") {

ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}")
ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}", checkIfExists: true)
ch_callsets
.map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) }
.set {ch_callsets}
// ch_callsets.view()

ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf")
ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf", checkIfExists: true)
ch_truthsets
.map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) }
.set {ch_truthsets}
@@ -34,7 +34,7 @@ workflow{
ch_variantsets_map = Channel
.fromPath(params.sample_sheet, checkIfExists: true)
.splitCsv(header: true, sep: ",")
.map {row -> [row["index"] as Integer, row["callset"], row["truthset"]]}
.map {row -> [row["index"] as Integer, row["truthset"], row["callset"]]}
// .view()

} else {
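The `callsets_dir` branch shown above derives each callset's integer index from its file name. A minimal, standalone Groovy sketch of that parsing step (the path below is hypothetical; in `eval.nf` it is applied to each `Channel.fromPath` result) behaves as follows:

```groovy
// Hypothetical example path; in eval.nf this comes from the callsets channel.
def path = "/tmp/aux/ci_data/callset_3.vcf.gz"

// Take the file name, split on '_' to isolate "3.vcf.gz",
// strip the ".vcf" and ".gz" suffixes, and convert the remainder to an integer.
def index = path.split('/')[-1]
                .tokenize('_')[1]
                .replaceFirst('.vcf', '')
                .replaceFirst('.gz', '')
                .toInteger()

assert index == 3
println "callset index: ${index}"   // prints: callset index: 3
```

This integer key is what pairs each callset with the truthset of the same index, both in the `--callsets_dir` mode and, after the column-order fix in this commit, in the `--sample_sheet` mode.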
