From 9d75700f839c1a12718265feaffd6cf641b8d5a1 Mon Sep 17 00:00:00 2001 From: Johannes Hausmann Date: Thu, 28 Sep 2023 13:10:18 +0200 Subject: [PATCH 01/10] Implemented lineage only mode Implemented generate reference only mode --- main.nf | 230 ++++++++++++++++++++++++++++-------------------- nextflow.config | 5 +- 2 files changed, 137 insertions(+), 98 deletions(-) diff --git a/main.nf b/main.nf index 830d7c6..9f65f8b 100755 --- a/main.nf +++ b/main.nf @@ -30,6 +30,7 @@ params.skip_bcftools = false params.skip_gatk = false params.skip_pangolin = false params.skip_normalization = false +params.lineage_mode = false // references params.reference = false @@ -39,7 +40,7 @@ params.gff = false //params.snpeff_config = false params.snpeff_organism = false params.primers = false -params.different_virus = false +params.reference_generate = false params.output = "." params.min_mapping_quality = 20 @@ -206,6 +207,11 @@ else if (params.input_vcfs_list != false || params.vcf != false) { .set { input_vcfs } } } +else if(params.reference_generate) { + genome = params.reference + annotation = gff + annotation_name = snpeff_organism +} else { log.error "missing some input data" exit 1 @@ -240,119 +246,149 @@ if (params.skip_bcftools && params.skip_gatk && params.skip_ivar && params.skip_ exit 1 } +// Lineage mode should only be used when running with assembly or VCF input +// For FASTQ input we can run pangolin as regular rule +if (params.lineage_mode && input_fastqs) { + log.error "lineage mode is only supported with fasta or VCF input" + exit 1 +} workflow { - if (params.reference) { - if (! skip_snpeff && gff) { - SNPEFF_DATABASE(reference, gff, snpeff_organism) - snpeff_data = SNPEFF_DATABASE.out.snpeff_data - snpeff_config = SNPEFF_DATABASE.out.snpeff_config - } - - } - if (input_fastqs) { - if (params.reference) { - BWA_INDEX(reference) - reference = BWA_INDEX.out.reference + if (params.lineage_mode) { + log.info "Running lineage mode. In this mode only Pangolin is executed on either fasta or VCF" + if (input_fastas) { + PANGOLIN_LINEAGE(input_fastas) } - if (library == "paired") { - READ_TRIMMING_PAIRED_END(input_fastqs) - ALIGNMENT_PAIRED_END(READ_TRIMMING_PAIRED_END.out[0], reference) - bam_files = ALIGNMENT_PAIRED_END.out + else if (input_vcfs) { + VARIANT_NORMALIZATION(input_vcfs, reference) + normalized_vcfs = VARIANT_NORMALIZATION.out + VCF2FASTA(normalized_vcfs, reference) + PANGOLIN_LINEAGE(VCF2FASTA.out) } - else { - READ_TRIMMING_SINGLE_END(input_fastqs) - ALIGNMENT_SINGLE_END(READ_TRIMMING_SINGLE_END.out[0], reference) - bam_files = ALIGNMENT_SINGLE_END.out + } + else if (params.reference_generate) { + log.info "Running genome/annotation generate mode. This step generates only bwa and snpEff databases..." + // Custom reference mode. This mode is added to prepare the reference just once when processing many samples + if (genome) { + BWA_INDEX(genome) + if (!skip_snpeff) { + SNPEFF_DATABASE(genome, gff, snpeff_organism) + } } - BAM_PREPROCESSING(bam_files, reference) - preprocessed_bams = BAM_PREPROCESSING.out.preprocessed_bams + } + else { + // Run genome and annotation steps for single sample + if (params.reference) { + if (! skip_snpeff && gff) { + SNPEFF_DATABASE(reference, gff, snpeff_organism) + snpeff_data = SNPEFF_DATABASE.out.snpeff_data + snpeff_config = SNPEFF_DATABASE.out.snpeff_config + } - if (primers) { - PRIMER_TRIMMING_IVAR(preprocessed_bams, primers) - preprocessed_bams = PRIMER_TRIMMING_IVAR.out.trimmed_bam } - COVERAGE_ANALYSIS(preprocessed_bams) - - // variant calling - vcfs_to_normalize = null - if (!params.skip_bcftools) { - VARIANT_CALLING_BCFTOOLS(preprocessed_bams, reference) - vcfs_to_normalize = vcfs_to_normalize == null? - VARIANT_CALLING_BCFTOOLS.out : vcfs_to_normalize.concat(VARIANT_CALLING_BCFTOOLS.out) + if (input_fastqs) { + if (params.reference) { + BWA_INDEX(reference) + reference = BWA_INDEX.out.reference + } + if (library == "paired") { + READ_TRIMMING_PAIRED_END(input_fastqs) + ALIGNMENT_PAIRED_END(READ_TRIMMING_PAIRED_END.out[0], reference) + bam_files = ALIGNMENT_PAIRED_END.out + } + else { + READ_TRIMMING_SINGLE_END(input_fastqs) + ALIGNMENT_SINGLE_END(READ_TRIMMING_SINGLE_END.out[0], reference) + bam_files = ALIGNMENT_SINGLE_END.out + } + BAM_PREPROCESSING(bam_files, reference) + preprocessed_bams = BAM_PREPROCESSING.out.preprocessed_bams + + if (primers) { + PRIMER_TRIMMING_IVAR(preprocessed_bams, primers) + preprocessed_bams = PRIMER_TRIMMING_IVAR.out.trimmed_bam + } + COVERAGE_ANALYSIS(preprocessed_bams) + + // variant calling + vcfs_to_normalize = null + if (!params.skip_bcftools) { + VARIANT_CALLING_BCFTOOLS(preprocessed_bams, reference) + vcfs_to_normalize = vcfs_to_normalize == null? + VARIANT_CALLING_BCFTOOLS.out : vcfs_to_normalize.concat(VARIANT_CALLING_BCFTOOLS.out) + } + if (!params.skip_lofreq) { + VARIANT_CALLING_LOFREQ(preprocessed_bams, reference) + vcfs_to_normalize = vcfs_to_normalize == null? + VARIANT_CALLING_LOFREQ.out : vcfs_to_normalize.concat(VARIANT_CALLING_LOFREQ.out) + } + if (!params.skip_gatk) { + VARIANT_CALLING_GATK(preprocessed_bams, reference) + vcfs_to_normalize = vcfs_to_normalize == null? + VARIANT_CALLING_GATK.out : vcfs_to_normalize.concat(VARIANT_CALLING_GATK.out) + } + if (!params.skip_ivar && gff) { + VARIANT_CALLING_IVAR(preprocessed_bams, reference, gff) + IVAR2VCF(VARIANT_CALLING_IVAR.out, reference) + vcfs_to_normalize = vcfs_to_normalize == null? + IVAR2VCF.out : vcfs_to_normalize.concat(IVAR2VCF.out) + } } - if (!params.skip_lofreq) { - VARIANT_CALLING_LOFREQ(preprocessed_bams, reference) - vcfs_to_normalize = vcfs_to_normalize == null? - VARIANT_CALLING_LOFREQ.out : vcfs_to_normalize.concat(VARIANT_CALLING_LOFREQ.out) + else if (input_fastas) { + if (!params.skip_pangolin) { + // pangolin from fasta + PANGOLIN_LINEAGE(input_fastas) + } + // assembly variant calling + VARIANT_CALLING_ASSEMBLY(input_fastas, reference) + vcfs_to_normalize = VARIANT_CALLING_ASSEMBLY.out } - if (!params.skip_gatk) { - VARIANT_CALLING_GATK(preprocessed_bams, reference) - vcfs_to_normalize = vcfs_to_normalize == null? - VARIANT_CALLING_GATK.out : vcfs_to_normalize.concat(VARIANT_CALLING_GATK.out) + else if (input_vcfs) { + vcfs_to_normalize = input_vcfs } - if (!params.skip_ivar && gff) { - VARIANT_CALLING_IVAR(preprocessed_bams, reference, gff) - IVAR2VCF(VARIANT_CALLING_IVAR.out, reference) - vcfs_to_normalize = vcfs_to_normalize == null? - IVAR2VCF.out : vcfs_to_normalize.concat(IVAR2VCF.out) + + if (! params.skip_normalization) { + VARIANT_NORMALIZATION(vcfs_to_normalize, reference) + normalized_vcfs = VARIANT_NORMALIZATION.out } - } - else if (input_fastas) { - if (!params.skip_pangolin) { - // pangolin from fasta - PANGOLIN_LINEAGE(input_fastas) + else { + normalized_vcfs = vcfs_to_normalize } - // assembly variant calling - VARIANT_CALLING_ASSEMBLY(input_fastas, reference) - vcfs_to_normalize = VARIANT_CALLING_ASSEMBLY.out - } - else if (input_vcfs) { - vcfs_to_normalize = input_vcfs - } - - if (! params.skip_normalization) { - VARIANT_NORMALIZATION(vcfs_to_normalize, reference) - normalized_vcfs = VARIANT_NORMALIZATION.out - } - else { - normalized_vcfs = vcfs_to_normalize - } - - if (input_fastqs || input_vcfs) { - // pangolin from VCF on the normalized VCFs - if (!params.skip_pangolin) { - VCF2FASTA(normalized_vcfs, reference) - PANGOLIN_LINEAGE(VCF2FASTA.out) + if (input_fastqs || input_vcfs) { + // pangolin from VCF on the normalized VCFs + if (!params.skip_pangolin) { + VCF2FASTA(normalized_vcfs, reference) + PANGOLIN_LINEAGE(VCF2FASTA.out) + } } - } - if (! skip_sarscov2_annotations) { - // only optionally add SARS-CoV-2 specific annotations - VARIANT_SARSCOV2_ANNOTATION(normalized_vcfs) - normalized_vcfs = VARIANT_SARSCOV2_ANNOTATION.out.annotated_vcfs - } + if (! skip_sarscov2_annotations) { + // only optionally add SARS-CoV-2 specific annotations + VARIANT_SARSCOV2_ANNOTATION(normalized_vcfs) + normalized_vcfs = VARIANT_SARSCOV2_ANNOTATION.out.annotated_vcfs + } - if (preprocessed_bams) { - // we can only add technical annotations when we have the reads - VAFATOR(normalized_vcfs.combine(preprocessed_bams, by: 0)) - VARIANT_VAF_ANNOTATION(VAFATOR.out.annotated_vcf) - normalized_vcfs = VARIANT_VAF_ANNOTATION.out.vaf_annotated - } + if (preprocessed_bams) { + // we can only add technical annotations when we have the reads + VAFATOR(normalized_vcfs.combine(preprocessed_bams, by: 0)) + VARIANT_VAF_ANNOTATION(VAFATOR.out.annotated_vcf) + normalized_vcfs = VARIANT_VAF_ANNOTATION.out.vaf_annotated + } - // NOTE: phasing has to happen before SnpEff annotation for MNVs to be annotated correctly - if (gff) { - PHASING(normalized_vcfs, reference, gff) - normalized_vcfs = PHASING.out - } + // NOTE: phasing has to happen before SnpEff annotation for MNVs to be annotated correctly + if (gff) { + PHASING(normalized_vcfs, reference, gff) + normalized_vcfs = PHASING.out + } - if (! skip_snpeff) { - // only when configured we run SnpEff - VARIANT_ANNOTATION(normalized_vcfs, snpeff_data, snpeff_config, snpeff_organism) - normalized_vcfs = VARIANT_ANNOTATION.out.annotated_vcfs - } - else { - BGZIP(normalized_vcfs) + if (! skip_snpeff) { + // only when configured we run SnpEff + VARIANT_ANNOTATION(normalized_vcfs, snpeff_data, snpeff_config, snpeff_organism) + normalized_vcfs = VARIANT_ANNOTATION.out.annotated_vcfs + } + else { + BGZIP(normalized_vcfs) + } } } diff --git a/nextflow.config b/nextflow.config index 6ed6ce9..1e59245 100755 --- a/nextflow.config +++ b/nextflow.config @@ -29,7 +29,10 @@ params.pfam_names_header = "$baseDir/reference/pfam_names.header.txt" params.pfam_descriptions_header = "$baseDir/reference/pfam_descriptions.header.txt" profiles { - conda { params.enable_conda = true } + conda { + params.enable_conda = true + conda.enabled = true + } debug { process.beforeScript = 'echo $HOSTNAME' } test { params.cpus = 1 From 27386e74361acba6a93fc89fdcda90ec779c5c40 Mon Sep 17 00:00:00 2001 From: Johannes Hausmann Date: Thu, 5 Oct 2023 16:37:56 +0200 Subject: [PATCH 02/10] Updated pangolin task to accept vcf with caller == input Updated main.nf to respect parameter params.skip_normalization Added integration tests for lineage mode and reference generate mode --- main.nf | 9 +++++++-- modules/00_prepare_annotation.nf | 5 +++++ modules/07_lineage_annotation.nf | 3 ++- tests/scripts/test_14.sh | 32 ++++++++++++++++++++++++++++++++ tests/scripts/test_15.sh | 21 +++++++++++++++++++++ tests/scripts/test_16.sh | 16 ++++++++++++++++ 6 files changed, 83 insertions(+), 3 deletions(-) create mode 100755 tests/scripts/test_14.sh create mode 100755 tests/scripts/test_15.sh create mode 100755 tests/scripts/test_16.sh diff --git a/main.nf b/main.nf index 9f65f8b..c3cbf6a 100755 --- a/main.nf +++ b/main.nf @@ -260,8 +260,13 @@ workflow { PANGOLIN_LINEAGE(input_fastas) } else if (input_vcfs) { - VARIANT_NORMALIZATION(input_vcfs, reference) - normalized_vcfs = VARIANT_NORMALIZATION.out + if (! params.skip_normalization) { + VARIANT_NORMALIZATION(input_vcfs, reference) + normalized_vcfs = VARIANT_NORMALIZATION.out + } + else { + normalized_vcfs = input_vcfs + } VCF2FASTA(normalized_vcfs, reference) PANGOLIN_LINEAGE(VCF2FASTA.out) } diff --git a/modules/00_prepare_annotation.nf b/modules/00_prepare_annotation.nf index f8f0ce8..02a3c9f 100755 --- a/modules/00_prepare_annotation.nf +++ b/modules/00_prepare_annotation.nf @@ -18,6 +18,11 @@ process BWA_INDEX { path("reference/sequences.fa"), emit: reference path("reference/sequences.fa.fai"), emit: fai path("reference/sequences.dict"), emit: gatk_dict + path("reference/sequences.fa.0123") + path("reference/sequences.fa.amb") + path("reference/sequences.fa.ann") + path("reference/sequences.fa.bwt.2bit.64") + path("reference/sequences.fa.pac") script: memory = "${params.memory}".replaceAll(" ", "").toLowerCase() diff --git a/modules/07_lineage_annotation.nf b/modules/07_lineage_annotation.nf index 62e072c..662693c 100644 --- a/modules/07_lineage_annotation.nf +++ b/modules/07_lineage_annotation.nf @@ -19,7 +19,8 @@ process PANGOLIN_LINEAGE { when: // only runs pangolin on LoFreq and the assembly results - caller == "lofreq" || caller == "assembly" + // JoHa: added input to run lineage mode for vcf input + caller == "lofreq" || caller == "assembly" || caller == "input" shell: """ diff --git a/tests/scripts/test_14.sh b/tests/scripts/test_14.sh new file mode 100755 index 0000000..5c49af4 --- /dev/null +++ b/tests/scripts/test_14.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +################################################################################## +# Genome generate +################################################################################## +echo "Running CoVigator pipeline test 14" +source bin/assert.sh +output=tests/output/test14 +nextflow main.nf -profile conda --name test_data \ + --output $output \ + --reference_generate \ + --reference reference/Sars_cov_2.ASM985889v3.dna.toplevel.fa \ + --gff reference/Sars_cov_2.ASM985889v3.101.gff3 \ + --snpeff_organism Sars_cov_2 + +# Test reference genome related output +test -s $output/reference/sequences.fa.fai || { echo "Missing fasta index file!"; exit 1; } +test -s $output/reference/sequences.fa.dict || { echo "Missing GATK dict file!"; exit 1; } + +# Test bwa index files are present +test -s $output/reference/sequences.fa.0123 || { echo "Missing bwa 0123 index file!"; exit 1; } +test -s $output/reference/sequences.fa.amb || { echo "Missing bwa amb index file!"; exit 1; } +test -s $output/reference/sequences.fa.ann || { echo "Missing bwa ann index file!"; exit 1; } +test -s $output/reference/sequences.fa.bwt.2bit.64 || { echo "Missing bwa bwt.2bit.64 index file!"; exit 1; } +test -s $output/reference/sequences.fa.pac || { echo "Missing bwa pac index file!"; exit 1; } + + +# Test snpEff output +test -s $output/snpeff/snpEff.config || { echo "Missing snpEff config file!"; exit 1; } +test -s $output/snpeff/Sars_cov_2/snpEffectPredictor.bin || { echo "Missing snpEff predictor bin file!"; exit 1; } +test -s $output/snpeff/Sars_cov_2/sequences.fa || { echo "Missing snpEff reference genome!"; exit 1; } +test -s $output/snpeff/Sars_cov_2/genes.gff || { echo "Missing snpEff reference annotation!"; exit 1; } diff --git a/tests/scripts/test_15.sh b/tests/scripts/test_15.sh new file mode 100755 index 0000000..2bb23a4 --- /dev/null +++ b/tests/scripts/test_15.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +################################################################################## +# Lineage only mode +################################################################################## + +echo "Running CoVigator pipeline test 15" +source bin/assert.sh +output=tests/output/test15 +nextflow main.nf -profile conda --name test_data \ + --output $output \ + --vcf tests/test_data/test_data.lofreq.vcf \ + --lineage_mode + +test -s $output/test_data.input.fasta || { echo "Missing VCF2FASTA fasta file (lineage mode with vcf input)!"; exit 1; } +test -s $output/test_data.input.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } +assert_eq `wc -l $output/test_data.input.pangolin.csv` 2 "Wrong number of pangolin results" + + + + \ No newline at end of file diff --git a/tests/scripts/test_16.sh b/tests/scripts/test_16.sh new file mode 100755 index 0000000..e987eb1 --- /dev/null +++ b/tests/scripts/test_16.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +################################################################################## +# Lineage only mode +################################################################################## + +echo "Running CoVigator pipeline test 16" +source bin/assert.sh +output=tests/output/test15 +nextflow main.nf -profile conda --name test_data \ + --output $output \ + --fasta tests/test_data/test_data.fasta \ + --lineage_mode + +test -s $output/test_data.assembly.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } +assert_eq `wc -l $output/test_data.assembly.pangolin.csv` 2 "Wrong number of pangolin results" \ No newline at end of file From 3fa96e27e3ad20175b23b6b5508dc697a5bc0dd2 Mon Sep 17 00:00:00 2001 From: Jo Hausmann <35309108+johausmann@users.noreply.github.com> Date: Thu, 5 Oct 2023 16:58:56 +0200 Subject: [PATCH 03/10] Update Makefile Added tests for lineage and reference only modes --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index a10ec03..5e054f7 100755 --- a/Makefile +++ b/Makefile @@ -22,4 +22,7 @@ test: bash tests/scripts/test_11.sh bash tests/scripts/test_12.sh bash tests/scripts/test_13.sh + bash tests/scripts/test_14.sh + bash tests/scripts/test_15.sh + bash tests/scripts/test_16.sh bash tests/scripts/test_python_unit_tests.sh From 7b0482ea0c4961c63976c7741d25ffa5ecff440e Mon Sep 17 00:00:00 2001 From: Johannes Hausmann Date: Thu, 5 Oct 2023 17:04:14 +0200 Subject: [PATCH 04/10] Replace whitespace with tab in Makefile --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5e054f7..4589a34 100755 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ test: bash tests/scripts/test_11.sh bash tests/scripts/test_12.sh bash tests/scripts/test_13.sh - bash tests/scripts/test_14.sh - bash tests/scripts/test_15.sh - bash tests/scripts/test_16.sh + bash tests/scripts/test_14.sh + bash tests/scripts/test_15.sh + bash tests/scripts/test_16.sh bash tests/scripts/test_python_unit_tests.sh From 5e906016d86b70455e708a37d403e8d84b68a9f5 Mon Sep 17 00:00:00 2001 From: Johannes Hausmann Date: Thu, 5 Oct 2023 17:20:37 +0200 Subject: [PATCH 05/10] Updated path in test_14.sh --- tests/scripts/test_14.sh | 4 ++-- tests/scripts/test_15.sh | 2 +- tests/scripts/test_16.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/scripts/test_14.sh b/tests/scripts/test_14.sh index 5c49af4..4e0b050 100755 --- a/tests/scripts/test_14.sh +++ b/tests/scripts/test_14.sh @@ -9,8 +9,8 @@ output=tests/output/test14 nextflow main.nf -profile conda --name test_data \ --output $output \ --reference_generate \ - --reference reference/Sars_cov_2.ASM985889v3.dna.toplevel.fa \ - --gff reference/Sars_cov_2.ASM985889v3.101.gff3 \ + --reference $(pwd)/reference/Sars_cov_2.ASM985889v3.dna.toplevel.fa \ + --gff $(pwd)/reference/Sars_cov_2.ASM985889v3.101.gff3 \ --snpeff_organism Sars_cov_2 # Test reference genome related output diff --git a/tests/scripts/test_15.sh b/tests/scripts/test_15.sh index 2bb23a4..0b58526 100755 --- a/tests/scripts/test_15.sh +++ b/tests/scripts/test_15.sh @@ -9,7 +9,7 @@ source bin/assert.sh output=tests/output/test15 nextflow main.nf -profile conda --name test_data \ --output $output \ - --vcf tests/test_data/test_data.lofreq.vcf \ + --vcf $(pwd)/tests/test_data/test_data.lofreq.vcf \ --lineage_mode test -s $output/test_data.input.fasta || { echo "Missing VCF2FASTA fasta file (lineage mode with vcf input)!"; exit 1; } diff --git a/tests/scripts/test_16.sh b/tests/scripts/test_16.sh index e987eb1..b97b377 100755 --- a/tests/scripts/test_16.sh +++ b/tests/scripts/test_16.sh @@ -9,7 +9,7 @@ source bin/assert.sh output=tests/output/test15 nextflow main.nf -profile conda --name test_data \ --output $output \ - --fasta tests/test_data/test_data.fasta \ + --fasta $(pwd)/tests/test_data/test_data.fasta \ --lineage_mode test -s $output/test_data.assembly.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } From ea09b0b89bd1b19ec45a17868c73ef7c48b6f6ce Mon Sep 17 00:00:00 2001 From: Jo Hausmann <35309108+johausmann@users.noreply.github.com> Date: Tue, 10 Oct 2023 09:49:22 +0200 Subject: [PATCH 06/10] Update test_15.sh Make CodeFactor happy --- tests/scripts/test_15.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/test_15.sh b/tests/scripts/test_15.sh index 0b58526..bfa1639 100755 --- a/tests/scripts/test_15.sh +++ b/tests/scripts/test_15.sh @@ -14,8 +14,8 @@ nextflow main.nf -profile conda --name test_data \ test -s $output/test_data.input.fasta || { echo "Missing VCF2FASTA fasta file (lineage mode with vcf input)!"; exit 1; } test -s $output/test_data.input.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } -assert_eq `wc -l $output/test_data.input.pangolin.csv` 2 "Wrong number of pangolin results" +assert_eq $(wc -l $output/test_data.input.pangolin.csv) 2 "Wrong number of pangolin results" - \ No newline at end of file + From d41a5f082482221bdcbde7b22e66d0425bfe067f Mon Sep 17 00:00:00 2001 From: Jo Hausmann <35309108+johausmann@users.noreply.github.com> Date: Tue, 10 Oct 2023 09:49:55 +0200 Subject: [PATCH 07/10] Update test_16.sh Make CodeFactor happy --- tests/scripts/test_16.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/test_16.sh b/tests/scripts/test_16.sh index b97b377..921852c 100755 --- a/tests/scripts/test_16.sh +++ b/tests/scripts/test_16.sh @@ -13,4 +13,4 @@ nextflow main.nf -profile conda --name test_data \ --lineage_mode test -s $output/test_data.assembly.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } -assert_eq `wc -l $output/test_data.assembly.pangolin.csv` 2 "Wrong number of pangolin results" \ No newline at end of file +assert_eq $(wc -l $output/test_data.assembly.pangolin.csv) 2 "Wrong number of pangolin results" From 07b3c5210b7395b7ae0dcff07715b9888d99492a Mon Sep 17 00:00:00 2001 From: Jo Hausmann <35309108+johausmann@users.noreply.github.com> Date: Tue, 10 Oct 2023 09:59:13 +0200 Subject: [PATCH 08/10] Update test_14.sh Updated test for GATK dict file --- tests/scripts/test_14.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/test_14.sh b/tests/scripts/test_14.sh index 4e0b050..c2cb317 100755 --- a/tests/scripts/test_14.sh +++ b/tests/scripts/test_14.sh @@ -15,7 +15,7 @@ nextflow main.nf -profile conda --name test_data \ # Test reference genome related output test -s $output/reference/sequences.fa.fai || { echo "Missing fasta index file!"; exit 1; } -test -s $output/reference/sequences.fa.dict || { echo "Missing GATK dict file!"; exit 1; } +test -s $output/reference/sequences.dict || { echo "Missing GATK dict file!"; exit 1; } # Test bwa index files are present test -s $output/reference/sequences.fa.0123 || { echo "Missing bwa 0123 index file!"; exit 1; } From ce941c12173ca45454e542e1461539812e900ab1 Mon Sep 17 00:00:00 2001 From: Johannes Hausmann Date: Tue, 10 Oct 2023 11:23:29 +0200 Subject: [PATCH 09/10] Updated lineage test files and added new VCF file as lofreq VCF gives bcftools errors --- .../test_data.assembly.lineage_mode.vcf.gz | Bin 0 -> 5240 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_data/test_data.assembly.lineage_mode.vcf.gz diff --git a/tests/test_data/test_data.assembly.lineage_mode.vcf.gz b/tests/test_data/test_data.assembly.lineage_mode.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..d0cfaae44bd8cbcab624e00c995345453cb9833c GIT binary patch literal 5240 zcmV-;6o=~{iwFb&00000{{{d;LjnNI1l3qubJ{o*ex|?T40%X98yQ1-NwUk%xHdG* z0A_8%_RS;gW231pc_o=P9e(_d%%#w#4ZUTzFU&c|M>?19lR5IqNy>6eg)FG<&fNa; zsbxPpIq46E(_Ya1W8ifsP8gnfsA9=V%>?f@ojiw>Pc zDra-jw%QjT`?;bhC{MaZ211G|*No;l<}6k~D8B_l;swTUh{O~S2uZ2ZwANa&NU#-) zCCjK5lGs+;viEeJDN?c$bH=eHF-3_y##TtBiej;hI+hcTE-#*4TwE^Ct%Orci!#S0 z=2l995RoTDVoWs6V6Tfneg}_g?Di#t5KHKEplx4uF0Y=op<{Q>@2OE1B&kZkcQn>O z1o*F?pI_K^+cKyY^ZeE_jx=ZQF|n3$dQ7|>+O9tiqwCQq3Y{Q~Tz?jIK0>{p3#t!6 zA61$f2@))GtR)o-iJ&Eo-@u$IOu!gf##zc@$~7RDEM64Ib?d__kpdT7F`nh91qYJ^ zam9>upxDGv@OYthsjgbBjOj%+w_;JWP;#YnI&ZmxE0ew}W={@TML8FmYCMKnTil8A5$jC8kw9;&g)3y+Dk|-5OW)*e%`vgE_dv)3X@ZiNr9UOmdVfp5(?DW#IQ`LC z-~3GdvpYy&=v?Wt(nQ~u2=DJ@-Df%X;)|Sn*>nBbAh-$dXWh9Cn_?gqT%la9NvI4` z3UZ{9V*;rZ1?UArNaY+&&t#=Q;Xjq>5zw4v+|clQdG&By*)Qb5cym|KV>z?~^%EA6 zZ=uPo7fgFE183SZ-2CxGS)$Z9mj**u%zkOA%yy-?V)kHMxijifWI2TPyXZ3MJELgq zjCxywSf8I-CMhi-5d~!&I4uyU0*R%N8WUg~dc1ze!5&HviENqDA~JcYAB)}IzF$IT z*U9h4!|R1^FqljS{`f0lNj^OIKccgj-p>H(VEcQq+UxQ$5QV(#rRjDqYCf}UgvaxW z<%#iV6Z@Rlmta4?vfEG2V15e|I?n~TjAtUJoHciQxJA-!u0;UCMX0G(Mj(lJ%axW$ zi(0M9J%w?NGDFVdttthtmoHeVud}6s&&=FyyM4zXQZCHES&zQWeFi$?V@TJpcx)tk zK&UG?bwqBJA<3SJ?{Xl&ji8+qWt3i?QOB{4FQ_tg5?R8qdqA$rNg z#v`Eo4Xoh$)hx8)tx&=WVsGqNrBKjXi#hU4FJJ|(7jEoE+L@iLx3lN#F{P>bTU$Xd z2>c*~7X0l59QyrZPOZd*#pZ10LMb!Tr<;}nrYXi+K`JEZ1yi5i z5>M~$-8yO{(n#+*r6R_o47u@~pp230j~!?o50lfwI>saKtLiX!666Y@z=Tm_Nie3j z|7|(-xo=GTurcr&L9gF%hSSDhH_ouJ{tnPEzuSczn&>1|(>Bhsmx-;l%Ma7bPDHBlPnA?s0u-eE87#l6Gcbub+E!mIu; zIlH_}hQVN%gjY8SeftIuDMu!L+iZMy5?GTm3g5=Dw2G@Cty~K(QJ9UnB1>SJP7*vmnYkw zD!w|oosTE8&jyab+#Ujw;P=SfAHGL9Fo5KU0@Y+gLU|;K=EnRP4EpE&DB1Xm%ResS z^XrR~WEfvvC*jrcRe!jt2q8)Duov%Cgru6v8by#=4U|9lGbjQHk26IKf$@Ps!X5uo z0>+=O3Mw_(Jq*bqmlSxNDz;l63<=9E_R5WN%ERAdbAK2sr02#y7rZt^h4lI87-6yj zx+nMFDhtAr{!_Xx`}LszW|c1PCbQABx*Nx4`E53zeY=}~S(Xa9xcYM+x{#9Qx;WJo zPmO(cx>$>v;`4QZps;t?i+aJnyK&&P8#4_$`aLlBhY>I$hsIVEz&1qN)&d@*&8A|4 z9H4$G-fG^}t%rXcCzns`)yv83^E9>XquDqeAKcHU-~Jx_G1y zozuTygvwK6E1Z>Ci?*!=eWbYCoBSc@NB%m#2Cj3*@rDu3Zj7yP=+~lcYk{7#HXVP5 znSVnLxdsideiTqqjDnaEP0o$2DCgQzv~4X^p@e8Kb#%CS)W$M6R55f(H=;74h zRWpKsvrm-3sj(GKGi%YdJzq0G2-W-d?0+uBNP%w1P|nYdt#FE2i?*$eVnC}#VUrolNWtxPHJXBKId=N5Hr3!zuh-NI;!!ct3$HrDTJ*-9B)Lp;I35`}YnJD)An#n;hl z@-D1XX8z+yRfffD$I z$!f|S<+W^E+bBb2v%;>B?4gj9idvOywLUT~2IIkX0+o7rGoCEc+f_O~*jCC$xTDyu za537|{63k^mn(C29-Zw)4_reYoc#rrz(3RftbX}_lU0|>Sj+Y!EtJ8L!~S>V_M;4+ z69oq)@+Ee_iJ*bWYMeecWZT+C9};l?n|u`(t4qarc4s+a5kenw;N;X~HKlIWvTbdp z8>aTZkRv~fQY2d|MO~PzX04P`wykZHqM6Q|+;D%MRhMcAJ*z=TjDHWUhs7!7vB_#m zHLPXZ+CmM2A#I_CrXE&K(4W~CM9LiBN6xtZXtElIZ$q}NE$~SM;?+UDu`XuvuQZQD zRP^n?|KC5dE}h3L+y||LntpFp{3$~M7ORDiTmt`SvKpt14VkxdP^IEcOiE`yQCGU~ zY+5AOg=x9uN?6lkYf5#jW!u_99SQ{7yWR99WxK0$~d#p(9PL~L-r5!7B zLpN*Lwzkj>2vGZpjqkGb!!2I45ieTJ%cT;>pwA5#M@&&#mY|R9c8oDwzd6emO>q#~ zKXJ4#1mnqa@#TKU&U>dcBqUPX?RNL8*&$9b38>v_pUjp+BOzzwN{W??6p($d@AEu& zs0!A<`#wYc&2;kb^k8{!|NcDvb~|^|+Ylb)k%qo{9z~V=;Wvr}hN5yj3qi3;uv+=b ztExEV)j93J7j2~69rZwZY>@Jtaw!1PYTi2Q$Z;I(`7q_)S;tM2c{&EK6U=jXV$1&{ zAlkGYq}pkq5bRb}g$5`gWOw?S4bT#a_?!cc`49d)U7e{sqwNe(d@y#Il7n`FU9D7s z1_GJ46?;87x`%%5%XeTp$ef20P~^sNR&?Z207Ys*=Lwn7>joWp1C27wmw%wxiQy;< z&`yBr$ILok)+)!a~ECq_7nQ3@2QK{JrM!%Eiepjxs=ypr#mU&>T_ zvmlc5JiruDPH&9lg)>qLl+#MyTxPtZUbxE-q!KxQ#3}i{xpE~Edh35cfG0pejMHfp zO=in5bJ-ZvSz@+x)k=vmU5!WUtUHGL9rs?>3qY8RvIWC{p^Xp|=B!^Sv6`7(2yAcy zpp(_u;N9`Vu{g|z-6FkB?^p9hqXWhM`Wz6N`%4#0sPim`yx>|&OsHzE5`D%CyJj5w zV+_S51w$smW-dd?*hx%64P&p{dR;RPJu)sfy@yP)Yh#~F#?C&->gii3@mD`?#F%2) zkGbu~6dhwHG0AHft0wuAKPuzMBV$o)g)%7uW1maLg~Vj_JSQb@kQVtrI?qvHc?j$n z3GW;GTyQNVd+$*t*UUZyKBBp=&+bx+LGG6!QA1;&9akqIsrDZfux87qZtsS9jh2^k z$d^j*5v1+C92{jQA;@aViZ-|rt^MM(E*S9oOfaJ)zZk8LNZuR!T)-|RL{be~XC^rA zg`w}DOJ}CTpY33bCBT@Ch)1zq?`9i9i2T~vHV55Fh`a{6YOpD^hYWWQL2Hl&n0YUrDdhJ$4KVw1-;ihG5Vv-A@`|I zP+?%~v!h&kdkSkP+qp3Q%K>Q!S|os6X8ZHb0Xan_u8nOgp}VP3iE8K!b_eNRfgSM( zUg$hpcZ!+}jeRb6=UrxmCe^T;WJUg1t8tI^S+gQl7PiBFG}r@gzo7KG^rHlez3XeC;LO-($HPfNRoh|3u~}MW zSI6QrqfF}z^7lD8^l29Zb_slc7X1OS{_i$u) zXGtwS8v9%zEhT$VTm@2eSAoa-JskHcFe*WFx9fxEcxLQ#fw7b{$7+m1KmS%S@^cMv zO=O%8a4qaCqKTVDN=b{#S%kY}^mxB%cOCZ4nqz^T5@lJ=c{aWQP#hapCY!G%sL4C$ z2*S&&UcOEi^ZWT?GJ8n#xV?o8+T-2#OTcxOSg*u{=Z?ogd z*Tq8>OV?Kc6m*e#77L6GXqv%vrM-9jAyq}$3%ty%1rEJ0V=?EHr)CC#qd+P#zA^~+u>?r5*QxJJ~)^6MQ>u|-xiA}eY} zx+=0lDH#~9jz9@lP;w+-n<>k(6r&*Sg|W}G4&6|VcU)uEDOymFfMPajF32{m<1pVO zxG-R&ol2CYfC|)#Nbjy_e<m|8;gN$kfSJm{}V!3ny>cZIL zhm5)c)Dg06m>PJ49OkZb5Bo#)gkGy^7oqlE=+C3cEFG6OL#r*Cx)DuX7fdb=c&c9v zrAJH%SJ*x1?VhQqY;Xj>WzXFDMlBm;Zgg~1vZfFBzb6i)32`8G0hD|PwXgd5VHA4VMs9S!QKw*(PPELX zjpEm|8D&JFpB1*P+KqDSd+dazn$6eSiaReaCGRx)J<{(-2LcS=7qA;|xxAj)yAdG_8 yR>slJ4%3d%-~J08>GC#UhX4Q{iwFb&00000{{{d;LjnLB00RI3000000000=mKl8j literal 0 HcmV?d00001 From 16011f7ff32bda672de36c1c4df5ca31c34a3051 Mon Sep 17 00:00:00 2001 From: Johannes Hausmann Date: Tue, 10 Oct 2023 11:36:47 +0200 Subject: [PATCH 10/10] Last push did not include updated test scripts --- tests/scripts/test_15.sh | 7 ++++--- tests/scripts/test_16.sh | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/scripts/test_15.sh b/tests/scripts/test_15.sh index bfa1639..5bfa53e 100755 --- a/tests/scripts/test_15.sh +++ b/tests/scripts/test_15.sh @@ -9,12 +9,13 @@ source bin/assert.sh output=tests/output/test15 nextflow main.nf -profile conda --name test_data \ --output $output \ - --vcf $(pwd)/tests/test_data/test_data.lofreq.vcf \ - --lineage_mode + --vcf $(pwd)/tests/test_data/test_data.assembly.lineage_mode.vcf.gz \ + --lineage_mode \ + --skip_normalization test -s $output/test_data.input.fasta || { echo "Missing VCF2FASTA fasta file (lineage mode with vcf input)!"; exit 1; } test -s $output/test_data.input.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } -assert_eq $(wc -l $output/test_data.input.pangolin.csv) 2 "Wrong number of pangolin results" +assert_eq $(wc -l $output/test_data.input.pangolin.csv | cut -d ' ' -f1) 2 "Wrong number of pangolin results" diff --git a/tests/scripts/test_16.sh b/tests/scripts/test_16.sh index 921852c..b91c04d 100755 --- a/tests/scripts/test_16.sh +++ b/tests/scripts/test_16.sh @@ -10,7 +10,7 @@ output=tests/output/test15 nextflow main.nf -profile conda --name test_data \ --output $output \ --fasta $(pwd)/tests/test_data/test_data.fasta \ - --lineage_mode + --lineage_mode test -s $output/test_data.assembly.pangolin.csv || { echo "Missing pangolin output file (lineage mode with vcf input)!"; exit 1; } -assert_eq $(wc -l $output/test_data.assembly.pangolin.csv) 2 "Wrong number of pangolin results" +assert_eq $(wc -l $output/test_data.assembly.pangolin.csv | cut -d ' ' -f1) 2 "Wrong number of pangolin results"