Skip to content

Commit

Permalink
update fastp and fix linting and testing issues (#5813)
Browse files Browse the repository at this point in the history
  • Loading branch information
bgruening authored Mar 5, 2024
1 parent d936ef5 commit 61d055a
Show file tree
Hide file tree
Showing 4 changed files with 442 additions and 40 deletions.
66 changes: 38 additions & 28 deletions tools/fastp/fastp.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<tool id="fastp" name="fastp" version="@WRAPPER_VERSION@+galaxy0">
<description>- fast all-in-one preprocessing for FASTQ files</description>
<tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy0" profile="23.1">
<description>fast all-in-one preprocessing for FASTQ files</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="biotools" />
<requirements>
<requirement type="package" version="@WRAPPER_VERSION@">fastp</requirement>
<requirement type="package" version="@TOOL_VERSION@">fastp</requirement>
</requirements>
<version_command>fastp -v</version_command>
<command detect_errors="exit_code"><![CDATA[
Expand All @@ -25,19 +25,19 @@
#set $in2_name = re.sub('[^\w\-\s]', '_', str("%s_%s" % ($single_paired.paired_input.name, "R2"))) + $ext
#set out1 = $output_paired_coll.forward
#set out2 = $output_paired_coll.reverse
ln -s '$in1' '$in1_name' &&
ln -s '$in2' '$in2_name' &&
ln -sf '$in1' '$in1_name' &&
ln -sf '$in2' '$in2_name' &&
#else
#if $in1.is_of_type('fastq.gz')
#set ext = '.fastq.gz'
#end if
#set $in1_name = re.sub('[^\w\-\s]', '_', str($in1.element_identifier)) + $ext
ln -s '$in1' '$in1_name' &&
ln -sf '$in1' '$in1_name' &&
#if str($single_paired.single_paired_selector) == 'paired':
#set $in2_name = re.sub('[^\w\-\s]', '_', str("%s_R2" % $in2.element_identifier)) + $ext
ln -s '$in2' '$in2_name' &&
ln -sf '$in2' '$in2_name' &&
#end if
#end if
Expand Down Expand Up @@ -210,7 +210,6 @@ mv first${ext} '${out1}'
#end if
]]></command>
<inputs>

<conditional name="single_paired">
<param name="single_paired_selector" type="select" label="Single-end or paired reads">
<option value="single" selected="true">Single-end</option>
Expand All @@ -230,8 +229,8 @@ mv first${ext} '${out1}'
</expand>
<expand macro="global_trimming_options_paired" />
</when>
<when value="paired_collection">
<param name="paired_input" type="data_collection" collection_type="paired" format="fastq,fastq.gz" label="Select paired collection(s)"/>
<when value="paired_collection">
<param name="paired_input" type="data_collection" format="fastq,fastq.gz" label="Select paired collection(s)" collection_type="paired"/>
<expand macro="adapter_trimming_options">
<expand macro="adapter_sequence" read_number="2"/>
</expand>
Expand All @@ -256,7 +255,7 @@ mv first${ext} '${out1}'
<section name="length_filtering_options" title="Length filtering options" expanded="True">
<param name="disable_length_filtering" argument="-L" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Disable length filtering" help="Length filtering is enabled by default. If this option is specified, length filtering is disabled."/>
<param name="length_required" argument="-l" type="integer" optional="true" label="Length required" help="Reads shorter than this value will be discarded. Default is 15."/>
<param name="length_limit" argument="--length_limit" type="integer" optional="true" label="Maximum length" help="Reads longer than this value will be discarded. Default is 0 and means no limitation."/>
<param argument="--length_limit" type="integer" optional="true" label="Maximum length" help="Reads longer than this value will be discarded. Default is 0 and means no limitation."/>
</section>

<section name="low_complexity_filter" title="Low complexity filtering options" expanded="True">
Expand Down Expand Up @@ -288,17 +287,17 @@ mv first${ext} '${out1}'
<option value="-x">Enable polyX tail trimming</option>
</param>
<when value="-x">
<param name="poly_x_min_len" argument="--poly_x_min_len" type="integer" optional="true" label="PolyX minimum length"
help="The minimum length to detect polyX in the read tail. 10 by default."/>
<param argument="--poly_x_min_len" type="integer" optional="true" label="PolyX minimum length"
help="The minimum length to detect polyX in the read tail. 10 by default."/>
</when>
<when value="" />
</conditional>

<section name="umi_processing" title="UMI processing" expanded="True">
<param name="umi" argument="-U" type="boolean" truevalue="-U" falsevalue="" checked="false" label="Enable unique molecular identifer" help="Enable unique molecular identifer (UMI) preprocessing."/>
<param name="umi_loc" argument="--umi_loc" type="text" optional="true" label="UMI location" help="Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none."/>
<param name="umi_len" argument="--umi_len" type="integer" optional="true" label="UMI length" help="If the UMI is in read1/read2, its length should be provided."/>
<param name="umi_prefix" argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/>
<param argument="--umi_loc" type="text" optional="true" label="UMI location" help="Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none."/>
<param argument="--umi_len" type="integer" optional="true" label="UMI length" help="If the UMI is in read1/read2, its length should be provided."/>
<param argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/>
</section>

<section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True">
Expand Down Expand Up @@ -338,7 +337,7 @@ mv first${ext} '${out1}'
</outputs>

<tests>
<!-- Ensure default output works -->
<!-- 1. Ensure default output works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastqsanger" value="R1.fq"/>
<param name="single_paired_selector" value="single"/>
Expand All @@ -349,7 +348,7 @@ mv first${ext} '${out1}'
</assert_contents>
</output>
</test>
<!-- Ensure paired collection works -->
<!-- 2. Ensure paired collection works -->
<test expect_num_outputs="4">
<param name="single_paired_selector" value="paired_collection"/>
<param name="paired_input">
Expand All @@ -368,14 +367,14 @@ mv first${ext} '${out1}'
<element name="reverse" value="out_bwa2.fq" ftype="fastqsanger"/>
</output_collection>
</test>
<!-- Ensure custom adapter works -->
<!-- 3. Ensure custom adapter works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastq" value="R1.fq"/>
<param name="single_paired_selector" value="single"/>
<param name="adapter_sequence1" value="ATCG"/>
<output name="out1" ftype="fastq" file="out_a.fq"/>
</test>
<!-- Ensure UMI processing works -->
<!-- 4. Ensure UMI processing works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastq" value="R1.fq"/>
<param name="single_paired_selector" value="single"/>
Expand All @@ -386,7 +385,7 @@ mv first${ext} '${out1}'
</section>
<output name="out1" ftype="fastq" file="out2.fq"/>
</test>
<!-- Ensure UMI processing with different lengths works -->
<!-- 5. Ensure UMI processing with different lengths works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastq" value="R1.fq"/>
<param name="single_paired_selector" value="single"/>
Expand All @@ -397,15 +396,15 @@ mv first${ext} '${out1}'
</section>
<output name="out1" ftype="fastq" file="out3.fq"/>
</test>
<!-- Ensure paired-end fastq works -->
<!-- 6. Ensure paired-end fastq works -->
<test expect_num_outputs="3">
<param name="in1" ftype="fastq" value="bwa-mem-fastq1.fq"/>
<param name="in2" ftype="fastq" value="bwa-mem-fastq2.fq"/>
<param name="single_paired_selector" value="paired"/>
<output name="out1" ftype="fastq" file="out_bwa1.fq"/>
<output name="out2" ftype="fastq" file="out_bwa2.fq"/>
</test>
<!-- Ensure paired-end UMI processing of Read 1 works -->
<!-- 7. Ensure paired-end UMI processing of Read 1 works -->
<test expect_num_outputs="3">
<param name="in1" ftype="fastq" value="bwa-mem-fastq1.fq"/>
<param name="in2" ftype="fastq" value="bwa-mem-fastq2.fq"/>
Expand All @@ -418,7 +417,7 @@ mv first${ext} '${out1}'
<output name="out1" ftype="fastq" file="out_bwa_umi_read1_1.fq"/>
<output name="out2" ftype="fastq" file="out_bwa_umi_read1_2.fq"/>
</test>
<!-- Ensure paired-end UMI processing of Read 2 works -->
<!-- 8. Ensure paired-end UMI processing of Read 2 works -->
<test expect_num_outputs="3">
<param name="in1" ftype="fastq" value="bwa-mem-fastq1.fq"/>
<param name="in2" ftype="fastq" value="bwa-mem-fastq2.fq"/>
Expand All @@ -431,7 +430,7 @@ mv first${ext} '${out1}'
<output name="out1" ftype="fastq" file="out_bwa_umi_read2_1.fq"/>
<output name="out2" ftype="fastq" file="out_bwa_umi_read2_2.fq"/>
</test>
<!-- Ensure JSON report output works -->
<!-- 9. Ensure JSON report output works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastqsanger" value="R1.fq"/>
<param name="single_paired_selector" value="single"/>
Expand All @@ -444,15 +443,15 @@ mv first${ext} '${out1}'
</assert_contents>
</output>
</test>
<!-- Ensure polyG trimming works -->
<!-- 10. Ensure polyG trimming works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastq.gz" value="R1.fq.gz"/>
<param name="single_paired_selector" value="single"/>
<param name="trimming_select" value="-g"/>
<param name="poly_g_min_len" value="10"/>
<output name="out1" ftype="fastq.gz" decompress="True" file="out1.fq.gz"/>
</test>
<!-- Ensure polyX trimming works -->
<!-- 11. Ensure polyX trimming works -->
<test expect_num_outputs="2">
<param name="in1" ftype="fastq.gz" value="R1.fq.gz"/>
<param name="single_paired_selector" value="single"/>
Expand All @@ -461,13 +460,24 @@ mv first${ext} '${out1}'
<param name="poly_x_min_len" value="10"/>
<output name="out1" ftype="fastq.gz" decompress="True" file="out1.fq.gz"/>
</test>
<!-- 12. Test fastq files with different length -->
<test expect_exit_code="255" expect_failure="true">
<param name="single_paired_selector" value="paired_collection"/>
<param name="paired_input">
<collection type="paired">
<element name="forward" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
<element name="reverse" value="bwa-mem-fastq2_too_long.fq" ftype="fastqsanger" />
</collection>
</param>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
**What it does**
fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. This tool is developed in C++ with multithreading supported to afford high performance.
fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. This tool is developed in C++ with multithreading supported to
afford high performance.
*Features*
Expand Down
10 changes: 3 additions & 7 deletions tools/fastp/macros.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<macros>
<token name="@WRAPPER_VERSION@">0.23.2</token>
<token name="@TOOL_VERSION@">0.23.4</token>
<xml name="biotools">
<xrefs>
<xref type="bio.tools">
Expand All @@ -13,10 +13,6 @@
<expand macro="adapter_sequence" read_number="1" />
<yield />
</section>
<section name="global_trimming_options" title="Global Trimming Options" expanded="False">
<param name="trim_front1" argument="-f" type="integer" optional="true" label="Trim front for input 1" help="Trimming how many bases in front for read1, default is 0."/>
<param name="trim_tail1" argument="-t" type="integer" optional="true" label="Trim tail for input 1" help="Trimming how many bases in tail for read1, default is 0."/>
</section>
</xml>

<xml name="global_trimming_options">
Expand All @@ -35,7 +31,7 @@
</xml>

<xml name="adapter_sequence" token_read_number="1">
<param name="adapter_sequence@READ_NUMBER@" argument="--adapter_sequence@READ_NUMBER@" type="text" optional="true" label="Adapter sequence for input @READ_NUMBER@" help="The adapter for read@READ_NUMBER@. For SE data, if not specified, the adapter will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.">
<param argument="--adapter_sequence@READ_NUMBER@" type="text" optional="true" label="Adapter sequence for input @READ_NUMBER@" help="The adapter for read@READ_NUMBER@. For SE data, if not specified, the adapter will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.">
<sanitizer>
<valid>
<add value="A"/>
Expand All @@ -52,7 +48,7 @@
</xml>

<xml name="poly_g_min_len">
<param name="poly_g_min_len" argument="--poly_g_min_len" type="integer" optional="true" label="PolyG minimum length"
<param argument="--poly_g_min_len" type="integer" optional="true" label="PolyG minimum length"
help="The minimum length to detect polyG in the read tail. 10 by default."/>
</xml>
</macros>
6 changes: 1 addition & 5 deletions tools/fastp/test-data/bwa-mem-fastq2.fq
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,4 @@ CDCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHGGGGGHGIHHHIH5DEGHHHF?FGHG
@M01368:8:000000000-A3GHV:1:1114:2404:13066/2
ATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGACCATACTTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACTGTCTCTTATACACATT
+
CCCCCFFFFFFCGGGGGGGGGGHHHHHHHHHFFHHHHGGGGGHFFFHHFHHHHHHHHHHHHHHHGFEGGGHGEDFCDFHGHFG@@DGGHHHHHHGGGGCGGGGGEHGGCGBB?CF99EGFGGFGG?D9CFFFF/BBFFFFFEF9BFFAFFFFEFFFFFFFFFFFFFFFFFFFFF.FFBBFFFFFFFFFFFF-9;;;BFFFFFB9BFBFBFABFFEFFFFFFFFFF::BFFBFFFF.9//;FFFFF/BFFB/
@M01368:8:000000000-A3GHV:1:1114:9184:6959/2
AAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCTGTCTCTTA
+
CCCCBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHEHIHHGGGGHHHHHHHHHHHHHGHHHHHHHHGGGGGHHFHHHHHBGHHHHHHHHHHHHHHHHHGHHHHHGGGGGHHHHGHHHHHHHHHHHHHHHHHGHGHGHHGGGGCFFFFFFFFFFFFFFFFFFFFFFFFFF.CFFFFAF=D=EAEFFF0B:0AF-DAFBFFFFFFFFFBFFFFFFFFFFBFFFEFF9B900B0
CCCCCFFFFFFCGGGGGGGGGGHHHHHHHHHFFHHHHGGGGGHFFFHHFHHHHHHHHHHHHHHHGFEGGGHGEDFCDFHGHFG@@DGGHHHHHHGGGGCGGGGGEHGGCGBB?CF99EGFGGFGG?D9CFFFF/BBFFFFFEF9BFFAFFFFEFFFFFFFFFFFFFFFFFFFFF.FFBBFFFFFFFFFFFF-9;;;BFFFFFB9BFBFBFABFFEFFFFFFFFFF::BFFBFFFF.9//;FFFFF/BFFB/
Loading

0 comments on commit 61d055a

Please sign in to comment.