Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check InterProScan seqtype #5891

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 70 additions & 2 deletions tools/interproscan/interproscan.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,38 @@ sed 's|^\(data.directory=\).*$|\1${database.fields.path}/data|' \$(dirname \$(re
export _JAVA_OPTIONS=-Duser.home=\$HOME
&&



#if $check_seqtype
## Guard against a FASTA file that does not match the selected sequence type.
## Heuristic: the first sequence line holding a character outside the
## nucleotide alphabet below (plus the gap symbol "-") marks the file as
## protein; if no such line exists the file is treated as nucleotide.
## Whitespace is part of the excluded set so that CRLF line endings or
## trailing blanks in a nucleotide file are not mistaken for protein residues.
#set iupac_nt="[^-ABCDGHIKMNRSTUVWY[:space:]]"
match=\$(grep -v "^>" '$input' | grep -m 1 -iE '$iupac_nt' | head -n 1)
&&
if [ -n "\${match}" ]; then
detected_seqtype="p" ;
match_seqtype="protein" ;
mismatch_seqtype="DNA/RNA" ;
else
detected_seqtype="n" ;
match_seqtype="DNA/RNA" ;
mismatch_seqtype="protein" ;
fi;

## Abort with an explanatory message on stderr when the detected type differs
## from the selected one. The labels are spelled "DNA/RNA" (no spaces) because
## that is the exact text the tool tests assert; the expansions are quoted so
## the message never depends on shell word splitting.
if [ "\$detected_seqtype" != '$seqtype' ]; then
printf '%s'
'You selected '
"\${mismatch_seqtype}"
', but Galaxy detected '
"\${match_seqtype}"
' in the FASTA file. '
'If you are sure you want to do this, disable the '
'"Check input sequences" option and submit your job again.'
1>&2 ;
exit 1 ;
fi;
#end if

## Now run interproscan
interproscan.sh

Expand All @@ -45,13 +77,15 @@ $iprlookup
--output-file-base 'output'
]]></command>
<inputs>
<param argument="--input" type="data" format="fasta" label="Protein FASTA File"/>
<param argument="--input" type="data" format="fasta" label="FASTA File"/>

<!-- Sequence type selector. The option values "p" and "n" are the same codes the
     command section's sequence-type check assigns to detected_seqtype and compares
     against this parameter, so the two must be kept in sync. -->
<param argument="--seqtype" type="select" label="Type of the input sequences" help="">
<option value="p" selected="true">Protein</option>
<option value="n">DNA / RNA</option>
</param>

<!-- Enabled by default. When false, the command section skips its
     "#if $check_seqtype" block, so no sequence-type check runs before InterProScan. -->
<param name="check_seqtype" type="boolean" checked="true" label="Check input sequences" help="Should Galaxy try to check if the FASTA file matches the selected type?" />

<param name="database" label="InterProScan database" type="select">
<options from_data_table="interproscan">
<column name="value" index="0" />
Expand Down Expand Up @@ -220,7 +254,7 @@ $iprlookup
</assert_contents>
</output>
</test>
<test expect_failure="true" expect_num_outputs="1">
<test expect_failure="true">
<param name="input" value="prots.fa" />
<param name="seqtype" value="p" />
<param name="database" value="@TOOL_VERSION@" />
Expand All @@ -236,6 +270,40 @@ $iprlookup
<has_text text="Analysis TMHMM does not exist or is deactivated" />
</assert_stdout>
</test>
<!-- Protein used but nucleotide selected. check_seqtype is left at its default (checked),
     so the sequence-type check in the command section is expected to abort the job.
     Note that the asserted message spells the type "DNA/RNA" without spaces, unlike the
     "DNA / RNA" option label. -->
<test expect_failure="true">
<param name="input" value="prots.fa" />
<param name="seqtype" value="n" />
<param name="database" value="@TOOL_VERSION@" />
<param name="applications" value="MobiDBLite" />
<param name="oformat" value="TSV" />
<assert_stderr>
<has_text text="You selected DNA/RNA, but Galaxy detected protein"/>
</assert_stderr>
</test>
<!-- Nucleotide used but protein selected. check_seqtype is left at its default (checked),
     so the sequence-type check in the command section is expected to abort the job.
     Note that the asserted message spells the type "DNA/RNA" without spaces, unlike the
     "DNA / RNA" option label. -->
<test expect_failure="true">
<param name="input" value="transcripts.fa" />
<param name="seqtype" value="p" />
<param name="database" value="@TOOL_VERSION@" />
<param name="applications" value="MobiDBLite" />
<param name="oformat" value="TSV" />
<assert_stderr>
<has_text text="You selected protein, but Galaxy detected DNA/RNA"/>
</assert_stderr>
</test>
<!-- Protein used, nucleotide selected, check disabled. With check_seqtype set to false the
     sequence-type check in the command section is skipped, so the job reaches the
     interproscan.sh call. The asserted stderr text is not produced by the check itself;
     presumably it is InterProScan's own error for this input (TODO confirm). -->
<test expect_failure="true">
<param name="input" value="prots.fa" />
<param name="seqtype" value="n" />
<param name="check_seqtype" value="false" />
<param name="database" value="@TOOL_VERSION@" />
<param name="applications" value="MobiDBLite" />
<param name="oformat" value="TSV" />
<assert_stderr>
<has_text text="not a nucleotide sequence"/>
</assert_stderr>
</test>
</tests>

<help><![CDATA[
Expand Down
2 changes: 1 addition & 1 deletion tools/interproscan/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
The version should also be bumped in test-data/interproscan.loc
-->
<token name="@TOOL_VERSION@">5.59-91.0</token>
<token name="@VERSION_SUFFIX@">3</token>
<token name="@VERSION_SUFFIX@">4</token>

<xml name="citations">
<citations>
Expand Down
Loading