Skip to content

Commit

Permalink
Merge pull request #5491 from tuncK/genome
Browse files Browse the repository at this point in the history
Fixed broken NCBI datasets download tool
  • Loading branch information
bgruening authored Sep 21, 2023
2 parents 1383a5b + 795c9cc commit 390b817
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 27 deletions.
2 changes: 1 addition & 1 deletion tools/ncbi_datasets/datasets_gene.xml
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ dataformat
<param name="text_or_file" value="text"/>
<param name="accession" value="NM_000546.6 NM_000492.4"/>
</conditional>
<param name="ortholog" value="true"/>
<param name="ortholog" value="all"/>
</conditional>
<section name="file_choices">
<conditional name="kingdom_cond">
Expand Down
46 changes: 23 additions & 23 deletions tools/ncbi_datasets/datasets_genome.xml
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,6 @@ $filters.exclude_atypical
</collection>
</outputs>
<tests>
<!-- Note: All but one test use the non-default decompress="true".
This is because (as of 11/22) Galaxy cannot apply text assertions to the content
of compressed files: https://github.com/galaxyproject/galaxy/pull/15085
So with decompress="true" more powerful assertions are possible.
A single test checks the default, i.e. decompress="false".
-->
<test expect_num_outputs="3">
<conditional name="query|subcommand">
<param name="download_by" value="taxon"/>
Expand All @@ -185,7 +177,7 @@ $filters.exclude_atypical
<output name="genome_data_report">
<assert_contents>
<has_text text="Assembly Accession&#009;Assembly Name&#009;Assembly Submitter&#009;Organism Name"/>
<has_n_lines n="144"/>
<has_n_lines n="142"/>
<has_n_columns n="4"/>
</assert_contents>
</output>
Expand All @@ -201,8 +193,7 @@ $filters.exclude_atypical
<assert_contents>
<has_n_lines min="1000000"/>
<has_line line="##gff-version 3"/>
<!-- TODO: this will only work once the Galaxy Python packages for 22.05 have been released
<has_n_columns n="9" comment="#"/> -->
<has_n_columns n="9" comment="#"/>
</assert_contents>
</element>
</output_collection>
Expand All @@ -222,22 +213,24 @@ $filters.exclude_atypical
<param name="include" value="genome"/>
<param name="decompress" value="true"/>
</section>
<output_collection name="genome_fasta" type="list:list" count="14">
<output_collection name="genome_fasta" type="list:list" count="12">
<expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_000002135.3" el2="GCA_000002135.3_CRA_TCAGchr7v2" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_000252825.1" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/>
<!-- TODO: the chromosomes argument (or the data) seems unreliable, see https://github.com/ncbi/datasets/issues/188 -->
<expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/>
<expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/>
<!-- According to https://github.com/ncbi/datasets/issues/188, the following assemblies should no longer be included among the returned results (as of 09/2023) -->
<!--
<expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/>
<expand macro="genome_fasta_assert" el1="GCA_000002135.3" el2="GCA_000002135.3_CRA_TCAGchr7v2" expression=">"/>
-->
</output_collection>
<output name="genome_data_report">
<assert_contents>
Expand All @@ -246,8 +239,8 @@ $filters.exclude_atypical
</assert_contents>
</output>
</test>
<!-- same as previous test but assembly_source (refseq which removes some of the genomes) -->
<test expect_num_outputs="2">
<!-- same as previous test but assembly_source=refseq, which removes all of the genomes -->
<test expect_failure="true">
<conditional name="query|subcommand">
<param name="download_by" value="taxon"/>
<param name="taxon_positional" value="human"/>
Expand All @@ -260,6 +253,13 @@ $filters.exclude_atypical
<param name="include" value="genome"/>
<param name="decompress" value="true"/>
</section>
<assert_stderr>
<has_text text="No assemblies found that match selection"/>
</assert_stderr>
<!-- With the current state of the NCBI tool/DB, there is no output to check.
However, the returned results seem to change from time to time, so it might
be necessary to re-enable the code block below if this test fails in the future. -->
<!--
<output_collection name="genome_fasta" type="list:list" count="2">
<expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/>
<expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/>
Expand All @@ -270,7 +270,7 @@ $filters.exclude_atypical
<has_n_lines n="5"/>
<has_n_columns n="4"/>
</assert_contents>
</output>
</output> -->
</test>
<test expect_num_outputs="4">
<conditional name="query|subcommand">
Expand Down Expand Up @@ -313,8 +313,8 @@ $filters.exclude_atypical
<element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/>
</output_collection>
<output_collection name="genomic_cds" type="list">
<element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.genomic.cds" compare="contains" decompress="true"/>
<element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.cds" compare="contains" decompress="true"/>
<element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.genomic.cds" compare="contains"/>
<element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.cds" compare="contains"/>
</output_collection>
</test>
<test expect_num_outputs="4">
Expand Down Expand Up @@ -349,7 +349,7 @@ $filters.exclude_atypical
</test>

<!-- should not fail https://github.com/ncbi/datasets/issues/194 -->
<test expect_num_outputs="2" expect_failure="true">
<test expect_num_outputs="2"> <!-- expect_failure="true"> -->
<conditional name="query|subcommand">
<param name="download_by" value="accession"/>
<conditional name="text_or_file">
Expand Down Expand Up @@ -421,14 +421,14 @@ $filters.exclude_atypical
<output_collection name="protein_fasta" type="list" count="1">
<element name="GCF_000146045.2" ftype="fasta.gz">
<assert_contents>
<has_size value="1844838"/>
<has_size value="1845038"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="rna_fasta" type="list" count="1">
<element name="GCF_000146045.2" ftype="fasta.gz">
<assert_contents>
<has_size value="2784534"/>
<has_size value="2784899"/>
</assert_contents>
</element>
</output_collection>
Expand Down
6 changes: 3 additions & 3 deletions tools/ncbi_datasets/macros.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<macros>
<token name="@TOOL_VERSION@">14.6.0</token>
<token name="@TOOL_VERSION@">15.19.1</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">21.01</token>
<token name="@PROFILE@">22.05</token>
<token name="@LICENSE@">MIT</token>
<token name="@PROFILE_AND_LICENSE@">profile="@PROFILE@" license="@LICENSE@"</token>
<token name="@SETUP_CERTIFICATES@"><![CDATA[
Expand All @@ -11,7 +11,7 @@
<xml name="requirements">
<requirements>
<requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement>
<requirement type="package" version="2022.9.24">ca-certificates</requirement>
<requirement type="package" version="2023.7.22">ca-certificates</requirement>
<requirement type="package" version="16.02">p7zip</requirement>
</requirements>
</xml>
Expand Down

0 comments on commit 390b817

Please sign in to comment.