Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PsiCLASS #5469

Merged
merged 13 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tools/psiclass/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
categories:
  - Transcriptomics
description: PsiCLASS is a reference-based transcriptome assembler for single or multiple RNA-seq samples.
homepage_url: https://github.com/splicebox/PsiCLASS
long_description: |
  PsiCLASS is a reference-based transcriptome assembler for single or multiple
  RNA-seq samples. Unlike conventional methods that analyze each sample separately
  and then merge the outcomes to create a unified set of meta-annotations, PsiCLASS
  takes a multi-sample approach, simultaneously analyzing all RNA-seq data sets in
  an experiment.
name: psiclass
owner: iuc
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/psiclass
type: unrestricted
17 changes: 17 additions & 0 deletions tools/psiclass/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<macros>
    <!-- Version of the psiclass package requested in the requirements macro below. -->
    <token name="@TOOL_VERSION@">1.0.3</token>
    <!-- Suffix token kept separate from the package version. -->
    <token name="@VERSION_SUFFIX@">0</token>

    <!-- Package dependency, pinned to @TOOL_VERSION@. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">psiclass</requirement>
        </requirements>
    </xml>

    <!-- DOI citation attached to the tool. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/s41467-019-12990-0</citation>
        </citations>
    </xml>

    <!-- Command used to report the installed tool version. -->
    <xml name="version_command">
        <version_command>psiclass --version</version_command>
    </xml>
</macros>
151 changes: 151 additions & 0 deletions tools/psiclass/psiclass.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
<tool id="psiclass" name="PsiCLASS" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
<description>reference-based transcriptome assembler</description>
<macros>
<import>macros.xml</import>
</macros>
<xrefs>
<xref type="bio.tools">psiclass</xref>
</xrefs>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
    ## Directory the per-sample GTF files are moved into for dataset discovery.
    mkdir -p "annotation_files" &&
    ## Link each input BAM under a generated name (bam_0.bam, bam_1.bam, ...)
    ## and remember those names for the -b argument.
    #set $bam_filenames = []
    #for $i, $file in enumerate($bam_files)
        #set $name = 'bam_' + str($i) + '.bam'
        ln -s '${file}' '${name}' &&
        ## Evaluate append() silently so its return value is never rendered into the script.
        #silent $bam_filenames.append($name)
    #end for
    #set $allFiles = ','.join($bam_filenames)
    psiclass
        -b '${allFiles}'
        -p \${GALAXY_SLOTS:-8}
        #if $splice_conditional.selector == 'true'
            -s '${splice_conditional.splice_file}'
        #end if
        -c $subexonClassifier
        --sa $intronCoverage
        --vd $transcriptCoverage
        #if $stranded
            --stranded '${stranded}'
        #end if
        --maxDpConstraintSize $maxDpConstraintSize
        $primaryParalog
        --tssTesQuantile $tssTesQuantile
    && mv *sample* "./annotation_files"
    ## Without user-supplied splice sites, export the trusted_splice file from ./splice.
    #if $splice_conditional.selector == 'false'
        && cat ./splice/psiclass_bam.trusted_splice > '${splice_sites}'
    #end if
]]></command>
<inputs>
    <param argument="-b" name="bam_files" type="data" format="bam" multiple="true" label="BAM file(s)"
           help="PsiCLASS has been tuned to run on alignments generated with the tools HISAT and STAR" />
    <conditional name="splice_conditional">
        <param name="selector" type="select" label="Provide trusted splice sites coordinates"
               help="It is possible to supply a set of trusted introns, for instance generated by RNASTAR or extracted from the GENCODE gene annotations">
            <option value="true">Enabled</option>
            <option value="false" selected="true">Disabled</option>
        </param>
        <when value="true">
            <!-- Required once the option is enabled: the command always passes this dataset to -s in that branch. -->
            <param argument="-s" name="splice_file" type="data" format="interval" label="Splice junction sites file"
                   help="High confidence collapsed splice junction file" />
        </when>
        <when value="false"/>
    </conditional>
    <param argument="-c" type="float" name="subexonClassifier" min="0" max="1" value="0.05" label="Subexon classifier score threshold"
           help="Only use the subexons with a classifier score less than or equal to the given number" />
    <param argument="--sa" name="intronCoverage" type="float" min="0" value="0.5" label="Minimum retained intron coverage"
           help="Minimum average number of supporting reads for retained introns" />
    <param argument="--vd" name="transcriptCoverage" type="float" min="0" value="1" label="Minimum transcript coverage"
           help="Minimum average coverage depth of a transcript to be reported" />
    <!-- Optional select: when nothing is chosen the command omits the flag. -->
    <param argument="--stranded" type="select" optional="true" label="Library strand information"
           help="Stranded data shows advantages over non-stranded RNA-Seq data such as higher assembly and differential expression accuracy">
        <option value="un">unstranded</option>
        <option value="rf">fr-firststrand (rf): first read from the opposite strand.</option>
        <option value="fr">fr-secondstrand (fr): first read from the transcript strand</option>
    </param>
    <param argument="--maxDpConstraintSize" type="integer" min="-1" value="7"
           label="Maximum number of subexons a constraint can cover in the dynamic programming step" help="-1 for infinite"/>
    <param argument="--primaryParalog" type="boolean" truevalue="--primaryParalog" falsevalue="" checked="false"
           label="Use primary alignment to retain paralog genes" help="Default: use unique alignments" />
    <param argument="--tssTesQuantile" type="float" min="0" max="1" value="0.5" label="Quantile for transcription start/end sites in subexon graph"/>
</inputs>
<outputs>
    <!-- Meta-annotation, taken from psiclass_vote.gtf in the working directory. -->
    <data name="meta_anotation" format="gtf" from_work_dir="psiclass_vote.gtf"
          label="${tool.name} on ${on_string}: meta-annotation" />
    <!-- Per-sample annotations, discovered as GTF files inside annotation_files/. -->
    <collection name="annotation_collection" type="list"
                label="${tool.name} on ${on_string}: per-sample annotations">
        <discover_datasets pattern="__designation_and_ext__" format="gtf" directory="annotation_files"/>
    </collection>
    <!-- Only created while the trusted splice sites option is set to "false" (Disabled). -->
    <data name="splice_sites" format="interval" label="${tool.name} on ${on_string}: splice sites">
        <filter>splice_conditional["selector"] == "false"</filter>
    </data>
</outputs>
<tests>
    <!-- Test 1: two BAM files with a user-supplied splice sites file; the splice_sites output is filtered out. -->
    <test expect_num_outputs="2">
        <param name="bam_files" value="reads1.bam,reads2.bam"/>
        <param name="subexonClassifier" value="0.05"/>
        <param name="intronCoverage" value="0.5"/>
        <param name="transcriptCoverage" value="1"/>
        <param name="stranded" value="un"/>
        <param name="maxDpConstraintSize" value="7"/>
        <param name="primaryParalog" value="false"/>
        <param name="tssTesQuantile" value="0.5"/>
        <conditional name="splice_conditional">
            <param name="selector" value="true"/>
            <param name="splice_file" value="splice_sites.interval"/>
        </conditional>
        <output name="meta_anotation" ftype="gtf" file="test01_meta_annotation.gtf" lines_diff="2"/>
        <output_collection name="annotation_collection" type="list" count="2">
            <element name="psiclass_sample_0" ftype="gtf" file="test01_annotation_sample0.gtf" lines_diff="2"/>
            <element name="psiclass_sample_1" ftype="gtf" file="test01_annotation_sample1.gtf" lines_diff="2"/>
        </output_collection>
    </test>
    <!-- Test 2: two BAM files, changed numeric settings, rf strandedness, primaryParalog on, no splice sites file. -->
    <test expect_num_outputs="3">
        <param name="bam_files" value="reads1.bam,reads2.bam"/>
        <param name="subexonClassifier" value="0.05"/>
        <param name="intronCoverage" value="0.3"/>
        <param name="transcriptCoverage" value="0.5"/>
        <param name="stranded" value="rf"/>
        <param name="maxDpConstraintSize" value="6"/>
        <param name="primaryParalog" value="true"/>
        <param name="tssTesQuantile" value="0.4"/>
        <conditional name="splice_conditional">
            <param name="selector" value="false"/>
        </conditional>
        <output name="meta_anotation" ftype="gtf" file="test02_meta_annotation.gtf"/>
        <output_collection name="annotation_collection" type="list" count="2">
            <element name="psiclass_sample_0" ftype="gtf" file="test02_annotation_sample0.gtf" lines_diff="2"/>
            <element name="psiclass_sample_1" ftype="gtf" file="test02_annotation_sample1.gtf" lines_diff="2"/>
        </output_collection>
        <output name="splice_sites" ftype="interval" file="test02_splice_sites.txt"/>
    </test>
    <!-- Test 3: a single BAM file with every other parameter left unset. -->
    <test expect_num_outputs="3">
        <param name="bam_files" value="reads1.bam"/>
        <output name="meta_anotation" ftype="gtf" file="test03_meta_annotation.gtf"/>
        <output_collection name="annotation_collection" type="list" count="1">
            <element name="psiclass_sample_0" ftype="gtf" file="test03_annotation_sample0.gtf" lines_diff="2"/>
        </output_collection>
        <output name="splice_sites" file="test03_splice_sites.txt" ftype="interval"/>
    </test>
</tests>
<help><![CDATA[

.. class:: infomark

**What is PsiCLASS?**

PsiCLASS is a reference-based transcriptome assembler for single or multiple RNA-seq samples.

Unlike conventional methods that analyze each sample separately and then merge the outcomes to create a unified set of meta-annotations,
PsiCLASS takes a multi-sample approach, simultaneously analyzing all RNA-seq data sets in an experiment.

PsiCLASS is both a transcript assembler and a meta-assembler, producing separate transcript sets for the individual samples and a unified
set of meta-annotations. The algorithmic underpinnings of PsiCLASS include using a global subexon splice graph, statistical cross-sample
feature (intron, subexon) selection methods, and an efficient dynamic programming algorithm to select a subset of transcripts from among
those encoded in the graph, based on the read support in each sample.

Lastly, the set of meta-annotations is selected from among the transcripts generated for individual samples by voting. While PsiCLASS is
highly accurate and efficient for medium-to-large collections of RNA-seq data, its accuracy is equally high for small RNA-seq data sets
(2-10 samples) and is competitive to reference methods for single samples. Additionally, its performance is robust with the aggregation
method used, including the built-in voting and assembly-based approaches such as StringTie-merge and TACO. Therefore, it can be effectively
used as a multi-sample and as a single-sample assembler, as well as in conventional assemble-and-merge protocols.

]]></help>
<expand macro="citations" />
</tool>
Binary file added tools/psiclass/test-data/reads1.bam
Binary file not shown.
Binary file added tools/psiclass/test-data/reads2.bam
Binary file not shown.
2 changes: 2 additions & 0 deletions tools/psiclass/test-data/splice_sites.interval
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
test_chromosome 251 350 1 1 0 27 0 37
test_chromosome 401 500 1 1 0 25 0 36
15 changes: 15 additions & 0 deletions tools/psiclass/test-data/test01_annotation_sample0.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#PsiCLASS_v1.0.3
#/home/laptop/miniconda3/envs/mulled-v1-4549fe7ad5b22a1b35ab5aa0ada6322e374f13375e517a18654c3ddd9bb8d124/bin/classes -p 1 -c 0.05 --stranded un --maxDpConstraintSize 7 -b bam_0.bam -b bam_1.bam -s ./subexon/psiclass_subexon_combined.out -o ./psiclass
#bam_0.bam
chr16 PsiCLASS transcript 784996 786471 1000 . . gene_id "chr16.1"; transcript_id "chr16.1.0"; FPKM "90057.137790"; TPM "395522.152415"; cov "33.028455";
chr16 PsiCLASS exon 784996 786471 1000 . . gene_id "chr16.1"; transcript_id "chr16.1.0"; exon_number "1"; FPKM "90057.137790"; TPM "395522.152415"; cov "33.028455";
chr16 PsiCLASS transcript 787203 787744 1000 . . gene_id "chr16.7"; transcript_id "chr16.7.0"; FPKM "14337.566689"; TPM "62969.192406"; cov "5.258303";
chr16 PsiCLASS exon 787203 787744 1000 . . gene_id "chr16.7"; transcript_id "chr16.7.0"; exon_number "1"; FPKM "14337.566689"; TPM "62969.192406"; cov "5.258303";
chr16 PsiCLASS transcript 790525 792347 1000 . . gene_id "chr16.17"; transcript_id "chr16.17.0"; FPKM "19967.535928"; TPM "87695.467370"; cov "7.323094";
chr16 PsiCLASS exon 790525 792347 1000 . . gene_id "chr16.17"; transcript_id "chr16.17.0"; exon_number "1"; FPKM "19967.535928"; TPM "87695.467370"; cov "7.323094";
chr16 PsiCLASS transcript 794054 794361 1000 . . gene_id "chr16.25"; transcript_id "chr16.25.0"; FPKM "10623.323507"; TPM "46656.599158"; cov "3.896104";
chr16 PsiCLASS exon 794054 794361 1000 . . gene_id "chr16.25"; transcript_id "chr16.25.0"; exon_number "1"; FPKM "10623.323507"; TPM "46656.599158"; cov "3.896104";
chr16 PsiCLASS transcript 795132 795356 1000 . . gene_id "chr16.27"; transcript_id "chr16.27.0"; FPKM "30902.067712"; TPM "135718.862884"; cov "11.333333";
chr16 PsiCLASS exon 795132 795356 1000 . . gene_id "chr16.27"; transcript_id "chr16.27.0"; exon_number "1"; FPKM "30902.067712"; TPM "135718.862884"; cov "11.333333";
chr16 PsiCLASS transcript 797839 798063 1000 . . gene_id "chr16.39"; transcript_id "chr16.39.0"; FPKM "61804.135424"; TPM "271437.725768"; cov "22.666667";
chr16 PsiCLASS exon 797839 798063 1000 . . gene_id "chr16.39"; transcript_id "chr16.39.0"; exon_number "1"; FPKM "61804.135424"; TPM "271437.725768"; cov "22.666667";
3 changes: 3 additions & 0 deletions tools/psiclass/test-data/test01_annotation_sample1.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#PsiCLASS_v1.0.3
#/home/laptop/miniconda3/envs/mulled-v1-4549fe7ad5b22a1b35ab5aa0ada6322e374f13375e517a18654c3ddd9bb8d124/bin/classes -p 1 -c 0.05 --stranded un --maxDpConstraintSize 7 -b bam_0.bam -b bam_1.bam -s ./subexon/psiclass_subexon_combined.out -o ./psiclass
#bam_1.bam
12 changes: 12 additions & 0 deletions tools/psiclass/test-data/test01_meta_annotation.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
chr16 PsiCLASS transcript 784996 786471 1000 . . gene_id "chr16.1"; transcript_id "chr16.1.0"; FPKM "90057.137790"; TPM "395522.152415"; cov "33.028455"; sample_cnt "1";
chr16 PsiCLASS exon 784996 786471 1000 . . gene_id "chr16.1"; transcript_id "chr16.1.0"; exon_number "1"; FPKM "90057.137790"; TPM "395522.152415"; cov "33.028455"; sample_cnt "1";
chr16 PsiCLASS transcript 787203 787744 1000 . . gene_id "chr16.7"; transcript_id "chr16.7.0"; FPKM "14337.566689"; TPM "62969.192406"; cov "5.258303"; sample_cnt "1";
chr16 PsiCLASS exon 787203 787744 1000 . . gene_id "chr16.7"; transcript_id "chr16.7.0"; exon_number "1"; FPKM "14337.566689"; TPM "62969.192406"; cov "5.258303"; sample_cnt "1";
chr16 PsiCLASS transcript 790525 792347 1000 . . gene_id "chr16.17"; transcript_id "chr16.17.0"; FPKM "19967.535928"; TPM "87695.467370"; cov "7.323094"; sample_cnt "1";
chr16 PsiCLASS exon 790525 792347 1000 . . gene_id "chr16.17"; transcript_id "chr16.17.0"; exon_number "1"; FPKM "19967.535928"; TPM "87695.467370"; cov "7.323094"; sample_cnt "1";
chr16 PsiCLASS transcript 794054 794361 1000 . . gene_id "chr16.25"; transcript_id "chr16.25.0"; FPKM "10623.323507"; TPM "46656.599158"; cov "3.896104"; sample_cnt "1";
chr16 PsiCLASS exon 794054 794361 1000 . . gene_id "chr16.25"; transcript_id "chr16.25.0"; exon_number "1"; FPKM "10623.323507"; TPM "46656.599158"; cov "3.896104"; sample_cnt "1";
chr16 PsiCLASS transcript 795132 795356 1000 . . gene_id "chr16.27"; transcript_id "chr16.27.0"; FPKM "30902.067712"; TPM "135718.862884"; cov "11.333333"; sample_cnt "1";
chr16 PsiCLASS exon 795132 795356 1000 . . gene_id "chr16.27"; transcript_id "chr16.27.0"; exon_number "1"; FPKM "30902.067712"; TPM "135718.862884"; cov "11.333333"; sample_cnt "1";
chr16 PsiCLASS transcript 797839 798063 1000 . . gene_id "chr16.39"; transcript_id "chr16.39.0"; FPKM "61804.135424"; TPM "271437.725768"; cov "22.666667"; sample_cnt "1";
chr16 PsiCLASS exon 797839 798063 1000 . . gene_id "chr16.39"; transcript_id "chr16.39.0"; exon_number "1"; FPKM "61804.135424"; TPM "271437.725768"; cov "22.666667"; sample_cnt "1";
68 changes: 68 additions & 0 deletions tools/psiclass/test-data/test01_splice_sites.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
chr16 786377 786827 10 - 10 0 0 0
chr16 786928 787068 10 - 10 0 0 0
chr16 786928 787077 10 - 10 0 0 0
chr16 787179 787354 10 - 10 0 0 0
chr16 787477 787556 10 - 10 0 0 0
chr16 787744 788256 10 - 10 0 0 0
chr16 789125 789210 10 + 10 0 0 0
chr16 789360 789547 10 + 10 0 0 0
chr16 789715 790177 10 + 10 0 0 0
chr16 790269 790347 10 + 10 0 0 0
chr16 790399 790525 10 + 10 0 0 0
chr16 790666 791161 10 + 10 0 0 0
chr16 791370 791851 10 + 10 0 0 0
chr16 791948 792224 10 + 10 0 0 0
chr16 792347 792439 10 + 10 0 0 0
chr16 792590 792718 10 + 10 0 0 0
chr16 792811 792966 10 + 10 0 0 0
chr16 793064 793144 10 + 10 0 0 0
chr16 793274 794054 10 + 10 0 0 0
chr16 794201 795132 10 + 10 0 0 0
chr16 795356 795685 10 + 10 0 0 0
chr16 795834 795947 10 + 10 0 0 0
chr16 796077 796717 10 + 10 0 0 0
chr16 796861 796961 10 + 10 0 0 0
chr16 797092 797694 10 + 10 0 0 0
chr16 797751 797839 10 + 10 0 0 0
chr16 854706 868944 10 - 10 0 0 0
chr16 869056 869883 10 - 10 0 0 0
chr16 870066 870729 10 - 10 0 0 0
chr16 870882 871161 10 - 10 0 0 0
chr16 871341 879570 10 - 10 0 0 0
chr16 893072 910931 10 - 10 0 0 0
chr16 954666 970788 10 - 10 0 0 0
chr19 567648 571440 10 - 10 0 0 0
chr19 571579 580379 10 - 10 0 0 0
chr19 572701 577774 10 - 10 0 0 0
chr19 578121 579500 10 - 10 0 0 0
chr19 579656 580379 10 - 10 0 0 0
chr19 580461 580646 10 - 10 0 0 0
chr19 580782 581315 10 - 10 0 0 0
chr19 581610 582514 10 - 10 0 0 0
chr19 582582 582750 10 - 10 0 0 0
chr19 590577 603544 10 - 10 0 0 0
chr19 603967 605061 10 - 10 0 0 0
chr19 605222 607964 10 - 10 0 0 0
chr19 608182 610259 10 - 10 0 0 0
chr19 610405 613248 10 - 10 0 0 0
chr19 613488 613852 10 - 10 0 0 0
chr19 614016 615795 10 - 10 0 0 0
chr19 617323 617419 10 - 10 0 0 0
chr19 617480 617570 10 - 10 0 0 0
chr19 617655 617777 10 - 10 0 0 0
chr19 617849 618488 10 - 10 0 0 0
chr19 618586 618705 10 - 10 0 0 0
chr19 618760 618997 10 - 10 0 0 0
chr19 619110 619210 10 - 10 0 0 0
chr19 619765 619958 10 - 10 0 0 0
chr19 620080 620365 10 - 10 0 0 0
chr19 620487 621058 10 - 10 0 0 0
chr19 621846 622149 10 - 10 0 0 0
chr19 622373 622582 10 - 10 0 0 0
chr19 622752 622821 10 - 10 0 0 0
chr19 622985 623454 10 - 10 0 0 0
chr19 623603 624719 10 - 10 0 0 0
chr19 624905 625124 10 - 10 0 0 0
chr19 625254 629540 10 - 10 0 0 0
chr19 630168 632834 10 - 10 0 0 0
chr19 632938 633425 10 - 10 0 0 0
Loading