-
Notifications
You must be signed in to change notification settings - Fork 441
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6615 from bernt-matthias/dada2-primer-check
dada2: add tool to check for primers
- Loading branch information
Showing
3 changed files
with
167 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
<tool id="dada2_primerCheck" name="dada2: primer check" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09"> | ||
<description></description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="bio_tools"/> | ||
<expand macro="requirements"/> | ||
<expand macro="stdio"/> | ||
<expand macro="version_command"/> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
Rscript '$dada2_script' | ||
]]></command> | ||
<configfiles> | ||
<configfile name="dada2_script"><![CDATA[ | ||
#import re | ||
library(Biostrings, quietly=T) | ||
library(ShortRead, quietly=T) | ||
FWD <- "$forward_primer" | ||
REV <- "$reverse_primer" | ||
allOrients <- function(primer) { | ||
# Create all orientations of the input sequence | ||
dna <- DNAString(primer) # The Biostrings works w/ DNAString objects rather than character vectors | ||
orients <- c(Forward = dna, Complement = Biostrings::complement(dna), Reverse = Biostrings::reverse(dna), RevComp = Biostrings::reverseComplement(dna)) | ||
return(sapply(orients, toString)) # Convert back to character vector | ||
} | ||
FWD.orients <- allOrients(FWD) | ||
REV.orients <- allOrients(REV) | ||
primerHits <- function(primer, fn) { | ||
## Counts number of reads in which the primer is found | ||
nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE) | ||
return(sum(nhits > 0)) | ||
} | ||
df <- NULL; | ||
#for $i, $read in enumerate($paired_cond.reads): | ||
#set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier)) | ||
#if $paired_cond.paired_select == "single" | ||
#set fwd_reads = $read | ||
#elif $paired_cond.paired_select == "separate" | ||
#set fwd_reads = $read | ||
#set rev_reads = $paired_cond.sdaer[i] | ||
#else | ||
#set fwd_reads = $read.forward | ||
#set rev_reads = $read.reverse | ||
#end if | ||
df <- rbind(df, c('$elid', 'FWD', 'FWD', sapply(FWD.orients, primerHits, fn = '$fwd_reads'))) | ||
df <- rbind(df, c('$elid', 'REV', 'FWD', sapply(REV.orients, primerHits, fn = '$fwd_reads'))) | ||
#if $paired_cond.paired_select != "single" | ||
#if $paired_cond.paired_select == "separate" | ||
#set elid = re.sub('[^\w\-\.]', '_', str($paired_cond.sdaer[i].element_identifier)) | ||
#end if | ||
df <- rbind(df, c('$elid', 'FWD', 'REV', sapply(FWD.orients, primerHits, fn = '$rev_reads'))) | ||
df <- rbind(df, c('$elid', 'REV', 'REV', sapply(REV.orients, primerHits, fn = '$rev_reads'))) | ||
#end if | ||
#end for | ||
colnames(df) <- c('Sample', 'Primer', 'ReadDir', 'Sequence', 'Complement', 'Reverse', 'RevComp') | ||
write.table(df, "$out", quote=F, sep="\t", row.names = F, col.names = T) | ||
]]></configfile> | ||
</configfiles> | ||
<inputs> | ||
<expand macro="fastq_input" multiple="True" collection_type="list:paired" argument_fwd="fl" argument_rev="fl"/> | ||
<param name="forward_primer" type="text" label="Forward primer sequence"> | ||
<validator type="empty_field" message="You need to specify a forward primer sequence"/> | ||
</param> | ||
<param name="reverse_primer" type="text" label="Reverse primer sequence"> | ||
<validator type="empty_field" message="You need to specify a reverse primer sequence"/> | ||
</param> | ||
</inputs> | ||
<outputs> | ||
<data name="out" format="tabular"/> | ||
</outputs> | ||
<tests> | ||
<!-- paired data in paired collection --> | ||
<test expect_num_outputs="1"> | ||
<conditional name="paired_cond"> | ||
<param name="paired_select" value="paired"/> | ||
<param name="reads"> | ||
<collection type="list:paired"> | ||
<element name="F3D0_S188_L001"> | ||
<collection type="paired"> | ||
<element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
<element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
</collection> | ||
</element> | ||
<element name="F3D141_S207_L001"> | ||
<collection type="paired"> | ||
<element name="forward" value="F3D141_S207_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
<element name="reverse" value="F3D141_S207_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
</collection> | ||
</element> | ||
</collection> | ||
</param> | ||
</conditional> | ||
|
||
<param name="forward_primer" value="ACCTGCGGARGGATCA"/> | ||
<param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/> | ||
<output name="out"> | ||
<assert_contents> | ||
<has_n_lines n="9"/> | ||
<has_n_columns n="7"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- paired data in separate collection --> | ||
<test expect_num_outputs="1"> | ||
<conditional name="paired_cond"> | ||
<param name="paired_select" value="separate"/> | ||
<param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz,F3D141_S207_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
<param name="sdaer" value="F3D0_S188_L001_R2_001.fastq.gz,F3D141_S207_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
</conditional> | ||
|
||
<param name="forward_primer" value="ACCTGCGGARGGATCA"/> | ||
<param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/> | ||
<output name="out"> | ||
<assert_contents> | ||
<has_n_lines n="9"/> | ||
<has_n_columns n="7"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- single end data --> | ||
<test expect_num_outputs="1"> | ||
<conditional name="paired_cond"> | ||
<param name="paired_select" value="single"/> | ||
<param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> | ||
</conditional> | ||
<param name="forward_primer" value="ACCTGCGGARGGATCA"/> | ||
<param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/> | ||
<output name="out"> | ||
<assert_contents> | ||
<has_n_lines n="3"/> | ||
<has_n_columns n="7"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
|
||
<help><![CDATA[ | ||
Description | ||
........... | ||
Simple check for primer sequences in sequencing data. The tool counts the number | ||
of occurrences of the primer sequence, its complement, the reverse and the | ||
reverse complement. | ||
See also: https://benjjneb.github.io/dada2/ITS_workflow.html#identify-primers | ||
Usage | ||
..... | ||
**Input** FASTQ datasets and forward and reverse primers | ||
**Output** a table listing the counts of the different occurrences in the read files. | ||
@HELP_OVERVIEW@ | ||
]]></help> | ||
<expand macro="citations"/> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters