Skip to content

Commit

Permalink
Adds tests and usage information
Browse files Browse the repository at this point in the history
  • Loading branch information
pmelsted committed Apr 12, 2017
1 parent faa694f commit fd6e463
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 8 deletions.
4 changes: 2 additions & 2 deletions common.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <string>

#define PIZZLY_VERSION "0.37.0"
#define PIZZLY_VERSION "0.37.1"

struct ProgramOptions {
std::string gtf;
Expand All @@ -15,7 +15,7 @@ struct ProgramOptions {
int alignScore;
int insertSize;
int kmerScore;
ProgramOptions() : kmerScore(2) {}
ProgramOptions() : kmerScore(2), insertSize(400) {}
};

#endif // COMMON_H
11 changes: 5 additions & 6 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,29 @@ parseCommandLine(ProgramOptions & options, int argc, char const ** argv) {

// Define Options
seqan::addOption(parser, seqan::ArgParseOption(
"k", "", "k-mer size",
"k", "", "k-mer size used in kallisto",
seqan::ArgParseArgument::INTEGER, "K"));
seqan::addOption(parser, seqan::ArgParseOption(
"a", "align-score", "Alignment cutoff",
"a", "align-score", "Maximum number of mismatches allowed (default: 2)",
seqan::ArgParseArgument::INTEGER, "ALIGN_SCORE"));
seqan::addOption(parser, seqan::ArgParseOption(
"i", "insert-size", "Maximum size of fragment",
"i", "insert-size", "Maximum fragment size of library (default: 400)",
seqan::ArgParseArgument::INTEGER, "INSERT_SIZE"));
seqan::addOption(parser, seqan::ArgParseOption(
"o", "output", "Prefix for output files",
seqan::ArgParseArgument::STRING, "OUTPUT_PREFIX"));
seqan::addOption(parser, seqan::ArgParseOption(
"G", "gtf", "Annotation",
"G", "gtf", "Annotation in GTF format",
seqan::ArgParseArgument::STRING, "GTF"));
seqan::addOption(parser, seqan::ArgParseOption(
"C", "cache", "Annotation",
"C", "cache", "File for caching annotation (created if not present, otherwise reused from previous runs)",
seqan::ArgParseArgument::STRING, "cache"));
seqan::addOption(parser, seqan::ArgParseOption(
"F", "fasta", "Fasta reference",
seqan::ArgParseArgument::STRING, "FASTA"));

seqan::setRequired(parser, "k");
seqan::setRequired(parser, "o");
seqan::setRequired(parser, "i");
seqan::setRequired(parser, "F");
seqan::setVersion(parser, std::string(PIZZLY_VERSION));

Expand Down
5 changes: 5 additions & 0 deletions test/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
kallisto_out
pizzly_*
*.kidx
cache*
.snakemake
16 changes: 16 additions & 0 deletions test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# test data set

Here you will find a small test data set to ensure that kallisto and pizzly are working properly. The contents are:

- `transcripts.fasta.gz`: a gzip-compressed transcriptome
- `transcripts.gtf.gz`: a small subset of the GTF file for Ensembl version 85 corresponding to the transcripts provided
- `reads_X.fastq.gz`: gzip-compressed "left" and "right" reads (`reads_1.fastq.gz` and `reads_2.fastq.gz`)
- `Snakefile`: a sample [`snakemake`](https://bitbucket.org/johanneskoester/snakemake/wiki/Home) file (not required for running kallisto or pizzly)

Running `snakemake` will go through the following pipeline:

1. Creates the index for use with `kallisto`.
2. Runs `kallisto` with `--fusion` to identify potential fusions.
3. Runs `pizzly` on the `kallisto` output to identify potential fusions.
4. Creates a new index based on the transcriptome and the fusion transcripts identified by `pizzly`.
5. Runs `kallisto` in normal quantification mode on the expanded index to quantify both normal transcripts and fusions.
83 changes: 83 additions & 0 deletions test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os

# Shared settings for the test pipeline. All reference files share the
# basename PRE and differ only in their suffix.
PRE = "transcripts"
FASTA = PRE + ".fasta.gz"   # gzipped transcriptome
GTF = PRE + ".gtf.gz"       # gzipped annotation
INDEX = PRE + ".kidx"       # kallisto index built from FASTA
K = 31                      # k-mer size given to `kallisto index` and `pizzly -k`


# Command used to decompress to stdout: `gzcat` on Darwin, `zcat` elsewhere.
if os.uname().sysname == 'Darwin':
    ZCAT = 'gzcat'
else:
    ZCAT = 'zcat'

# Default target: requests the final product of each stage (fusion-aware
# quantification, pizzly fusion calls, and re-quantification on the
# expanded index), which pulls in every other rule.
rule all:
    input:
        quant="kallisto_out/abundance.h5",
        fusions="pizzly_out/output.json",
        requant="pizzly_post/abundance.h5"

# Build the kallisto index (INDEX) from the transcriptome (FASTA) using
# k-mer size K.
rule index:
    input:
        fasta=FASTA
    output:
        kidx=INDEX
    shell:
        "kallisto index "
        "-k {K} "
        "-i {output.kidx} "
        "{input.fasta}"

# Quantify the paired reads against the transcriptome index with
# `--fusion`, which additionally writes fusion.txt for pizzly.
#
# The output directory is passed as a param rather than declared as an
# output: every file inside it is already tracked individually, and
# Snakemake >= 5.2 rejects directory outputs that are not wrapped in
# directory().
rule kallisto_quant:
    input:
        r1="reads_1.fastq.gz",
        r2="reads_2.fastq.gz",
        kidx=INDEX
    output:
        "kallisto_out/abundance.h5",
        "kallisto_out/abundance.tsv",
        "kallisto_out/run_info.json",
        "kallisto_out/fusion.txt"
    params:
        outdir="kallisto_out"
    shell:
        "kallisto quant "
        "-i {input.kidx} "
        "-o {params.outdir} "
        "--fusion "
        "{input.r1} {input.r2}"

# Run pizzly on kallisto's fusion.txt. `--output` takes a prefix; the
# declared outputs are that prefix plus `.json` and `.fusions.fasta`.
#
# The GTF is declared as an input so that Snakemake notices when the
# annotation is missing or has changed, and the command is assembled
# from the declared inputs instead of repeating the paths by hand.
# The cache file is reused between runs, so it is kept as a param
# rather than declared as an output.
rule pizzly:
    input:
        fasta=FASTA,
        gtf=GTF,
        fusions="kallisto_out/fusion.txt"
    output:
        json="pizzly_out/output.json",
        fasta="pizzly_out/output.fusions.fasta"
    params:
        prefix="pizzly_out/output",
        cache="cache.txt"
    shell:
        "../build/pizzly "
        "-k {K} "
        "--gtf {input.gtf} "
        "--cache {params.cache} "
        "--align-score 2 "
        "--insert-size 400 "
        "--fasta {input.fasta} "
        "--output {params.prefix} "
        "{input.fusions}"

# Concatenate the decompressed transcriptome with the fusion transcripts
# reported by pizzly, gzip the result, and build a kallisto index on it.
rule append_index:
    input:
        transcripts=FASTA,
        fusions="pizzly_out/output.fusions.fasta"
    output:
        fasta="pizzly_post/transcripts_with_fusions.fasta.gz",
        kidx="pizzly_post/transcripts_with_fusions.kidx"
    shell:
        "cat <({ZCAT} {input.transcripts}) {input.fusions} | gzip - > {output.fasta} && "
        "kallisto index -k {K} -i {output.kidx} {output.fasta}"

# Re-quantify the reads in normal (non-fusion) mode against the index
# that contains both the transcriptome and the pizzly fusion transcripts.
#
# The output directory is passed as a param rather than declared as an
# output: `append_index` writes its own outputs into pizzly_post/, so
# claiming the whole directory here would overlap with that rule, and
# Snakemake >= 5.2 rejects directory outputs that are not wrapped in
# directory().
rule requant_kallisto:
    input:
        r1="reads_1.fastq.gz",
        r2="reads_2.fastq.gz",
        kidx="pizzly_post/transcripts_with_fusions.kidx"
    output:
        "pizzly_post/abundance.h5"
    params:
        outdir="pizzly_post"
    shell:
        "kallisto quant "
        "-i {input.kidx} "
        "-o {params.outdir} "
        "{input.r1} {input.r2}"
Binary file added test/reads_1.fastq.gz
Binary file not shown.
Binary file added test/reads_2.fastq.gz
Binary file not shown.
Binary file added test/transcripts.fasta.gz
Binary file not shown.
Binary file added test/transcripts.gtf.gz
Binary file not shown.

0 comments on commit fd6e463

Please sign in to comment.