Skip to content

Commit

Permalink
New feature, query MySQL database directly
Browse files Browse the repository at this point in the history
Tool to replace perl tool and query Ensembl MySQL database directly.
Initial commit, basic usage
  • Loading branch information
murphycj committed Feb 22, 2017
1 parent 5b3dbb9 commit ab5f003
Show file tree
Hide file tree
Showing 7 changed files with 321 additions and 279 deletions.
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
include README.md
include agfusion/data/agfusion.db
include bin/agfusion
include bin/build_db
include bin/agfusion_builddb
include test/test.py
include test/test_mouse.sh
include test/test_human.sh
include test/test_human.sh
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ For a given gene fusion, AGFusion will predict the cDNA, CDS, and protein sequen

Docs are at http://pythonhosted.org/agfusion/

# Example Usage
# Examples

### Basic Usage

Expand Down Expand Up @@ -101,6 +101,12 @@ agfusion \
![alt tag](https://github.com/murphycj/AGFusion/blob/master/doc/ENSMUST00000132176-ENSMUST00000002487-scale.png)
![alt tag](https://github.com/murphycj/AGFusion/blob/master/doc/ENSMUST00000120187-ENSMUST00000086074.png)

# Advanced Usage

### Building your own database

You need to have MySQL and the MySQLdb Python package installed.

# Installation

First you need to install pyensembl (and the other dependencies listed at the bottom of this readme) and download the reference genome you will use by running one of the following.
Expand Down
88 changes: 69 additions & 19 deletions agfusion/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,57 @@
import agfusion
import pyensembl


def builddb():
    """Command-line entry point that builds the AGFusion SQLite3 database.

    Parses ``--database``, ``--build`` and ``--server`` from the command
    line, constructs an ``agfusion.AGFusionDBBManager`` from those three
    values, and then asks it to fetch the transcript table and the protein
    annotation data.
    """

    parser = argparse.ArgumentParser(
        description='Build the SQLite3 database for a reference ' +
        'genome by querying the Ensembl MySQL server. If the database ' +
        'given by --database already exists then that portion will be ' +
        'overwritten.'
    )
    parser.add_argument(
        '--database',
        type=str,
        required=True,
        help='Path to the database file (e.g. agfusion.db)'
    )
    parser.add_argument(
        '--build',
        type=str,
        required=True,
        help='homo_sapiens_core_84_38 (for GRCh38), ' +
        'homo_sapiens_core_75_37 (for GRCh37), or ' +
        'mus_musculus_core_75_38 (for GRCm38)'
    )
    parser.add_argument(
        '--server',
        type=str,
        required=False,
        default='ensembldb.ensembl.org',
        help='(optional) Ensembl server (default ensembldb.ensembl.org)'
    )
    args = parser.parse_args()

    db = agfusion.AGFusionDBBManager(args.database, args.build, args.server)

    # NOTE(review): the gene-name fetch below is disabled, so this message is
    # logged without any work being done -- confirm whether
    # fetch_gene_names() should be re-enabled or the message dropped.
    db.logger.info('Fetching alternative gene names...')

    # db.fetch_gene_names()

    db.logger.info('Fetching transcript tables...')

    db.fetch_transcript_table()

    db.logger.info('Fetching protein annotation data...')

    db.fetch_protein_annotation()


def main():

parser = argparse.ArgumentParser(description='Annotate Gene Fusion (AGFusion)')
parser = argparse.ArgumentParser(
description='Annotate Gene Fusion (AGFusion)'
)
parser.add_argument(
'--gene5prime',
type=str,
Expand All @@ -27,16 +75,16 @@ def main():
'--junction5prime',
type=int,
required=True,
help='Genomic location of predicted fuins for the 5\' gene partner. ' + \
'The 1-based position that is the last nucleotide included in ' + \
help='Genomic location of predicted fuins for the 5\' gene partner. ' +
'The 1-based position that is the last nucleotide included in ' +
'the fusion before the junction.'
)
parser.add_argument(
'--junction3prime',
type=int,
required=True,
help='Genomic location of predicted fuins for the 3\' gene partner. ' + \
'The 1-based position that is the first nucleotide included in ' + \
help='Genomic location of predicted fuins for the 3\' gene partner. ' +
'The 1-based position that is the first nucleotide included in ' +
'the fusion after the junction.'
)
parser.add_argument(
Expand All @@ -56,26 +104,28 @@ def main():
type=str,
default=None,
required=False,
help='(Optional) The SQLite3 database. Defaults to using the database provided by the package.'
help='(Optional) The SQLite3 database. Defaults to using the ' +
'database provided by the package.'
)
parser.add_argument(
'--noncanonical',
action='store_true',
required=False,
default=False,
help='(Optional) Include non-canonical gene transcripts in the analysis (default False).'
help='(Optional) Include non-canonical gene transcripts ' +
'in the analysis (default False).'
)
parser.add_argument(
'--protein_databases',
type=str,
required=False,
nargs='+',
default=['pfam','tmhmm'],
help='(Optional) Space-delimited list of one or more protein ' + \
'feature databases to include when visualizing proteins. ' + \
'Options are: pfam, tigrfam, prints, hmmpanther, ' + \
'blastprodom, gene3d, hamap, pirsf, ncoils, superfamily, seg, ' + \
'signalp, scanprosite, pfscan, tmhmm, and smart. ' + \
default=['pfam', 'tmhmm'],
help='(Optional) Space-delimited list of one or more protein ' +
'feature databases to include when visualizing proteins. ' +
'Options are: pfam, tigrfam, prints, hmmpanther, ' +
'blastprodom, gene3d, hamap, pirsf, ncoils, superfamily, seg, ' +
'signalp, scanprosite, pfscan, tmhmm, and smart. ' +
'(default includes pfam and tmhmm).'
)
parser.add_argument(
Expand All @@ -84,10 +134,10 @@ def main():
required=False,
nargs='+',
default=None,
help='(Optional) Space-delimited list of domain name and color to ' + \
'specify certain colors for domains. Format --color domain_name:color' + \
' (e.g. --color Pkinase_Tyr:blue I-set:#006600). ' + \
'Can use specific color names for hex representation. Default ' + \
help='(Optional) Space-delimited list of domain name and color to ' +
'specify certain colors for domains. Format --color domain_name:color' +
' (e.g. --color Pkinase_Tyr:blue I-set:#006600). ' +
'Can use specific color names for hex representation. Default ' +
'blue for everything.'
)
parser.add_argument(
Expand All @@ -96,8 +146,8 @@ def main():
required=False,
nargs='+',
default=None,
help='(Optional) Space-delimited list of domain name and new name to ' + \
'rename particular domains. Format --rename domain_name:new_domain_name' + \
help='(Optional) Space-delimited list of domain name and new name to ' +
'rename particular domains. Format --rename domain_name:new_domain_name' +
' (e.g. --rename Pkinase_Tyr:Kinase).'
)
parser.add_argument(
Expand Down
Loading

0 comments on commit ab5f003

Please sign in to comment.