Skip to content

Commit

Permalink
Download database now extra step
Browse files Browse the repository at this point in the history
The AGFusion database must now be downloaded as a separate step. This reduces the Python package size.
  • Loading branch information
murphycj committed May 12, 2017
1 parent e6903d5 commit 8c30ee1
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 57 deletions.
2 changes: 0 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
include README.md
include agfusion/data/agfusion.db.gz
include agfusion/data/042117_pfamA.txt.gz
include bin/agfusion
include test/test.py
include test/test_mouse.sh
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,6 @@ agfusion annotate \
![alt tag](https://github.com/murphycj/AGFusion/blob/master/doc/ENSMUST00000064477-ENSMUST00000002487-rescale.png)
![alt tag](https://github.com/murphycj/AGFusion/blob/master/doc/ENSMUST00000122054-ENSMUST00000070330-rescale.png)

# Advanced Usage

### Building your own database

Under construction...

# Installation

First you need to install pyensembl (and the other dependencies listed at the bottom) and download the reference genome you will use by running one of the following.
Expand Down Expand Up @@ -184,6 +178,12 @@ Then you can install AGFusion via the following:
pip install agfusion
```

Finally, download the AGFusion database:

```
agfusion download
```

# Dependencies

- python 2.7.8
Expand Down
2 changes: 1 addition & 1 deletion agfusion/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.138"
__version__ = "0.139"
65 changes: 46 additions & 19 deletions agfusion/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,25 @@
from agfusion import exceptions
import pyensembl

def downloaddb(args):
    """Download the gzipped AGFusion database and decompress it.

    Fetches ``agfusion.db.gz`` from the AGFusionDB GitHub repository into
    ``args.dir`` (creating the directory if needed) and writes the
    decompressed copy next to it as ``agfusion.db``.

    Args:
        args: Parsed command-line namespace; only ``args.dir`` (the target
            directory) is read.
    """
    import gzip
    import os
    import shutil

    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; support both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # makedirs (rather than mkdir) so that a nested --dir path also works.
    if not os.path.isdir(args.dir):
        os.makedirs(args.dir)

    gz_path = os.path.join(args.dir, 'agfusion.db.gz')
    # Build the output path from the directory instead of using
    # str.replace('.gz', ''), which would also rewrite a '.gz' occurring
    # elsewhere in the path (e.g. in the directory name).
    db_path = os.path.join(args.dir, 'agfusion.db')

    urlretrieve(
        "https://raw.githubusercontent.com/murphycj/AGFusionDB/master/agfusion.db.gz",
        gz_path)

    # The database is binary data: write it in 'wb' mode so no newline
    # translation can corrupt it, and use open() rather than the
    # Python-2-only file() builtin.
    with gzip.open(gz_path, 'rb') as f_in, open(db_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

def annotate(gene5prime,junction5prime,gene3prime,junction3prime,
outdir,colors,rename,scale,db,pyensembl_data,args):

Expand Down Expand Up @@ -190,12 +209,11 @@ def add_common_flags(parser):
help='(Optional) Do not label domains.'
)
parser.add_argument(
'--db',
'--dbpath',
type=str,
default=None,
default=os.path.join(os.path.expanduser('~'),'.agfusion'),
required=False,
help='(Optional) The SQLite3 database. Defaults to using the ' +
'database provided by the package.'
help='(Optional) Path to where the AGFusion databse is located (default: ' + os.path.join(os.path.expanduser('~'),'.agfusion') + ')'
)
parser.add_argument(
'--debug',
Expand Down Expand Up @@ -287,24 +305,36 @@ def main():
)
add_common_flags(batch_parser)

# download database parser
# download database

database_parser = subparsers.add_parser('database', help='Download database for a reference genome.')
database_parser = subparsers.add_parser('download', help='Download database for a reference genome.')
database_parser.add_argument(
'--dir',
type=str,
required=False,
default=os.path.join(os.path.expanduser('~'),'.agfusion'),
help='(Optional) Directory to the database will be downloaded to (default: $HOME/.agfusion/)'
)
args = parser.parse_args()

# build database parser

build_database_parser = subparsers.add_parser('build', help='Build database for a reference genome.')
build_database_parser.add_argument(
'--database',
type=str,
required=True,
help='Path to the database file (e.g. agfusion.db)'
)
database_parser.add_argument(
build_database_parser.add_argument(
'--build',
type=str,
required=True,
help='homo_sapiens_core_84_38 (for GRCh38), ' +
'homo_sapiens_core_75_37 (for GRCh37), or ' +
'mus_musculus_core_84_38 (for GRCm38)'
)
database_parser.add_argument(
build_database_parser.add_argument(
'--server',
type=str,
required=False,
Expand All @@ -313,25 +343,22 @@ def main():
)
args = parser.parse_args()

if args.subparser_name == 'database':
if args.subparser_name == 'build':
builddb(args)
elif args.subparser_name == 'download':
downloaddb(args)
else:
if not os.path.exists(args.out):
os.mkdir(args.out)

# if user does not specify a sqlite database then use the one provided
# by the package

if args.db is None:
file_path = os.path.join(
os.path.split(__file__)[0],
'data',
'agfusion.db'
)

db = agfusion.AGFusionDB(file_path,debug=args.debug)
else:
db = agfusion.AGFusionDB(args.db,debug=args.debug)
file_path = os.path.join(
args.dbpath,
'agfusion.db'
)
db = agfusion.AGFusionDB(file_path,debug=args.debug)

# get the pyensembl data

Expand Down
Binary file removed agfusion/data/042117_pfamA.txt.gz
Binary file not shown.
Binary file removed agfusion/data/agfusion.db.gz
Binary file not shown.
9 changes: 0 additions & 9 deletions agfusion/data/build_db.sh

This file was deleted.

22 changes: 3 additions & 19 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,9 @@
import os
import site
import gzip
from setuptools import setup, find_packages
from setuptools.command.install import install
import re
import shutil
import site

class CustomInstall(install):
    """Install command that also unpacks the bundled AGFusion database."""

    def run(self):
        """Run the standard install, then decompress ``agfusion.db.gz``.

        The compressed database is looked up under
        ``<first site-packages dir>/agfusion/data`` and the decompressed
        copy is written alongside it as ``agfusion.db``.
        """
        install.run(self)

        # NOTE(review): site.getsitepackages()[0] is assumed to be the
        # directory the package was just installed into -- confirm for
        # virtualenv / --user installs.
        data_dir = os.path.join(
            site.getsitepackages()[0],
            'agfusion',
            'data'
        )
        gz_path = os.path.join(data_dir, 'agfusion.db.gz')
        # Build the output path explicitly instead of using
        # str.replace('.gz', ''), which would also rewrite a '.gz' occurring
        # elsewhere in the path.
        db_path = os.path.join(data_dir, 'agfusion.db')

        # The database is binary data: write it in 'wb' mode, and use open()
        # rather than the Python-2-only file() builtin.
        with gzip.open(gz_path, 'rb') as f_in, open(db_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
from setuptools import setup, find_packages

VERSIONFILE = "agfusion/_version.py"
verstrline = open(VERSIONFILE, "rt").read()
Expand All @@ -35,15 +20,14 @@ def run(self):
version=verstr,
name='agfusion',
packages=find_packages(),
description="Python package providing that can visualize different annotations of a gene fusion.",
description="Python package to annotate and visualize gene fusions.",
author='Charles Murphy',
author_email='[email protected]',
license='MIT',
url='https://github.com/murphycj/AGFusion',
long_description=README,
include_package_data=True,
scripts=['bin/agfusion'],
cmdclass={'install': CustomInstall},
install_requires=[
'pyensembl>=0.9.5',
'matplotlib>=1.5.0',
Expand Down
Binary file removed test.pdf
Binary file not shown.
3 changes: 2 additions & 1 deletion test/test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from os.path import join, expanduser
import unittest
import agfusion
from agfusion import utils
import pyensembl
from Bio import SeqIO

data = pyensembl.EnsemblRelease(84,'mouse')
db = agfusion.AGFusionDB(utils.agfusion_db)
db = agfusion.AGFusionDB(join(expanduser('~'),'.agfusion','agfusion.db'))
db.build = 'mus_musculus_core_84_38'

class TestSequencePrediction(unittest.TestCase):
Expand Down

0 comments on commit 8c30ee1

Please sign in to comment.