Skip to content

Commit

Permalink
Merge branch '0.10.x'
Browse files Browse the repository at this point in the history
  • Loading branch information
newgene committed Aug 12, 2021
2 parents 48624ae + 2965571 commit afefe06
Show file tree
Hide file tree
Showing 17 changed files with 483 additions and 262 deletions.
4 changes: 1 addition & 3 deletions requirements_web.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# biothings
biothings[web_extra]==0.9.1
elasticsearch==6.3.1
elasticsearch-dsl==6.3.1
git+git://github.com/biothings/[email protected]#egg=biothings[web_extra]

# for sentry monitoring
raven
316 changes: 161 additions & 155 deletions src/config_hub.py

Large diffs are not rendered by default.

51 changes: 31 additions & 20 deletions src/config_web.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
# -*- coding: utf-8 -*-
import copy
import re

from biothings.web.settings.default import (ANNOTATION_KWARGS, APP_LIST,
QUERY_KWARGS)
from biothings.web.settings.default import (
ANNOTATION_KWARGS, APP_LIST, QUERY_KWARGS)

# *****************************************************************************
# Elasticsearch variables
# *****************************************************************************
ES_HOST = 'es6.biothings.io:9200'
ES_INDEX = 'myvariant_current_hg19'
ES_DOC_TYPE = 'variant'
ES_HOST = 'es7.biothings.io:443'
ES_ARGS = {
'timeout': 120,
'aws': True
}
ES_INDICES = {
None: 'myvariant_current_hg19',
'variant': 'myvariant_current_hg19',
'hg19': 'myvariant_current_hg19',
'hg38': 'myvariant_current_hg38'
}
Expand All @@ -21,8 +24,9 @@
# *****************************************************************************
API_VERSION = 'v1'
APP_LIST = [
(r'/v1/variant/(chr.{1,2}):(?!g\.)[g\.]{0,2}(\d+.*)', 'tornado.web.RedirectHandler', {'url': '/v1/variant/{0}:g.{1}'}),
*APP_LIST,
(r'/v1/variant/(chr.{1,2}):(?!g\.)[g\.]{0,2}(\d+.*)',
'tornado.web.RedirectHandler', {'url': '/v1/variant/{0}:g.{1}'}),
*APP_LIST, # default handlers
(r"/{pre}/metadata/fields/?", 'web.handlers.MVMetadataFieldHandler'),
(r"/{pre}/metadata/?", 'web.handlers.MVMetadataSourceHandler'),
(r"/{pre}/{ver}/metadata/fields/?", 'web.handlers.MVMetadataFieldHandler'),
Expand All @@ -38,18 +42,18 @@
# Analytics & Tracking
# *****************************************************************************

GA_ACTION_QUERY_GET = 'query_get'
GA_ACTION_QUERY_POST = 'query_post'
GA_ACTION_ANNOTATION_GET = 'variant_get'
GA_ACTION_ANNOTATION_POST = 'variant_post'
GA_TRACKER_URL = 'MyVariant.info'
URL_BASE = 'http://myvariant.info'

# for logo on format=html
HTML_OUT_HEADER_IMG = "/static/favicon.ico"

# for title line on format=html
HTML_OUT_TITLE = """<p style="font-family:'Open Sans',sans-serif;font-weight:bold; font-size:16px;"><a href="http://myvariant.info" target="_blank" style="text-decoration: none; color: black">MyVariant.info - Variant Annotation as a Service</a></p>"""
HTML_OUT_TITLE = """
<p style="font-family:'Open Sans',sans-serif;font-weight:bold; font-size:16px;">
<a href="http://myvariant.info" target="_blank" style="text-decoration: none; color: black">
MyVariant.info - Variant Annotation as a Service
</a>
</p>"""

METADATA_DOCS_URL = "http://docs.myvariant.info/en/latest/doc/data.html"
QUERY_DOCS_URL = "http://docs.myvariant.info/en/latest/doc/variant_query_service.html"
Expand All @@ -58,20 +62,27 @@
# kwargs for status check get
STATUS_CHECK = {
'id': 'chr1:g.218631822G>A',
'index': 'myvariant_current_hg19',
'doc_type': 'variant'
'index': 'myvariant_current_hg19'
}

# *****************************************************************************
# User Input Control
# *****************************************************************************
ANNOTATION_ID_REGEX_LIST = [(re.compile(r'rs[0-9]+', re.I), 'dbsnp.rsid'),
(re.compile(r'rcv[0-9\.]+', re.I), 'clinvar.rcv.accession'),
(re.compile(r'var_[0-9]+', re.I), 'uniprot.humsavar.ftid')]
ANNOTATION_ID_REGEX_LIST = [
(re.compile(r'rs[0-9]+', re.I), 'dbsnp.rsid'),
(re.compile(r'rcv[0-9\.]+', re.I), 'clinvar.rcv.accession'),
(re.compile(r'var_[0-9]+', re.I), 'uniprot.humsavar.ftid')
]
ANNOTATION_DEFAULT_SCOPES = ['_id', 'clingen.caid']

# typedef for assembly parameter
ASSEMBLY_TYPEDEF = {'assembly': {'type': str, 'default': 'hg19', 'enum': ('hg19', 'hg38'), 'group': ('esqb', 'es')}}
ASSEMBLY_TYPEDEF = {
'assembly': {
'type': str,
'default': 'hg19',
'enum': ('hg19', 'hg38')
}
}

ANNOTATION_KWARGS = copy.deepcopy(ANNOTATION_KWARGS)
ANNOTATION_KWARGS['*'].update(ASSEMBLY_TYPEDEF)
Expand Down
88 changes: 48 additions & 40 deletions src/hub/dataindex/indexer.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,64 @@
import os, time
import time
import asyncio

import config
import biothings.hub.dataindex.indexer as indexer
from biothings.hub.dataindex.indexer import Indexer, IndexManager, ColdHotIndexer
from biothings.hub.dataexport.ids import export_ids, upload_ids
from biothings.utils.hub_db import get_src_build
from biothings.utils.es import ESIndexer
from utils.stats import update_stats


class BaseVariantIndexer(indexer.Indexer):

def enrich_final_mapping(self,final_mapping):
# enrich with myvariant specific stuff
final_mapping["properties"]["chrom"] = {
'analyzer': 'string_lowercase',
'type': 'text'}
final_mapping["properties"]["observed"] = {
"type": "boolean"}
final_mapping["properties"]["_seqhashed"] = {
"type" : "object",
"properties" : {
"_flag": {
"type" : "boolean"
}
}
}
class BaseVariantIndexer(Indexer):

return final_mapping
def __init__(self, build_doc, indexer_env, target_name, index_name):
super().__init__(build_doc, indexer_env, target_name, index_name)

def get_index_creation_settings(self):
settings = super(BaseVariantIndexer,self).get_index_creation_settings()
settings.setdefault("mapping",{}).setdefault("total_fields",{})["limit"] = 2000
return settings
self.es_index_mappings["properties"]["chrom"] = {
'analyzer': 'string_lowercase',
'type': 'text'
}
self.es_index_mappings["properties"]["observed"] = {
"type": "boolean"
}
self.es_index_mappings["properties"]["_seqhashed"] = {
"type": "object",
"properties": {
"_flag": {
"type": "boolean"
}
}
}
self.es_index_settings["mapping"] = {
"total_fields": {
"limit": 2000
}
}
self.assembly = build_doc["build_config"]["assembly"]

def post_index(self, target_name, index_name, job_manager, steps=["index","post"], batch_size=10000, ids=None, mode=None):
# TODO: not tested yet
@asyncio.coroutine
def post_index(self):
    """Refresh the 'stats' metadata once indexing has finished.

    Logs a notice, waits three minutes, then builds an ``ESIndexer`` for
    ``self.es_index_name`` and hands it to ``update_stats`` together with
    the assembly recorded on this indexer.

    Returns:
        Whatever ``update_stats(idxer, self.assembly)`` returns.
    """
    # Migrated from Sebastian's commit 1a7b7a
    # It was originally marked "Not Tested Yet".
    self.logger.info("Sleeping for a bit while index is being fully updated...")
    # Must be asyncio.sleep: time.sleep() blocks the event loop and returns
    # None, so "yield from time.sleep(...)" raises TypeError after the wait.
    yield from asyncio.sleep(3*60)
    idxer = ESIndexer(
        index=self.es_index_name,
        doc_type=self.doc_type,
        es_host=self.es_client_args.get('hosts'))
    self.logger.info("Updating 'stats' by querying index '%s'" % self.es_index_name)
    return update_stats(idxer, self.assembly)


class MyVariantIndexerManager(IndexManager):

class MyVariantIndexerManager(indexer.IndexManager):
# New Hub Command

def post_publish(self, snapshot, index, *args, **kwargs):
# assuming build name == index name, and assuming demo index has
# "demo" in its name...
# assuming full index, not demo, guess name now
bdoc = get_src_build().find_one({"_id" : index})
bdoc = get_src_build().find_one({"_id": index})
assert bdoc, "Can't find build doc associated with index '%s' (should be named the same)" % index
ids_file = export_ids(index)
if "hg19" in index or "hg19" in snapshot:
Expand All @@ -59,15 +67,15 @@ def post_publish(self, snapshot, index, *args, **kwargs):
redir = "hg38_ids.xz"
if "demo" in index or "demo" in snapshot:
redir = "demo_%s" % redir
upload_ids(ids_file, redir,
s3_bucket=config.IDS_S3_BUCKET,
aws_key=config.AWS_KEY,
aws_secret=config.AWS_SECRET)
upload_ids(ids_file, redir,
s3_bucket=config.IDS_S3_BUCKET,
aws_key=config.AWS_KEY,
aws_secret=config.AWS_SECRET)


class VariantIndexer(BaseVariantIndexer):
pass

class ColdHotVariantIndexer(indexer.ColdHotIndexer,BaseVariantIndexer):

class ColdHotVariantIndexer(ColdHotIndexer, BaseVariantIndexer):
pass


11 changes: 7 additions & 4 deletions src/index.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from tornado.web import StaticFileHandler, RedirectHandler

from biothings.web.index_base import main
from biothings.web.launcher import main
from web.beacon.handlers import BeaconHandler, BeaconInfoHandler

if __name__ == "__main__":
main([
(r"/", RedirectHandler, {"url": "/standalone", "permanent": False}), # override default frontpage
(r"/demo/?()", StaticFileHandler, {"path": "docs/demo", "default_filename": "index.html"}),
(r"/standalone/?()", StaticFileHandler, {"path": "docs/standalone", "default_filename": "index.html"}),
# override default frontpage
(r"/", RedirectHandler, {"url": "/standalone", "permanent": False}),
(r"/demo/?()", StaticFileHandler,
{"path": "docs/demo", "default_filename": "index.html"}),
(r"/standalone/?()", StaticFileHandler,
{"path": "docs/standalone", "default_filename": "index.html"}),
(r"/beacon/query?", BeaconHandler),
(r"/beacon/info", BeaconInfoHandler),
])
Empty file removed src/tests/__init__.py
Empty file.
47 changes: 47 additions & 0 deletions src/tests/app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

"""
Config file to run tests for MyVariant.info
"""
import os as _os
import importlib.util as _imp_util

CONFIG_FILE_NAME = "config_web.py"

# Locate the config file: start in the current working directory and climb
# one directory per iteration until the file exists.
_cfg_path = _os.path.abspath(_os.path.join(_os.path.curdir, CONFIG_FILE_NAME))
while not _os.path.exists(_cfg_path):
    _new_path = _os.path.abspath(
        _os.path.join(_os.path.dirname(_cfg_path), _os.path.pardir, CONFIG_FILE_NAME)
    )
    # Climbing no longer changes the path: there is nowhere left to look.
    if _new_path == _cfg_path:
        raise Exception(f"no config file {CONFIG_FILE_NAME} found")
    _cfg_path = _new_path

# Execute the located file as a module named "parent_config".
_spec = _imp_util.spec_from_file_location("parent_config", _cfg_path)
_config = _imp_util.module_from_spec(_spec)
_spec.loader.exec_module(_config)

# Re-export every name of the loaded config that does not start with an
# underscore into this module's namespace.
for _k, _v in _config.__dict__.items():
    if _k.startswith('_'):
        continue
    globals()[_k] = _v

# The lookup constant is only needed above; remove it from the namespace.
del CONFIG_FILE_NAME

# Test-specific overrides of the values loaded above.
ES_HOST = 'localhost:9200'
ES_INDICES = {
    None: 'mvtest_hg19',
    'variant': 'mvtest_hg19',
    'hg19': 'mvtest_hg19',
    'hg38': 'mvtest_hg38',
}
ES_ARGS = {
    'timeout': 120,
}
1 change: 1 addition & 0 deletions src/tests/app/test_data/mv_app_test/mvtest_hg19.json

Large diffs are not rendered by default.

Loading

0 comments on commit afefe06

Please sign in to comment.