Skip to content

Commit

Permalink
Merge pull request #105 from newgene/enhancement-autohub-validator
Browse files Browse the repository at this point in the history
Enhancement autohub: allow easily customize validation logic
  • Loading branch information
newgene authored Mar 23, 2023
2 parents 12f9c8d + 0747063 commit d0712cc
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 9 deletions.
6 changes: 5 additions & 1 deletion biothings/hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ class HubServer(object):
"version_urls": getattr(config, "VERSION_URLS", []),
"indexer_factory": getattr(config, "AUTOHUB_INDEXER_FACTORY", None),
"es_host": getattr(config, "AUTOHUB_ES_HOST", None),
"validator_class": getattr(config, "AUTOHUB_VALIDATOR_CLASS", None),
}

def __init__(
Expand Down Expand Up @@ -830,6 +831,7 @@ def configure_autohub_feature(self):
version_urls = self.autohub_config["version_urls"]
indexer_factory = self.autohub_config["indexer_factory"]
es_host = self.autohub_config["es_host"]
validator_class = self.autohub_config["validator_class"]
factory = None
if indexer_factory:
assert (
Expand All @@ -842,7 +844,9 @@ def configure_autohub_feature(self):
self.logger.error(
"Couldn't find indexer factory class from '%s': %s" % (indexer_factory, e)
)
self.autohub_feature = AutoHubFeature(autohub_managers, version_urls, factory)
self.autohub_feature = AutoHubFeature(
autohub_managers, version_urls, factory, validator_class=validator_class,
)
try:
self.autohub_feature.configure()
self.autohub_feature.configure_auto_release(config)
Expand Down
4 changes: 4 additions & 0 deletions biothings/hub/default_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@
# A list of URLs to the versions.json files, which contain data release metadata
VERSION_URLS = []

# Use this setting to customize the validation logic of the autohub feature.
# It can be a class or the dotted import path of a class. AutoHubValidator is used
# by default, and any custom class must be a subclass of it.
AUTOHUB_VALIDATOR_CLASS = None

# Set to True to skip checking application/biothings version matching, before installing
# a data release, in version settings like "app_version", "standalone_version", "biothings_version"
SKIP_CHECK_COMPAT = True
Expand Down
32 changes: 24 additions & 8 deletions biothings/hub/standalone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
This standalone module is originally located at "biothings/standalone" repo.
It's used for Standalone/Autohub instance.
"""
import os
import asyncio
import importlib
import logging
import os
import sys
import asyncio
from copy import deepcopy
from functools import partial

from biothings import config as btconfig
from biothings.hub import HubServer
from biothings.hub.autoupdate import BiothingsDumper, BiothingsUploader
from biothings.hub.standalone.validators import AutoHubValidator
from biothings.utils.es import ESIndexer
from biothings.utils.backend import DocESBackend
from biothings.utils.loggers import get_logger
Expand All @@ -26,8 +28,9 @@ class AutoHubFeature(object):

DEFAULT_DUMPER_CLASS = BiothingsDumper
DEFAULT_UPLOADER_CLASS = BiothingsUploader
DEFAULT_VALIDATOR_CLASS = AutoHubValidator

def __init__(self, managers, version_urls, indexer_factory=None, *args, **kwargs):
def __init__(self, managers, version_urls, indexer_factory=None, validator_class=None, *args, **kwargs):
"""
version_urls is a list of URLs pointing to versions.json file. The name
of the data release is taken from the URL (http://...s3.amazon.com/<the_name>/versions.json)
Expand All @@ -48,13 +51,30 @@ def __init__(self, managers, version_urls, indexer_factory=None, *args, **kwargs
When a data release named (from URL) matches an entry, it's used to configured
which ES backend to target, otherwise the default one is used.
If validator_class is passed, it'll be used to provide validation methods for installing step.
If validator_class is None, the AutoHubValidator will be used as fallback.
"""
super().__init__(*args, **kwargs)
self.version_urls = self.extract(version_urls)
self.indexer_factory = indexer_factory
self.managers = managers
self.logger, _ = get_logger("autohub")

if validator_class:
if isinstance(validator_class, str):
parts = validator_class.split(".")
validator_module = ".".join(parts[:-1])
validator_class = parts[-1]
validator_class = getattr(importlib.import_module(validator_module), validator_class)
assert issubclass(validator_class, AutoHubValidator), (
"validator_class must be a subclass of biothings.hub.standalone.AutoHubValidator"
)
self.validator = validator_class(self)
else:
self.validator = self.DEFAULT_VALIDATOR_CLASS(self)

def extract(self, urls):
vurls = []
for url in urls:
Expand All @@ -66,10 +86,6 @@ def extract(self, urls):

return vurls

def validate_release(self, version_path, force=False):
    """
    Hook that checks whether the release is valid to install.

    This implementation performs no check and returns None. An implementation
    that rejects a release should raise an Exception to stop the progress.
    """
    return None

def install(self, src_name, version="latest", dry=False, force=False, use_no_downtime_method=True):
"""
Update hub's data up to the given version (default is latest available),
Expand All @@ -91,7 +107,7 @@ async def do(version):
if dry:
return version_path

self.validate_release(version_path=version_path, force=force)
self.validator.validate(version_path=version_path, force=force)

for step_version in version_path:
logging.info("Downloading data for version '%s'", step_version)
Expand Down
22 changes: 22 additions & 0 deletions biothings/hub/standalone/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class AutoHubValidateError(Exception):
    """
    Error carrying the reason why a validation failed.

    The reason is stored on the ``reason`` attribute and is also passed to
    ``Exception`` as its first argument, so ``str(exc)``, tracebacks and log
    records show it instead of an empty message.

    :param reason: explanation of why the validation failed
    :param args: extra positional arguments passed to ``Exception`` after ``reason``
    :param kwargs: extra keyword arguments passed to the parent initializer
    """

    # Class-level default; __init__ overrides it on every instance.
    reason = None

    def __init__(self, reason, *args, **kwargs):
        self.reason = reason
        # Pass the reason along so it becomes args[0], i.e. the exception
        # message, and so ``args`` matches the constructor signature.
        super().__init__(reason, *args, **kwargs)


class AutoHubValidator:
    """
    Base class for the validation performed when installing a hub from a release.

    Subclass it and override ``validate`` to customize the validation logic.
    """

    def __init__(self, auto_hub_feature):
        # Keep the feature object this validator was created with, so that
        # validation code can reach it.
        self.auto_hub_feature = auto_hub_feature

    def validate(self, force=False, **kwargs):
        """
        Check whether the release is valid to install.

        This default implementation accepts everything: it does nothing and
        returns None. An override that finds the release invalid should raise
        an AutoHubValidateError that includes the reason, to stop the progress.
        """
        return None

0 comments on commit d0712cc

Please sign in to comment.