From 07470635b9e85a5db8abbbb19af3dc9d16e60195 Mon Sep 17 00:00:00 2001
From: sengineer0
Date: Thu, 23 Mar 2023 21:33:16 +0700
Subject: [PATCH] Enhancement autohub validator: allow easily customize validator logic

---
Reviewer notes (this section is neither commit message nor diff):
- Line breaks and indentation were rebuilt from a copy in which all whitespace
  had been collapsed. Hunk line counts agree with the hunk headers and the
  diffstat, but context-line indentation and docstring wrapping are inferred;
  check them against the tree (or apply with `git apply --ignore-whitespace`).
- Angle-bracketed text appears to have been stripped from that copy (author
  e-mail, the URL placeholder in the AutoHubFeature docstring); it has not
  been re-invented here.
- The collapsed copy showed one more "+" marker in the "-48,6 +51,10" hunk than
  its header, the following hunk offsets and the diffstat allow; the hunk
  below follows the header (four added lines).
- Changes relative to the original commit, all on added lines and without
  altering any line count: AutoHubValidateError now forwards `reason` to
  Exception.__init__ so that str(exc) is no longer empty, and grammar was
  fixed in the added comments/docstrings.

 biothings/hub/__init__.py              |  6 ++++-
 biothings/hub/default_config.py        |  4 ++++
 biothings/hub/standalone/__init__.py   | 32 +++++++++++++++++++-------
 biothings/hub/standalone/validators.py | 22 ++++++++++++++++++
 4 files changed, 55 insertions(+), 9 deletions(-)
 create mode 100644 biothings/hub/standalone/validators.py

diff --git a/biothings/hub/__init__.py b/biothings/hub/__init__.py
index 403749731..6dca976a5 100644
--- a/biothings/hub/__init__.py
+++ b/biothings/hub/__init__.py
@@ -359,6 +359,7 @@ class HubServer(object):
         "version_urls": getattr(config, "VERSION_URLS", []),
         "indexer_factory": getattr(config, "AUTOHUB_INDEXER_FACTORY", None),
         "es_host": getattr(config, "AUTOHUB_ES_HOST", None),
+        "validator_class": getattr(config, "AUTOHUB_VALIDATOR_CLASS", None),
     }
 
     def __init__(
@@ -830,6 +831,7 @@ def configure_autohub_feature(self):
         version_urls = self.autohub_config["version_urls"]
         indexer_factory = self.autohub_config["indexer_factory"]
         es_host = self.autohub_config["es_host"]
+        validator_class = self.autohub_config["validator_class"]
         factory = None
         if indexer_factory:
             assert (
@@ -842,7 +844,9 @@ def configure_autohub_feature(self):
                 self.logger.error(
                     "Couldn't find indexer factory class from '%s': %s" % (indexer_factory, e)
                 )
-        self.autohub_feature = AutoHubFeature(autohub_managers, version_urls, factory)
+        self.autohub_feature = AutoHubFeature(
+            autohub_managers, version_urls, factory, validator_class=validator_class,
+        )
         try:
             self.autohub_feature.configure()
             self.autohub_feature.configure_auto_release(config)
diff --git a/biothings/hub/default_config.py b/biothings/hub/default_config.py
index c01e3f64b..3bf6ffe6c 100644
--- a/biothings/hub/default_config.py
+++ b/biothings/hub/default_config.py
@@ -342,6 +342,10 @@
 # A list of URLs to the versions.json files, which contain data release metadata
 VERSION_URLS = []
 
+# Use this configuration to customize the validation logic of the auto hub feature.
+# AutoHubValidator is used by default. Any custom class must extend it.
+AUTOHUB_VALIDATOR_CLASS = None
+
 # Set to True to skip checking application/biothings version matching, before installing
 # a data release, in version settings like "app_version", "standalone_version", "biothings_version"
 SKIP_CHECK_COMPAT = True
diff --git a/biothings/hub/standalone/__init__.py b/biothings/hub/standalone/__init__.py
index 56809693a..a28dd6711 100644
--- a/biothings/hub/standalone/__init__.py
+++ b/biothings/hub/standalone/__init__.py
@@ -2,16 +2,18 @@
 This standalone module is originally located at "biothings/standalone" repo.
 It's used for Standalone/Autohub instance.
 """
-import os
+import asyncio
+import importlib
 import logging
+import os
 import sys
-import asyncio
 from copy import deepcopy
 from functools import partial
 
 from biothings import config as btconfig
 from biothings.hub import HubServer
 from biothings.hub.autoupdate import BiothingsDumper, BiothingsUploader
+from biothings.hub.standalone.validators import AutoHubValidator
 from biothings.utils.es import ESIndexer
 from biothings.utils.backend import DocESBackend
 from biothings.utils.loggers import get_logger
@@ -26,8 +28,9 @@ class AutoHubFeature(object):
 
     DEFAULT_DUMPER_CLASS = BiothingsDumper
     DEFAULT_UPLOADER_CLASS = BiothingsUploader
+    DEFAULT_VALIDATOR_CLASS = AutoHubValidator
 
-    def __init__(self, managers, version_urls, indexer_factory=None, *args, **kwargs):
+    def __init__(self, managers, version_urls, indexer_factory=None, validator_class=None, *args, **kwargs):
         """
         version_urls is a list of URLs pointing to versions.json file. The name
         of the data release is taken from the URL (http://...s3.amazon.com//versions.json)
@@ -48,6 +51,10 @@ def __init__(self, managers, version_urls, indexer_factory=None, *args, **kwargs
 
         When a data release named (from URL) matches an entry, it's used to configured
         which ES backend to target, otherwise the default one is used.
+
+        If validator_class is passed, it'll be used to provide validation methods for the install step.
+        If validator_class is None, the AutoHubValidator will be used as a fallback.
+
         """
         super().__init__(*args, **kwargs)
         self.version_urls = self.extract(version_urls)
@@ -55,6 +62,19 @@ def __init__(self, managers, version_urls, indexer_factory=None, *args, **kwargs
         self.managers = managers
         self.logger, _ = get_logger("autohub")
 
+        if validator_class:
+            if isinstance(validator_class, str):
+                parts = validator_class.split(".")
+                validator_module = ".".join(parts[:-1])
+                validator_class = parts[-1]
+                validator_class = getattr(importlib.import_module(validator_module), validator_class)
+            assert issubclass(validator_class, AutoHubValidator), (
+                "validator_class must be a subclass of biothings.hub.standalone.AutoHubValidator"
+            )
+            self.validator = validator_class(self)
+        else:
+            self.validator = self.DEFAULT_VALIDATOR_CLASS(self)
+
     def extract(self, urls):
         vurls = []
         for url in urls:
@@ -66,10 +86,6 @@ def extract(self, urls):
 
         return vurls
 
-    def validate_release(self, version_path, force=False):
-        """Check if the release is valid to install. If not, it should raise an Exception to stop the progress"""
-        pass
-
     def install(self, src_name, version="latest", dry=False, force=False, use_no_downtime_method=True):
         """
         Update hub's data up to the given version (default is latest available),
@@ -91,7 +107,7 @@ async def do(version):
                 if dry:
                     return version_path
 
-                self.validate_release(version_path=version_path, force=force)
+                self.validator.validate(version_path=version_path, force=force)
 
                 for step_version in version_path:
                     logging.info("Downloading data for version '%s'", step_version)
diff --git a/biothings/hub/standalone/validators.py b/biothings/hub/standalone/validators.py
new file mode 100644
index 000000000..24b17ed65
--- /dev/null
+++ b/biothings/hub/standalone/validators.py
@@ -0,0 +1,22 @@
+class AutoHubValidateError(Exception):
+    reason = None
+
+    def __init__(self, reason, *args, **kwargs):
+        self.reason = reason
+        super().__init__(reason, *args, **kwargs)
+
+
+class AutoHubValidator:
+    """
+    This class provides an easy way to customize the validation logic for installing a hub from a release.
+    """
+
+    def __init__(self, auto_hub_feature):
+        self.auto_hub_feature = auto_hub_feature
+
+    def validate(self, force=False, **kwargs):
+        """
+        Check if the release is valid to install.
+        If invalid, it should raise an AutoHubValidateError carrying the reason, to stop the install.
+        """
+        pass