Skip to content

Commit

Permalink
Merge pull request #102 from newgene/auto-snapshot-cleanup
Browse files Browse the repository at this point in the history
Implement auto snapshot cleanup feature
  • Loading branch information
newgene authored Mar 9, 2023
2 parents d23f306 + 80f2383 commit 41a8898
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
12 changes: 12 additions & 0 deletions biothings/hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ class HubServer(object):
"diff",
"index",
"snapshot",
"auto_snapshot_cleaner",
"release",
"inspect",
"sync",
Expand Down Expand Up @@ -706,6 +707,17 @@ def configure_snapshot_manager(self):
snapshot_manager.poll("snapshot", snapshot_manager.snapshot_a_build)
self.managers["snapshot_manager"] = snapshot_manager

def configure_auto_snapshot_cleaner_manager(self):
    """Create, configure and register the auto snapshot cleaner manager.

    The cleaner is built on top of the already-registered "snapshot_manager"
    and "job_manager" entries of ``self.managers``, configured from
    ``config.AUTO_SNAPSHOT_CLEANUP_CONFIG`` and then stored under the
    "auto_snapshot_cleaner_manager" key.

    Raises:
        AssertionError: if the "snapshot" feature is not enabled.
    """
    assert "snapshot" in self.features, "'auto_snapshot_cleaner' feature requires 'snapshot'"
    # Imported here, at configuration time, rather than at module import time.
    from biothings.hub.dataindex.auto_snapshot_cleanup import AutoSnapshotCleanupManager

    cleaner = AutoSnapshotCleanupManager(
        snapshot_manager=self.managers["snapshot_manager"],
        job_manager=self.managers["job_manager"],
    )
    cleaner.configure(config.AUTO_SNAPSHOT_CLEANUP_CONFIG)
    self.managers["auto_snapshot_cleaner_manager"] = cleaner

def configure_release_manager(self):
assert "diff" in self.features, "'release' feature requires 'diff'"
assert "snapshot" in self.features, "'release' feature requires 'snapshot'"
Expand Down
60 changes: 60 additions & 0 deletions biothings/hub/dataindex/auto_snapshot_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from functools import partial

from biothings import config as btconfig
from biothings.utils.manager import BaseManager

logger = btconfig.logger


class AutoSnapshotCleanupManager(BaseManager):
    """Schedule recurring cleanup of old snapshots, one job per snapshot environment.

    For every environment registered on the snapshot manager that has a dict
    entry in the cleanup configuration, ``configure`` submits
    ``snapshot_manager.cleanup`` to the job manager on a cron schedule.

    The configuration (AUTO_SNAPSHOT_CLEANUP_CONFIG) maps environment names
    to their settings:

        AUTO_SNAPSHOT_CLEANUP_CONFIG = {
            "environment_name": {
                "schedule": "0 0 * * *",     # cron expression: once a day at 00:00
                "keep": 3,                   # the number of most recent snapshots to keep in one group
                "group_by": "build_config",  # the attr of which its values form groups
                "extra_filters": {}          # a set of criteria to limit which snapshots are to be cleaned
            },
            ...
        }

    "schedule" falls back to ``DEFAULT_SCHEDULE`` when missing or empty, and
    "extra_filters" falls back to no filters when missing or ``None``.
    """

    # Once a day at 00:00. The previous value "* 0 * * *" matches every minute
    # of hour 0 in standard 5-field cron syntax, i.e. 60 runs per day.
    # NOTE(review): the timezone is whatever the job manager's scheduler uses.
    DEFAULT_SCHEDULE = "0 0 * * *"

    def __init__(self, snapshot_manager, job_manager, *args, **kwargs):
        """Keep a reference to the snapshot manager whose ``cleanup`` gets scheduled.

        ``job_manager`` and any extra arguments are forwarded to ``BaseManager``.
        """
        super().__init__(job_manager, *args, **kwargs)

        self.snapshot_manager = snapshot_manager

    def configure(self, conf=None):
        """Submit one scheduled cleanup job per configured snapshot environment.

        Args:
            conf: mapping of environment name to cleaner settings (see the
                class docstring). ``None`` or an empty mapping schedules nothing.

        Environments without a dict entry in ``conf`` are logged and skipped.
        """
        self.auto_snapshot_cleaner_config = conf or {}

        for env_name in self.snapshot_manager.register.keys():
            cleaner_config = self.auto_snapshot_cleaner_config.get(env_name)

            if not isinstance(cleaner_config, dict):
                logger.info(f"Snapshot environment: {env_name}: No cleaner config found!")
                continue

            schedule = cleaner_config.get("schedule") or self.DEFAULT_SCHEDULE
            keep = cleaner_config.get("keep")
            group_by = cleaner_config.get("group_by")
            # A missing or None "extra_filters" must not reach the ** expansion
            # below: unpacking None raises TypeError and aborts configuration.
            extra_filters = cleaner_config.get("extra_filters") or {}

            # NOTE(review): "keep" and "group_by" are passed as None when not
            # configured; confirm that cleanup() accepts None for them.
            self.job_manager.submit(
                partial(
                    self.snapshot_manager.cleanup,
                    env=env_name,
                    keep=keep,
                    group_by=group_by,
                    dryrun=False,
                    **extra_filters,
                ),
                schedule=schedule,
            )
14 changes: 14 additions & 0 deletions biothings/hub/default_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,20 @@
# Snapshot environment configuration
SNAPSHOT_CONFIG = {}

# Per-environment settings for the auto snapshot cleaner feature, keyed by snapshot environment name.
# Each entry may define "schedule", "keep", "group_by" and "extra_filters" (see the example below).
# None (the default) provides no cleanup settings for any environment.
# NOTE(review): in standard 5-field cron syntax "* 0 * * *" matches every minute of hour 0;
# "0 0 * * *" is the once-a-day form -- confirm which one the example below intends.
AUTO_SNAPSHOT_CLEANUP_CONFIG = None
"""
AUTO_SNAPSHOT_CLEANUP_CONFIG = {
"environment_name": {
"schedule": "* 0 * * *", # run daily at 0am UTC
"keep": 3, # the number of most recent snapshots to keep in one group
"group_by": "build_config", # the attr of which its values form groups
"extra_filters": {} # a set of criterions to limit which snapshots are to be cleaned
},
...
}
"""

# reporting diff results, number of IDs to consider (to avoid too much mem usage)
MAX_REPORTED_IDS = 1000
# for diff updates, number of IDs randomly picked as examples when rendering the report
Expand Down

0 comments on commit 41a8898

Please sign in to comment.