-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Prune old develop snapshots #853
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import os | ||
import re | ||
import subprocess | ||
|
||
import sentry_sdk | ||
from github import Github | ||
|
||
|
||
sentry_sdk.init( | ||
# This cron job only runs once weekly, | ||
# so just record all transactions. | ||
traces_sample_rate=1.0, | ||
) | ||
|
||
TAG_REF_REGEX = re.compile(r"^refs/tags/(develop-\d{4}-\d{2}-\d{2})$") | ||
|
||
|
||
def _non_negative_int(text):
    """argparse ``type`` callback: parse *text* as an int, rejecting negatives.

    A negative ``--keep-last-n`` would turn the "keep the newest N" slice
    into "prune the oldest N", so it is refused at the CLI boundary.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def _select_pruning_candidates(snapshot_refs, keep_n):
    """Return ``(mirror_prefix, ref)`` pairs for every snapshot to prune.

    Only refs matching ``TAG_REF_REGEX`` are considered; anything else is
    reported and ignored *before* the newest ``keep_n`` are set aside, so a
    stray ``develop-*`` tag can never occupy one of the "keep" slots.
    The result is sorted oldest-first (the date-stamped names sort
    chronologically as plain strings).
    """
    parsed = []
    for ref in snapshot_refs:
        match = TAG_REF_REGEX.search(ref.ref)
        if match:
            parsed.append((match.group(1), ref))
        else:
            print(f"Unable to parse {ref.ref}, skipping")

    parsed.sort(key=lambda item: item[0])

    # Slice by explicit length: ``parsed[:-keep_n]`` is empty for keep_n == 0,
    # which would silently prune nothing instead of everything.
    return parsed[: max(len(parsed) - keep_n, 0)]


def main():
    """Prune all but the most recent develop snapshots.

    For each expired ``develop-YYYY-MM-DD`` tag in ``spack/spack`` this
    removes the matching prefix under the mirror root with
    ``aws s3 rm --recursive`` and, only if that succeeds, deletes the tag
    on GitHub. A failed mirror deletion is reported and the tag is kept so
    the snapshot can be retried on the next run.

    Command line:
        -k / --keep-last-n  number of newest snapshots to keep (default 8,
                            must be >= 0)
        -m / --mirror-root  root url of the mirror holding snapshot binaries

    Raises:
        Exception: if the ``GITHUB_TOKEN`` environment variable is unset or
            empty.
    """
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        raise Exception("GITHUB_TOKEN environment is not set")

    parser = argparse.ArgumentParser(
        prog="prune_snapshots.py",
        description="Prune expired snapshots",
    )

    parser.add_argument(
        "-k",
        "--keep-last-n",
        type=_non_negative_int,
        default=8,
        help="Prune all but most recent --keep-last-n",
    )
    parser.add_argument(
        "-m",
        "--mirror-root",
        default="s3://spack-binaries",
        help=("Root url of mirror where snapshot binaries are mirrored"),
    )

    args = parser.parse_args()

    keep_n = args.keep_last_n
    # Drop trailing slashes so the joined url never contains "//".
    mirror_root_url = args.mirror_root.rstrip("/")

    # Use the GitHub API to list (and later delete) the develop snapshot tags.
    py_github = Github(github_token)
    py_gh_repo = py_github.get_repo("spack/spack", lazy=True)

    # Get a list of all the tags matching the develop snapshot pattern
    #
    # NOTE(review), from the PR discussion on this line (excerpt is
    # truncated in the source): "The method described below would break this
    # query for mirror names. If we still want to use tags, we could move
    # deleted tags to hidden refs [...] I am not sure which method to prefer
    # for this, maybe use both, one for listing tags to remove, the other
    # for deleting mirrors."
    snapshot_tags = py_gh_repo.get_git_matching_refs("tags/develop-")

    # Keep the keep_n most recent snapshots, prune everything older.
    pruning_candidates = _select_pruning_candidates(snapshot_tags, keep_n)

    print("Deleting the following snapshots:")
    for mirror_prefix, tag in pruning_candidates:
        url_to_prune = f"{mirror_root_url}/{mirror_prefix}"

        print(f"  Ref: {tag.ref}, Mirror: {url_to_prune}")

        # First, try to delete the mirror associated with the snapshot
        #
        # NOTE(review), from the PR discussion on this line (excerpt is
        # truncated in the source): an idea for making sure cache.spack.io
        # and the mirrors stay in sync is to leave a buffer between dropping
        # the tag and deleting the mirror contents. Run weekly, that gives
        # the cache.spack.io page time to be regenerated in between.
        # Suggested timeline:
        #   -- Prune Cron Runs         @ 2023/01/01 0100 UTC
        #   -- Generate cache.spack.io @ 2023/01/02 0100 UTC
        #   -- Prune Cron Runs         @ 2023/01/08 0100 UTC
        # This function does not implement that buffer: it deletes the
        # mirror and then the tag in the same run.
        try:
            subprocess.run(["aws", "s3", "rm", "--recursive", url_to_prune], check=True)
        except subprocess.CalledProcessError:
            print(f"Failed to delete the mirror url {url_to_prune}, skipping")
            continue

        # If mirror deletion succeeded, also delete the tag from GitHub
        tag.delete()
|
||
|
||
if __name__ == "__main__": | ||
main() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
awscli==1.32.101 | ||
certifi==2023.5.7 | ||
cffi==1.15.1 | ||
charset-normalizer==3.1.0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# IAM Role for granting delete access to spack-binaries bucket for the snapshot pruner.
#
# The role is assumed through the EKS OIDC provider (sts:AssumeRoleWithWebIdentity).
# The trust policy pins both the token audience and the subject, so only the
# `prune-snapshots` service account in the `custom` namespace can assume it;
# checking `aud` alone would let any service account in the cluster do so.
resource "aws_iam_role" "delete_spack_binaries" {
  name        = "DeleteFromBucketSpackBinaries${local.suffix}"
  description = "Managed by Terraform. Grants Kubernetes pods access to delete objects from the spack-binaries S3 bucket"
  assume_role_policy = jsonencode({
    "Version" : "2012-10-17",
    "Statement" : [
      {
        "Effect" : "Allow",
        "Principal" : {
          "Federated" : module.eks.oidc_provider_arn
        },
        "Action" : "sts:AssumeRoleWithWebIdentity",
        "Condition" : {
          "StringEquals" : {
            "${module.eks.oidc_provider}:aud" : "sts.amazonaws.com",
            # Must match the ServiceAccount namespace/name that carries the role-arn annotation.
            "${module.eks.oidc_provider}:sub" : "system:serviceaccount:custom:prune-snapshots"
          }
        }
      }
    ]
  })
}
|
||
# Policy for the snapshot pruner role: allows s3:DeleteObject on every key of
# the bucket exposed by module.protected_binary_mirror.
# NOTE(review): only s3:DeleteObject is granted here. A recursive delete by
# prefix has to list keys first, so confirm that list access on the bucket is
# available to this role some other way (e.g. a bucket policy).
resource "aws_iam_policy" "delete_spack_binaries" {
  name        = "DeleteObjectsFromBucketSpackBinaries${local.suffix}"
  description = "Allows deletion of any object in the ${module.protected_binary_mirror.bucket_name} bucket."
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = "s3:DeleteObject"
        Resource = "${module.protected_binary_mirror.bucket_arn}/*"
      },
    ]
  })
}
|
||
# Attach the delete-objects policy to the snapshot pruner role.
resource "aws_iam_role_policy_attachment" "delete_spack_binaries" {
  role       = aws_iam_role.delete_spack_binaries.name
  policy_arn = aws_iam_policy.delete_spack_binaries.arn
}
|
||
# Kubernetes ServiceAccount for the snapshot pruner. The role-arn annotation
# links it to the delete_spack_binaries IAM role; depends_on makes sure the
# policy is attached to that role before the account is created.
resource "kubectl_manifest" "snapshot_pruner_service_account" {
  yaml_body = <<-YAML
    apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: prune-snapshots
      namespace: custom
      annotations:
        # DeleteFromBucketSpackBinaries
        eks.amazonaws.com/role-arn: ${aws_iam_role.delete_spack_binaries.arn}
  YAML
  depends_on = [
    aws_iam_role_policy_attachment.delete_spack_binaries
  ]
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on previous comments related to pruning, I think we should avoid passing production paths as defaults and require them to be specified.