cmd-buildfetch: add --aws-config-file option
This will allow us to specify the AWS_CONFIG_FILE from the command
line without having to set it in the environment.

This required reworking the cosalib/s3 library into an S3() class
so that we could call boto3.client('s3') after setting the
AWS_CONFIG_FILE environment variable in cmd-buildfetch. Otherwise
the client would be created as soon as the library was imported,
and setting the environment variable later would have no effect
(too late).
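A minimal sketch of the ordering problem, assuming nothing beyond stock
boto3 (the names and paths here are illustrative, not the actual cosalib
code):

    import os
    import boto3

    # A module-level client is created at import time, which is when boto3
    # resolves AWS_CONFIG_FILE and other environment variables:
    # S3_CLIENT = boto3.client('s3')   # too early; later env changes are ignored

    class S3:
        def __init__(self):
            # Deferred: the boto3 client is created only when S3() is
            # instantiated, after the caller has set AWS_CONFIG_FILE.
            self.client = boto3.client('s3')

    os.environ["AWS_CONFIG_FILE"] = "/path/to/aws-config"  # hypothetical path
    s3 = S3()  # this client picks up the config file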
dustymabe committed Jul 6, 2022
1 parent 9214b9b commit 9d55a20
Showing 3 changed files with 59 additions and 62 deletions.
15 changes: 11 additions & 4 deletions src/cmd-buildfetch
@@ -22,7 +22,7 @@ from cosalib.cmdlib import (
     retry_stop,
     rm_allow_noent,
     sha256sum_file)
-from cosalib.s3 import download_file, head_bucket, head_object
+from cosalib.s3 import S3
 
 retry_requests_exception = (retry_if_exception_type(requests.Timeout) |
                             retry_if_exception_type(requests.ReadTimeout) |
@@ -34,6 +34,8 @@ FCOS_STREAMS_URL = "https://builds.coreos.fedoraproject.org/prod/streams"
 
 def main():
     args = parse_args()
+    if args.aws_config_file:
+        os.environ["AWS_CONFIG_FILE"] = args.aws_config_file
 
     url = args.url or f'{FCOS_STREAMS_URL}/{args.stream}/builds'
     if url.startswith("s3://"):
@@ -157,6 +159,8 @@ def parse_args():
                         help="the target architecture(s)")
     parser.add_argument("--artifact", default=[], action='append',
                         help="Fetch given image artifact(s)", metavar="ARTIFACT")
+    parser.add_argument("--aws-config-file", metavar='CONFIG', default="",
+                        help="Path to AWS config file")
     return parser.parse_args()
 
 
@@ -221,20 +225,23 @@ class HTTPFetcher(Fetcher):
 
 
 class S3Fetcher(Fetcher):
+    def __init__(self, url_base):
+        super().__init__(url_base)
+        self.s3_client = S3()
+
     def fetch_impl(self, url, dest):
         assert url.startswith("s3://")
         bucket, key = url[len("s3://"):].split('/', 1)
         # this function does not need to be retried with the decorator as download_file would
         # retry automatically based on s3config settings
-        download_file(bucket, key, dest)
+        self.s3_client.download_file(bucket, key, dest)
 
     def exists_impl(self, url):
         assert url.startswith("s3://")
         bucket, key = url[len("s3://"):].split('/', 1)
         # sanity check that the bucket exists and we have access to it
-        head_bucket(bucket=bucket)
-        return head_object(bucket=bucket, key=key)
+        self.s3_client.head_bucket(bucket=bucket)
+        return self.s3_client.head_object(bucket=bucket, key=key)
 
 
 class LocalFetcher(Fetcher):
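With the new option, the config file can be passed straight on the command
line. A hedged example invocation (the bucket URL and path are made up, and
this assumes the usual cosa wrapper that dispatches to src/cmd-buildfetch):

    cosa buildfetch --url=s3://my-bucket/prod/builds --aws-config-file=/path/to/aws-config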
15 changes: 5 additions & 10 deletions src/cosalib/prune.py
@@ -2,12 +2,7 @@
 import json
 import os
 
-from cosalib.s3 import (
-    head_object,
-    list_objects,
-    download_file,
-    delete_object
-)
+from cosalib.s3 import S3
 
 from cosalib.aws import (
     deregister_ami,
@@ -30,7 +25,7 @@ def get_unreferenced_s3_builds(active_build_set, bucket, prefix):
     :type active_build_set: list
     """
     print(f"Looking for unreferenced builds in s3://{bucket}/{prefix}")
-    s3_subdirs = list_objects(bucket, f"{prefix}/", result_key='CommonPrefixes')
+    s3_subdirs = S3().list_objects(bucket, f"{prefix}/", result_key='CommonPrefixes')
     s3_matched = set()
     s3_unmatched = set()
     for prefixKey in s3_subdirs:
@@ -58,10 +53,10 @@ def fetch_build_meta(builds, buildid, arch, bucket, prefix):
     os.makedirs(build_dir, exist_ok=True)
     s3_key = f"{prefix}/{buildid}/{arch}/meta.json"
     print(f"Fetching meta.json for '{buildid}' from s3://{bucket}/{prefix} to {meta_json_path}")
-    head_result = head_object(bucket, s3_key)
+    head_result = S3().head_object(bucket, s3_key)
     if head_result:
         print(f"Found s3 key at {s3_key}")
-        download_file(bucket, s3_key, meta_json_path)
+        S3().download_file(bucket, s3_key, meta_json_path)
     else:
         print(f"Failed to find object at {s3_key}")
         return None
@@ -143,4 +138,4 @@ def delete_build(build, bucket, prefix, cloud_config, force=False):
 
     # Delete s3 bucket
     print(f"Deleting key {prefix}{build.id} from bucket {bucket}")
-    delete_object(bucket, f"{prefix}{str(build.id)}")
+    S3().delete_object(bucket, f"{prefix}{str(build.id)}")
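For reference, list_objects with the default delimiter="/" and
result_key='CommonPrefixes' yields one dict per "subdirectory", each shaped
like {'Prefix': 'prefix/<buildid>/'} per the standard boto3 ListObjectsV2
response. A small sketch of the iteration in get_unreferenced_s3_builds
(the bucket and prefix are invented):

    from cosalib.s3 import S3

    # Each CommonPrefixes entry looks like {'Prefix': 'prefix/36.20220703.1.0/'}
    for prefixKey in S3().list_objects("my-bucket", "prefix/", result_key='CommonPrefixes'):
        buildid = prefixKey['Prefix'].rstrip('/').rsplit('/', 1)[-1]
        print(buildid)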
91 changes: 43 additions & 48 deletions src/cosalib/s3.py
@@ -9,51 +9,46 @@
 from tenacity import retry
 
 
-S3 = boto3.client('s3')
-S3CONFIG = boto3.s3.transfer.TransferConfig(
-    num_download_attempts=5
-)
-
-
-def download_file(bucket, key, dest):
-    S3.download_file(bucket, key, dest, Config=S3CONFIG)
-
-
-@retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
-def head_bucket(bucket):
-    S3.head_bucket(Bucket=bucket)
-
-
-@retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
-def head_object(bucket, key):
-    try:
-        S3.head_object(Bucket=bucket, Key=key)
-    except ClientError as e:
-        if e.response['Error']['Code'] == '404':
-            return False
-        raise e
-    return True
-
-
-@retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
-def list_objects(bucket, prefix, delimiter="/", result_key='Contents'):
-    kwargs = {
-        'Bucket': bucket,
-        'Delimiter': delimiter,
-        'Prefix': prefix,
-    }
-    isTruncated = True
-    while isTruncated:
-        batch = S3.list_objects_v2(**kwargs)
-        yield from batch.get(result_key) or []
-        kwargs['ContinuationToken'] = batch.get('NextContinuationToken')
-        isTruncated = batch['IsTruncated']
-
-
-@retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
-def delete_object(bucket, key):
-    sub_objects = list(list_objects(bucket, key))
-    if sub_objects != []:
-        print("S3: deleting {sub_objects}")
-        S3.delete_objects(Bucket=bucket, Delete=sub_objects)
-    S3.delete_object(Bucket=bucket, Key=key)
+class S3(object):
+    def __init__(self):
+        self.client = boto3.client('s3')
+
+    def download_file(self, bucket, key, dest):
+        self.client.download_file(bucket, key, dest,
+                                  Config=boto3.s3.transfer.TransferConfig(num_download_attempts=5))
+
+    @retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
+    def head_bucket(self, bucket):
+        self.client.head_bucket(Bucket=bucket)
+
+    @retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
+    def head_object(self, bucket, key):
+        try:
+            self.client.head_object(Bucket=bucket, Key=key)
+        except ClientError as e:
+            if e.response['Error']['Code'] == '404':
+                return False
+            raise e
+        return True
+
+    @retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
+    def list_objects(self, bucket, prefix, delimiter="/", result_key='Contents'):
+        kwargs = {
+            'Bucket': bucket,
+            'Delimiter': delimiter,
+            'Prefix': prefix,
+        }
+        isTruncated = True
+        while isTruncated:
+            batch = self.client.list_objects_v2(**kwargs)
+            yield from batch.get(result_key) or []
+            kwargs['ContinuationToken'] = batch.get('NextContinuationToken')
+            isTruncated = batch['IsTruncated']
+
+    @retry(stop=retry_stop, retry=retry_boto_exception, before_sleep=retry_callback)
+    def delete_object(self, bucket, key):
+        sub_objects = list(self.list_objects(bucket, key))
+        if sub_objects != []:
+            print(f"S3: deleting {sub_objects}")
+            self.client.delete_objects(Bucket=bucket, Delete={'Objects': [{'Key': o['Key']} for o in sub_objects]})
+        self.client.delete_object(Bucket=bucket, Key=key)
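Taken together, the intended call pattern is roughly the following (the
bucket and key names are invented for illustration):

    import os
    from cosalib.s3 import S3

    os.environ["AWS_CONFIG_FILE"] = "/path/to/aws-config"  # e.g. set by cmd-buildfetch
    s3 = S3()  # the boto3 client is created here, so it honors AWS_CONFIG_FILE
    if s3.head_object("my-bucket", "builds/latest/meta.json"):
        s3.download_file("my-bucket", "builds/latest/meta.json", "meta.json")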
