From 48b618abb40fe64378eb8033a99f8ea746f9cf94 Mon Sep 17 00:00:00 2001
From: bturkus
Date: Fri, 22 Nov 2024 16:45:13 -0500
Subject: [PATCH] add some aws/eavie scripts

---
 ami_scripts/compare_aws_eavie.py | 64 ++++++++++++++++++++++++++++++++
 ami_scripts/export_s3_to_csv.py  | 54 +++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100755 ami_scripts/compare_aws_eavie.py
 create mode 100755 ami_scripts/export_s3_to_csv.py

diff --git a/ami_scripts/compare_aws_eavie.py b/ami_scripts/compare_aws_eavie.py
new file mode 100755
index 0000000..38b3b35
--- /dev/null
+++ b/ami_scripts/compare_aws_eavie.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+import csv
+import argparse
+
+def extract_unique_ids_from_bucket(bucket_csv):
+    """Extracts unique 6-digit IDs from the bucket CSV."""
+    unique_ids = set()
+    with open(bucket_csv, 'r') as file:
+        reader = csv.reader(file, delimiter='\t')  # Tab-delimited
+        for row_num, row in enumerate(reader, start=1):
+            try:
+                # Extract the key (1st column) and split to get the 6-digit ID
+                key = row[0]
+                id_part = key.split('_')[1]  # Assuming format like axv_211010_v01_sc.json
+                if id_part.isdigit() and len(id_part) == 6:
+                    unique_ids.add(id_part)
+            except IndexError:
+                print(f"Skipping malformed row {row_num}: {row}")
+            except Exception as e:
+                print(f"Error processing row {row_num}: {row}. Error: {e}")
+    return unique_ids
+
+def find_ids_with_issues(bucket_ids, streaming_csv):
+    """Finds IDs present in the bucket but marked as FALSE in the streaming CSV."""
+    issues = []
+    with open(streaming_csv, 'r') as file:
+        reader = csv.DictReader(file)
+        for row in reader:
+            idf = row['item_idf']
+            media_available = row['media_available']
+            if idf in bucket_ids and media_available.upper() == 'FALSE':
+                issues.append(idf)
+    return issues
+
+def main():
+    # Set up argument parser
+    parser = argparse.ArgumentParser(description="Compare AWS bucket and streaming platform lists.")
+    parser.add_argument('-b', '--bucket', required=True, help="Path to the AWS bucket CSV file")
+    parser.add_argument('-s', '--streaming', required=True, help="Path to the streaming platform CSV file")
+    parser.add_argument('-o', '--output', help="Output file to save the results", default='issues.txt')
+    args = parser.parse_args()
+
+    # Extract IDs from bucket CSV
+    print("Extracting unique IDs from the bucket CSV...")
+    bucket_ids = extract_unique_ids_from_bucket(args.bucket)
+    print(f"Found {len(bucket_ids)} unique IDs in the bucket.")
+
+    # Compare with streaming platform CSV
+    print("Comparing IDs with the streaming platform list...")
+    issues = find_ids_with_issues(bucket_ids, args.streaming)
+    print(f"Found {len(issues)} IDs with issues.")
+
+    # Save results
+    if issues:
+        with open(args.output, 'w') as file:
+            for issue in issues:
+                file.write(f"{issue}\n")
+        print(f"Issues saved to {args.output}")
+    else:
+        print("No issues found.")
+
+if __name__ == '__main__':
+    main()
diff --git a/ami_scripts/export_s3_to_csv.py b/ami_scripts/export_s3_to_csv.py
new file mode 100755
index 0000000..c0389aa
--- /dev/null
+++ b/ami_scripts/export_s3_to_csv.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+import boto3
+import csv
+import argparse
+
+def list_s3_objects(bucket_name, output_file):
+    # Initialize S3 client
+    s3 = boto3.client('s3')
+
+    try:
+        # Write to CSV
+        with open(output_file, mode='w', newline='') as file:
+            writer = csv.writer(file)
+            writer.writerow(['Key', 'LastModified', 'Size'])  # CSV header
+
+            # Pagination logic
+            continuation_token = None
+            total_files = 0
+
+            while True:
+                if continuation_token:
+                    response = s3.list_objects_v2(Bucket=bucket_name, ContinuationToken=continuation_token)
+                else:
+                    response = s3.list_objects_v2(Bucket=bucket_name)
+
+                # Write object details to CSV
+                for obj in response.get('Contents', []):
+                    writer.writerow([obj['Key'], obj['LastModified'], obj['Size']])
+                    total_files += 1
+
+                # Check if there are more objects to fetch
+                if response.get('IsTruncated'):  # True if there are more objects to fetch
+                    continuation_token = response.get('NextContinuationToken')
+                else:
+                    break
+
+        print(f"Export complete! {total_files} files written to {output_file}")
+
+    except Exception as e:
+        print(f"Error: {e}")
+
+def main():
+    # Set up argument parser
+    parser = argparse.ArgumentParser(description="Export contents of an S3 bucket to a CSV file.")
+    parser.add_argument('-b', '--bucket', required=True, help="Name of the S3 bucket")
+    parser.add_argument('-o', '--out', required=True, help="Output CSV file location and name")
+    args = parser.parse_args()
+
+    # Call the function to list S3 objects
+    list_s3_objects(args.bucket, args.out)
+
+if __name__ == '__main__':
+    main()
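
Usage: example invocations inferred from the argparse definitions above; the
file and bucket names are hypothetical.

    python3 ami_scripts/export_s3_to_csv.py -b my-ami-bucket -o bucket_list.csv
    python3 ami_scripts/compare_aws_eavie.py -b bucket_list.tsv -s streaming_export.csv -o issues.txt

Note that compare_aws_eavie.py reads its bucket list as tab-delimited with the
object key in the first column, while export_s3_to_csv.py writes a standard
comma-delimited CSV, so an export may need conversion before being fed to the
comparison script.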
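The manual ContinuationToken loop in export_s3_to_csv.py can also be expressed
with boto3's built-in paginator, which yields one page per list_objects_v2
response and handles the token bookkeeping internally. A minimal sketch of that
alternative, assuming the same CSV columns; the bucket and output names are
hypothetical:

    # Alternative sketch: boto3's get_paginator('list_objects_v2') replaces
    # the hand-rolled IsTruncated/NextContinuationToken loop.
    import boto3
    import csv

    def list_s3_objects_paginated(bucket_name, output_file):
        s3 = boto3.client('s3')
        paginator = s3.get_paginator('list_objects_v2')
        with open(output_file, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Key', 'LastModified', 'Size'])  # same CSV header as above
            total_files = 0
            for page in paginator.paginate(Bucket=bucket_name):
                # An empty bucket returns pages without a 'Contents' key
                for obj in page.get('Contents', []):
                    writer.writerow([obj['Key'], obj['LastModified'], obj['Size']])
                    total_files += 1
        print(f"Export complete! {total_files} files written to {output_file}")

    if __name__ == '__main__':
        list_s3_objects_paginated('my-ami-bucket', 'bucket_list.csv')  # hypothetical names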