add some aws/eavie scripts
bturkus committed Nov 22, 2024
1 parent 9e82e59 commit 48b618a
Showing 2 changed files with 118 additions and 0 deletions.
64 changes: 64 additions & 0 deletions ami_scripts/compare_aws_eavie.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python3

import csv
import argparse

def extract_unique_ids_from_bucket(bucket_csv):
    """Extracts unique 6-digit IDs from the bucket CSV."""
    unique_ids = set()
    with open(bucket_csv, 'r') as file:
        reader = csv.reader(file, delimiter='\t')  # Tab-delimited
        for row_num, row in enumerate(reader, start=1):
            try:
                # Extract the key (1st column) and split to get the 6-digit ID
                key = row[0]
                id_part = key.split('_')[1]  # Assuming format like axv_211010_v01_sc.json
                if id_part.isdigit() and len(id_part) == 6:
                    unique_ids.add(id_part)
            except IndexError:
                print(f"Skipping malformed row {row_num}: {row}")
            except Exception as e:
                print(f"Error processing row {row_num}: {row}. Error: {e}")
    return unique_ids

def find_ids_with_issues(bucket_ids, streaming_csv):
    """Finds IDs present in the bucket but marked as FALSE in the streaming CSV."""
    issues = []
    with open(streaming_csv, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            idf = row['item_idf']
            media_available = row['media_available']
            if idf in bucket_ids and media_available.upper() == 'FALSE':
                issues.append(idf)
    return issues

def main():
    # Set up argument parser
    parser = argparse.ArgumentParser(description="Compare AWS bucket and streaming platform lists.")
    parser.add_argument('-b', '--bucket', required=True, help="Path to the AWS bucket CSV file")
    parser.add_argument('-s', '--streaming', required=True, help="Path to the streaming platform CSV file")
    parser.add_argument('-o', '--output', help="Output file to save the results", default='issues.txt')
    args = parser.parse_args()

    # Extract IDs from bucket CSV
    print("Extracting unique IDs from the bucket CSV...")
    bucket_ids = extract_unique_ids_from_bucket(args.bucket)
    print(f"Found {len(bucket_ids)} unique IDs in the bucket.")

    # Compare with streaming platform CSV
    print("Comparing IDs with the streaming platform list...")
    issues = find_ids_with_issues(bucket_ids, args.streaming)
    print(f"Found {len(issues)} IDs with issues.")

    # Save results
    if issues:
        with open(args.output, 'w') as file:
            for issue in issues:
                file.write(f"{issue}\n")
        print(f"Issues saved to {args.output}")
    else:
        print("No issues found.")

if __name__ == '__main__':
    main()
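
A minimal invocation sketch for compare_aws_eavie.py, assuming a tab-delimited bucket export whose first column holds keys like axv_211010_v01_sc.json, and a streaming CSV with item_idf and media_available columns (the filenames here are illustrative, not from the commit):

    python3 ami_scripts/compare_aws_eavie.py -b bucket_list.csv -s streaming_export.csv -o issues.txt

Each line of the output file is a 6-digit ID that exists in the bucket but is marked FALSE for media_available on the streaming platform.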
54 changes: 54 additions & 0 deletions ami_scripts/export_s3_to_csv.py
@@ -0,0 +1,54 @@
#!/usr/bin/env python3

import boto3
import csv
import argparse

def list_s3_objects(bucket_name, output_file):
    # Initialize S3 client
    s3 = boto3.client('s3')

    try:
        # Write to CSV
        with open(output_file, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Key', 'LastModified', 'Size'])  # CSV header

            # Pagination logic
            continuation_token = None
            total_files = 0

            while True:
                if continuation_token:
                    response = s3.list_objects_v2(Bucket=bucket_name, ContinuationToken=continuation_token)
                else:
                    response = s3.list_objects_v2(Bucket=bucket_name)

                # Write object details to CSV
                for obj in response.get('Contents', []):
                    writer.writerow([obj['Key'], obj['LastModified'], obj['Size']])
                    total_files += 1

                # Check if there are more objects to fetch
                if response.get('IsTruncated'):
                    continuation_token = response.get('NextContinuationToken')
                else:
                    break

        print(f"Export complete! {total_files} files written to {output_file}")

    except Exception as e:
        print(f"Error: {e}")

def main():
    # Set up argument parser
    parser = argparse.ArgumentParser(description="Export contents of an S3 bucket to a CSV file.")
    parser.add_argument('-b', '--bucket', required=True, help="Name of the S3 bucket")
    parser.add_argument('-o', '--out', required=True, help="Output CSV file location and name")
    args = parser.parse_args()

    # Call the function to list S3 objects
    list_s3_objects(args.bucket, args.out)

if __name__ == '__main__':
    main()
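
A minimal invocation sketch for export_s3_to_csv.py, assuming boto3 can find AWS credentials in the usual places (the bucket name is illustrative):

    python3 ami_scripts/export_s3_to_csv.py -b my-ami-bucket -o bucket_list.csv

Note that csv.writer emits comma-separated rows here, while compare_aws_eavie.py reads its bucket list as tab-delimited; if the two scripts are chained, this export's output would need to be written (or re-read) with a matching delimiter.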
