diff --git a/git-fat b/git-fat
index 5221532..cb1740e 100755
--- a/git-fat
+++ b/git-fat
@@ -11,6 +11,7 @@ import subprocess
 import shlex
 import shutil
 import itertools
+import json
 import threading
 import time
 import collections
@@ -176,6 +177,30 @@ class GitFat(object):
             raise RuntimeError('No rsync.remote in %s' % cfgpath)
         return remote, ssh_port, ssh_user, options
 
+    def list_objects(self, s3_bucket):
+        bucket = s3_bucket[5:]
+        cmd = [
+            "aws",
+            "s3api",
+            "list-objects",
+            "--bucket",
+            bucket
+        ]
+
+        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = process.communicate()
+
+        if out:
+            # try to parse list from json
+            data = json.loads(out)
+            keys = [key.get('Key') for key in data.get('Contents')]
+            return keys
+        else:
+            print("Failed to list bucket with error: %s" % err)
+            sys.exit(1)
+
+        return []
+
     def get_aws_cmd(self, push, s3_bucket, files):
         if not which('aws'):
             sys.stderr.write('Could not find aws cli install.\n')
@@ -183,18 +208,23 @@
 
         if not s3_bucket.startswith('s3://'):
             s3_bucket = "s3://{}".format(s3_bucket)
 
+        cmds = []
         if push:
             self.verbose('Pushing to %s' % (s3_bucket))
+            remote_files = self.list_objects(s3_bucket)
             for file in files:
-                cmd = [
-                    "aws",
-                    "s3",
-                    "cp",
-                    self.objdir + "/" + file,
-                    s3_bucket + "/" + file
-                ]
-                cmds.append(cmd)
+                # only push files that do not exist in the remote
+                if file not in remote_files:
+                    self.verbose("%s does not exist in remote, will be pushed" % file)
+                    cmd = [
+                        "aws",
+                        "s3",
+                        "cp",
+                        self.objdir + "/" + file,
+                        s3_bucket + "/" + file
+                    ]
+                    cmds.append(cmd)
         else:
             self.verbose('Pulling from %s' % (s3_bucket))
             for file in files:
@@ -206,6 +236,7 @@ class GitFat(object):
                 self.objdir + "/" + file
             ]
             cmds.append(cmd)
+        return cmds
 
     def get_rsync_command(self,push,files):
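
Note (illustrative, not part of the patch): list_objects() assumes that `aws s3api list-objects` prints a JSON document whose "Contents" array holds one entry per object, each carrying the object's "Key". Below is a minimal, self-contained Python sketch of that parsing step against a made-up, abridged response; the object names are invented, and real responses also carry fields such as LastModified, ETag and StorageClass that the patch ignores.

    import json

    # Hypothetical, abridged `aws s3api list-objects` output; only the
    # fields that list_objects() actually reads are shown here.
    sample_response = '''
    {
        "Contents": [
            {"Key": "0123456789abcdef0123456789abcdef01234567", "Size": 1048576},
            {"Key": "89abcdef0123456789abcdef0123456789abcdef", "Size": 2048}
        ]
    }
    '''

    data = json.loads(sample_response)
    # Same extraction as in list_objects(): keep only the object keys.
    keys = [obj.get('Key') for obj in data.get('Contents')]
    print(keys)

With that key list in hand, get_aws_cmd() skips any local object whose name already appears in the bucket, so a push only issues `aws s3 cp` for objects missing from the remote.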