Skip to content

Commit

Permalink
Initial commit from Niall McCarroll's blog
Browse files Browse the repository at this point in the history
  • Loading branch information
vincetse committed Aug 26, 2016
0 parents commit 9bf622b
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
*.py[c,o]
*~
\#*\#
\.\#*
TAGS
docs/_build
docs/api
.coverage
coverage
.DS_Store
.vagrant
manifests/build

# swap
[._]*.s[a-w][a-z]
[._]s[a-w][a-z]
# session
Session.vim
# temporary
.netrwhist
*~
# auto-generated tag files
tags
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# S3 Cache for Python

[Niall McCarroll](http://www.mccarroll.net/) published a neat little [AWS S3 cache](http://www.mccarroll.net/snippets/s3boto/index.html) on his blog that I found really useful. I could not find it on [PyPI](https://pypi.python.org/pypi), so here it is in a refactored form as a public service.

## References

1. [A local file cache for amazon S3 using python and boto](http://www.mccarroll.net/snippets/s3boto/index.html)
18 changes: 18 additions & 0 deletions persist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from s3cache import s3cache

# Demo: write, append to, read back and finally delete one file through the
# S3 cache. Runs against whatever bucket the s3cache module was configured with.

# log every cache operation, and turn local caching off (see
# s3cache.setCaching) for this demo
s3cache.setVerbosity(True)
s3cache.setCaching(False)

# create the file
f = s3cache.open("/abc/world.txt","w")
f.write("Hello")
f.close()

# append to it
f = s3cache.open("/abc/world.txt","a")
f.write(" World")
f.close()

# read the combined contents back
f2 = s3cache.open("/abc/world.txt","r")
# parenthesised single-argument form: valid as a Python 2 print statement
# and as a Python 3 print() call
print(f2.readline())
f2.close()

# clean up
s3cache.remove("/abc/world.txt")
70 changes: 70 additions & 0 deletions s3cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import boto
import os
from s3file import s3file

class s3cache(object):
    """Cache manager that fronts an S3 bucket with files in a local directory.

    Constructing an instance registers it as the singleton held in
    ``s3cache.instance``; the static helpers (``open``, ``remove``,
    ``setVerbosity``, ``setCaching``) all act on that singleton.
    """

    # singleton instance (overwritten by every constructor call)
    instance = None

    def __init__(self, tmpdir, bucket_name):
        """Initialize the cache and register it as the singleton.

        tmpdir      -- local directory in which cached copies are stored
        bucket_name -- name of the S3 bucket backing the cache
        """
        self.bucket_name = bucket_name
        self.conn = None
        self.bucket = None
        self.tmpdir = tmpdir
        self.verbosity = False
        self.caching = True
        s3cache.instance = self

    def connect(self):
        """Ensure that a connection to S3 and a bucket handle exist.

        Does nothing if a connection has already been established.
        Raises RuntimeError if connecting or obtaining the bucket fails.
        """
        if self.conn is not None:
            return
        try:
            conn = boto.connect_s3()
            bucket = conn.create_bucket(self.bucket_name)
        except Exception as err:
            # A real exception type: raising a plain string is itself a
            # TypeError on Python 2.6+ and hides the actual cause.
            raise RuntimeError("Error - cannot connect to S3 (%s)" % (err,))
        # Publish both only once both succeeded, so a failed bucket lookup
        # cannot leave conn set with bucket still None (which would make
        # every later connect() call skip the retry).
        self.conn = conn
        self.bucket = bucket

    def log(self, msg):
        """Write a message to stdout, but only if verbosity is on."""
        if self.verbosity:
            print(msg)

    def removePath(self, path):
        """Remove the file at `path` (delegates to s3file.remove)."""
        self.connect()
        # Use the instance that was just connected rather than whatever
        # s3cache.instance currently points to.
        s3f = s3file(self, path)
        s3f.remove()

    @staticmethod
    def setVerbosity(verbosity):
        """Set verbosity on/off for the singleton (default=off)."""
        s3cache.instance.verbosity = verbosity

    @staticmethod
    def setCaching(caching):
        """Set local file caching on/off for the singleton (default=on)."""
        s3cache.instance.caching = caching

    @staticmethod
    def open(path, mode):
        """Open a file in the cache and return a file-like s3file object.

        path -- S3 key of the file
        mode -- file mode string passed on to s3file.open
        """
        s3cache.instance.connect()
        s3f = s3file(s3cache.instance, path)
        s3f.open(mode)
        return s3f

    @staticmethod
    def remove(path):
        """Remove the file at `path` using the singleton instance."""
        return s3cache.instance.removePath(path)

# Configuration
#
# The singleton cache instance is built here, when the module is loaded.

# local directory in which cached copies of S3 files are kept
local_cache_directory = "/tmp"
# name of the S3 bucket backing the cache
s3_bucket_name = "mccarroll.net.test"

# constructing the object registers it as s3cache.instance
s3cache(tmpdir=local_cache_directory, bucket_name=s3_bucket_name)
111 changes: 111 additions & 0 deletions s3file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import boto
from boto.s3.key import Key
import os

#
# wrap a local file with code to copy the contents into
# and out of S3
#
class s3file(object):
    """Wrap a local file with code to copy its contents into and out of S3.

    Attributes that s3file does not define itself (write, readline, closed,
    ...) are forwarded to the underlying local file once open() has been
    called.
    """

    def __init__(self, mgr, path):
        """Bind the file to a cache manager and a path; does not open it.

        mgr  -- cache manager; this class reads mgr.tmpdir, mgr.caching and
                mgr.bucket, and calls mgr.log(msg)
        path -- S3 key of the file; also used to derive the local cache name
        """
        self.mgr = mgr
        self.path = path
        self.mode = None
        # Defined up front so "not opened yet" is an ordinary None rather
        # than a missing attribute.
        self.file = None
        self.tmppath = os.path.join(mgr.tmpdir, self.mangle(path))

    def removeCache(self):
        """Remove the local cache copy if it exists (best effort)."""
        if not os.path.exists(self.tmppath):
            return
        try:
            os.remove(self.tmppath)
        except OSError:
            self.log("problem removing local cache file("+self.tmppath+")")
        else:
            self.log("removed local cache file("+self.tmppath+")")

    def remove(self):
        """Remove the local cache copy and then the file in S3.

        A failure of the S3 delete is logged, not raised.
        """
        self.log("removing file")
        self.removeCache()
        self.log("removing file from S3")
        k = Key(self.mgr.bucket)
        k.key = self.path
        try:
            k.delete()
        except Exception:
            self.log("problem removing file")

    def open(self, mode):
        """Open the local cache file, first fetching it from S3 if needed.

        mode -- file mode string. For modes containing 'r' or 'a' the file
                is downloaded from S3 unless caching is on and a local copy
                already exists; other modes start a new local file.

        A failed download is only logged; the local open() that follows
        raises on its own if the mode requires an existing file.
        """
        self.mode = mode
        if 'r' in self.mode or 'a' in self.mode:
            # opening an existing file, try to copy in from s3 if not in local cache
            self.log("trying to open existing file")
            use_local_copy = self.mgr.caching
            if use_local_copy and not os.path.exists(self.tmppath):
                self.log("not found in local cache, attempting to load from S3")
                use_local_copy = False
            if use_local_copy:
                self.log("file found in local cache")
            else:
                try:
                    k = Key(self.mgr.bucket)
                    k.key = self.path
                    k.get_contents_to_filename(self.tmppath)
                    self.log("file located in S3, downloaded from S3 to cache")
                except Exception:
                    self.log("file not found in S3, opening new empty file in local cache")
        else:
            self.log("opening new file in local cache for writing")
        # open the local file
        self.log("opening local cache file("+self.tmppath+")")
        self.file = open(self.tmppath, self.mode)

    def mangle(self, path):
        """Return `path` with separators replaced by underscores.

        Each '/' becomes '_' and each existing '_' is doubled.
        NOTE: the mapping is not unique, e.g. "a//b" and "a_b" both give
        "a__b", so such paths share one local cache file.
        """
        replacements = {'/': '_', '_': '__'}
        return ''.join(replacements.get(c, c) for c in path)

    def __getattr__(self, name):
        """Forward attributes s3file does not define to the local file.

        Raises AttributeError if the file has not been opened yet.
        """
        # Read through __dict__: going via self.file would re-enter
        # __getattr__ (and recurse without end) whenever 'file' is unset.
        local_file = self.__dict__.get('file')
        if local_file is None:
            raise AttributeError(
                "s3file has no attribute %r (file is not open)" % (name,))
        return getattr(local_file, name)

    # utility class to delegate
    # a call on this class to the local file
    # (no longer used by __getattr__; kept for existing callers)
    class delegator(object):

        def __init__(self, target, name):
            """Remember the target object and the method name to call."""
            self.target = target
            self.name = name

        def __call__(self, *args, **kwargs):
            """Call the named method on the target with the given arguments."""
            # getattr follows the class hierarchy; indexing the class
            # __dict__ directly misses inherited methods.
            return getattr(self.target, self.name)(*args, **kwargs)

    def close(self):
        """Close the local file and copy it back to S3 if it was writable.

        The upload happens for modes containing 'w', 'a' or '+'. An upload
        failure is logged, not raised. Calling close() before open() does
        nothing.
        """
        if self.file is None:
            return
        self.log("closing local cache file("+self.tmppath+")")
        self.file.close()
        # '+' modes (e.g. "r+") can modify the file as well
        if any(flag in self.mode for flag in 'wa+'):
            self.log("writing updated cache file contents to S3")
            try:
                k = Key(self.mgr.bucket)
                k.key = self.path
                k.set_contents_from_filename(self.tmppath)
                self.log("write complete")
            except Exception:
                self.log("ERROR - write to S3 failed")

    def log(self, msg):
        """Log `msg` through the cache manager, prefixed with this file's path."""
        self.mgr.log("s3file("+self.path+"): "+msg)

0 comments on commit 9bf622b

Please sign in to comment.