-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit from Niall McCarroll's blog
- Loading branch information
0 parents
commit 9bf622b
Showing
5 changed files
with
229 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
*.py[c,o] | ||
*~ | ||
\#*\# | ||
\.\#* | ||
TAGS | ||
docs/_build | ||
docs/api | ||
.coverage | ||
coverage | ||
.DS_Store | ||
.vagrant | ||
manifests/build | ||
|
||
# swap | ||
[._]*.s[a-w][a-z] | ||
[._]s[a-w][a-z] | ||
# session | ||
Session.vim | ||
# temporary | ||
.netrwhist | ||
*~ | ||
# auto-generated tag files | ||
tags |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# S3 Cache for Python | ||
|
||
[Niall McCarroll](http://www.mccarroll.net/) published a neat little [AWS S3 cache](http://www.mccarroll.net/snippets/s3boto/index.html) on his blog. I found it really useful but could not find it on [PyPI](https://pypi.python.org/pypi), so here it is, in refactored form, as a public service. | ||
|
||
## References | ||
|
||
1. [A local file cache for amazon S3 using python and boto](http://www.mccarroll.net/snippets/s3boto/index.html) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Example usage of the S3 cache: write a file, append to it, read it back
# and finally delete it again.
from s3cache import s3cache

# Log every cache/S3 operation, and turn local caching off so that every
# open for reading/appending fetches the file from S3 first.
s3cache.setVerbosity(True)
s3cache.setCaching(False)

# Create the file; closing it copies the contents to S3.
f = s3cache.open("/abc/world.txt", "w")
f.write("Hello")
f.close()

# Re-open for appending and add to the existing contents.
f = s3cache.open("/abc/world.txt", "a")
f.write(" World")
f.close()

# Read the combined contents back.  The call form of print is used so the
# script parses on Python 3 as well as Python 2.
f2 = s3cache.open("/abc/world.txt", "r")
print(f2.readline())
f2.close()

# Clean up: remove the local cache copy and the S3 object.
s3cache.remove("/abc/world.txt")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import boto | ||
import os | ||
from s3file import s3file | ||
|
||
class s3cache(object):
    """Manager for a local file cache backed by an S3 bucket.

    Creating an instance registers it as ``s3cache.instance``.  The static
    helpers (``setVerbosity``, ``setCaching``, ``open`` and ``remove``) all
    operate on that registered instance, so an instance must have been
    created before any of them is called.
    """

    # singleton instance (assigned by __init__)
    instance = None

    def __init__(self, tmpdir, bucket_name):
        """Initialize the cache and register it as the singleton.

        tmpdir      -- local directory in which cache files are stored
        bucket_name -- name of the S3 bucket that backs the cache

        No connection is made here; see connect().
        """
        self.bucket_name = bucket_name
        self.conn = None
        self.bucket = None
        self.tmpdir = tmpdir
        self.verbosity = False
        self.caching = True
        s3cache.instance = self

    def connect(self):
        """Ensure that a connection to S3 (and the bucket) exists.

        Does nothing if a connection was already established.
        Raises IOError if the connection or the bucket cannot be obtained.
        """
        if self.conn is not None:
            return
        try:
            conn = boto.connect_s3()
            bucket = conn.create_bucket(self.bucket_name)
        except Exception as e:
            # A string cannot be raised; raise a real exception and keep
            # the underlying cause in the message.
            raise IOError("Error - cannot connect to S3: %s" % (e,))
        # Only commit once both steps succeeded, so that a failure leaves
        # the object unconnected and a later connect() retries in full.
        self.conn = conn
        self.bucket = bucket

    def log(self, msg):
        """Write a message to the log (stdout) if verbosity is on."""
        if self.verbosity:
            print(msg)

    def removePath(self, path):
        """Remove a file from the local cache and from S3."""
        self.connect()
        # Use this (connected) manager rather than the class-level singleton.
        s3f = s3file(self, path)
        s3f.remove()

    @staticmethod
    def setVerbosity(verbosity):
        """Set verbosity on/off (default=off)."""
        s3cache.instance.verbosity = verbosity

    @staticmethod
    def setCaching(caching):
        """Set local file caching on/off (default=on)."""
        s3cache.instance.caching = caching

    @staticmethod
    def open(path, mode):
        """Open a file in the cache and return a file-like object.

        path -- path of the file (used as the S3 key)
        mode -- mode string passed on to the s3file's open()

        Raises IOError if S3 cannot be reached.
        """
        s3cache.instance.connect()
        s3f = s3file(s3cache.instance, path)
        s3f.open(mode)
        return s3f

    @staticmethod
    def remove(path):
        """Remove a file from the cache and from S3."""
        return s3cache.instance.removePath(path)
|
||
# Configuration
#
# Importing this module builds the singleton cache from the two settings
# below.

# local directory that holds the cached copies
local_cache_directory = "/tmp"
# S3 bucket that backs the cache
s3_bucket_name = "mccarroll.net.test"

# create the singleton instance (the constructor registers it on the class)
s3cache(tmpdir=local_cache_directory, bucket_name=s3_bucket_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import boto | ||
from boto.s3.key import Key | ||
import os | ||
|
||
# | ||
# wrap a local file with code to copy the contents into | ||
# and out of S3 | ||
# | ||
class s3file(object):
    """File-like wrapper around a local cache file that is synced with S3.

    The constructor only records the cache manager and the path; call
    open() to obtain the local file.  Attributes that are not defined here
    (read, write, readline, closed, ...) are delegated to the local file
    object.  close() copies the local file back to S3 when it was opened in
    a mode that allows writing.
    """

    # mode characters that mean the file may have been modified
    _WRITE_FLAGS = "wax+"

    def __init__(self, mgr, path):
        """Record the cache manager and file path (does not open the file).

        mgr  -- cache manager; provides tmpdir, bucket, caching and log()
        path -- path of the file, also used as the S3 key
        """
        self.mgr = mgr
        self.path = path
        self.mode = None
        # Must exist before any attribute miss, because __getattr__ reads it.
        self.file = None
        self.tmppath = os.path.join(mgr.tmpdir, self.mangle(path))

    def removeCache(self):
        """Remove the local cache copy, if there is one (best effort)."""
        if os.path.exists(self.tmppath):
            try:
                os.remove(self.tmppath)
                self.log("removed local cache file("+self.tmppath+")")
            except OSError:
                self.log("problem removing local cache file("+self.tmppath+")")

    def remove(self):
        """Remove the file from the local cache and from S3 (best effort)."""
        self.log("removing file")
        self.removeCache()
        self.log("removing file from S3")
        k = Key(self.mgr.bucket)
        k.key = self.path
        try:
            k.delete()
        except Exception:
            self.log("problem removing file")

    def open(self, mode):
        """Open the local cache file, fetching it from S3 first if needed.

        For modes containing 'r' or 'a' the file is downloaded from S3
        unless caching is enabled and a local copy already exists.  A failed
        download is only logged; the local open that follows then behaves
        as the builtin open() does for a missing file in that mode.
        """
        self.mode = mode
        if 'r' in self.mode or 'a' in self.mode:
            # opening an existing file, try to copy in from s3 if not in local cache
            self.log("trying to open existing file")
            use_local_copy = self.mgr.caching
            if use_local_copy and not os.path.exists(self.tmppath):
                self.log("not found in local cache, attempting to load from S3")
                use_local_copy = False
            if use_local_copy:
                self.log("file found in local cache")
            else:
                try:
                    k = Key(self.mgr.bucket)
                    k.key = self.path
                    k.get_contents_to_filename(self.tmppath)
                    self.log("file located in S3, downloaded from S3 to cache")
                except Exception:
                    self.log("file not found in S3, opening new empty file in local cache")
        else:
            self.log("opening new file in local cache for writing")
        # open the local file
        self.log("opening local cache file("+self.tmppath+")")
        self.file = open(self.tmppath, self.mode)

    def mangle(self, path):
        """Turn a file path into a flat file name for the cache directory.

        Separators ('/') are replaced with underscores and existing
        underscores are doubled up.
        NOTE: the mapping is not unique for every input, e.g. "a/_" and
        "a_/" both become "a___".
        """
        # Double the underscores first so that the underscores produced
        # from separators are not doubled as well.
        return path.replace('_', '__').replace('/', '_')

    def __getattr__(self, name):
        """Delegate any attribute not defined here to the local file.

        Raises AttributeError if the file has not been opened yet, or if
        the local file object has no such attribute.
        """
        # Read via __dict__ so that a missing 'file' entry cannot re-enter
        # __getattr__ and recurse.
        target = self.__dict__.get('file')
        if target is None:
            raise AttributeError(
                "'s3file' object has no attribute %r (file not opened)" % (name,))
        return getattr(target, name)

    # utility class to delegate
    # a call on this class to the local file
    # (no longer used by __getattr__; kept so existing references to
    # s3file.delegator continue to work)
    class delegator(object):

        def __init__(self, target, name):
            self.target = target
            self.name = name

        def __call__(self, *args, **kwargs):
            # getattr also finds methods inherited from base classes
            return getattr(self.target, self.name)(*args, **kwargs)

    def __enter__(self):
        """Support use in a 'with' statement; returns this object."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close (and, if applicable, upload) the file on leaving 'with'."""
        self.close()
        return False

    def close(self):
        """Close the local file and copy it back to S3 if it may have changed.

        The upload happens for any mode that allows writing ('w', 'a', 'x'
        or a '+' update mode).  A failed upload is logged, not raised.
        """
        if self.__dict__.get('file') is None:
            self.log("close called but file was never opened")
            return
        self.log("closing local cache file("+self.tmppath+")")
        self.file.close()
        if any(flag in self.mode for flag in self._WRITE_FLAGS):
            self.log("writing updated cache file contents to S3")
            try:
                k = Key(self.mgr.bucket)
                k.key = self.path
                k.set_contents_from_filename(self.tmppath)
                self.log("write complete")
            except Exception:
                self.log("ERROR - write to S3 failed")

    def log(self, msg):
        """Log a message through the cache manager, prefixed with the path."""
        self.mgr.log("s3file("+self.path+"): "+msg)