Skip to content
This repository has been archived by the owner on Oct 15, 2022. It is now read-only.

Commit

Permalink
#3: start an experimental branch for AWS integration
Browse files Browse the repository at this point in the history
first: S3 instead of GridFS for content storage (meta-data still in
MongoDB)
  • Loading branch information
thiloplanz committed Mar 15, 2012
1 parent d392812 commit b5b3974
Show file tree
Hide file tree
Showing 6 changed files with 259 additions and 11 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# v7files
# v7files (experimental AWS branch)

v7files is a WebDAV server backed by MongoDB GridFS file storage.
v7files is a WebDAV server backed by MongoDB (and optionally Amazon S3).

It is written in Java, using the Jetty embedded web server and the Milton WebDAV library.

All file contents are stored in GridFS, but the file (and folder) metadata is [stored in separate collection](https://github.com/thiloplanz/v7files/wiki/StorageFormat),
All file contents are stored in GridFS (or S3), but the file (and folder) metadata is [stored in a separate collection](https://github.com/thiloplanz/v7files/wiki/StorageFormat),
where it can also be versioned. Identical content is only stored once, even if more than one file refers (or used to refer) to it,
and content can also be compressed using zip compression (good for text files) or delta storage (good for files that are similar to others).

Expand Down
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -199,5 +199,12 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
<version>1.3.4</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
16 changes: 10 additions & 6 deletions src/main/java/v7db/files/CatCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;

import v7db.files.aws.S3ContentStorage;

import com.amazonaws.services.s3.model.S3Object;
import com.mongodb.MongoException;
import com.mongodb.gridfs.GridFSDBFile;

class CatCommand {

Expand All @@ -40,10 +42,9 @@ public static void main(String[] args) throws MongoException, IOException {
System.exit(1);
}

V7GridFS fs = new V7GridFS(Configuration.getMongo().getDB(
Configuration.getProperty("mongo.db")));

if ("-sha".equals(args[1])) {
S3ContentStorage storage = S3ContentStorage.configure(Configuration
.getProperties());
String sha = args[2];
try {
byte[] id;
Expand All @@ -55,18 +56,21 @@ public static void main(String[] args) throws MongoException, IOException {
}
if (id.length > 20)
throw new DecoderException("too long");
GridFSDBFile file = fs.storage.findContentByPrefix(id);
S3Object file = storage.findContentByPrefix(id);
if (file == null) {
System.err.println("file not found");
System.exit(1);
}
IOUtils.copy(fs.storage.readContent(file), System.out);
IOUtils.copy(storage.getInputStream(file), System.out);
} catch (DecoderException e) {
System.err.println("invalid parameter :" + sha
+ " is not a hex-encoded SHA-1 prefix");
System.exit(1);
}
} else {
V7GridFS fs = new V7GridFS(Configuration.getMongo().getDB(
Configuration.getProperty("mongo.db")));

String root = args[1];
String path = args[2];
String[] fullPath = ArrayUtils.add(StringUtils.split(path, '/'), 0,
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/v7db/files/Compression.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class Compression {
public class Compression {

// minimum overhead: 10 byte header, 8 byte trailer
static final int GZIP_STORAGE_OVERHEAD = 18;
Expand Down Expand Up @@ -115,7 +115,7 @@ static InputStream unzip(InputStream in) throws IOException {
* exception)
*/

static File gzip(File data) {
public static File gzip(File data) {
File file = null;
try {
file = File.createTempFile(data.getName(), ".gz");
Expand Down
59 changes: 59 additions & 0 deletions src/main/java/v7db/files/UploadCommand.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
* Copyright (c) 2011-2012, Thilo Planz. All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package v7db.files;

import java.io.File;
import java.io.IOException;

import org.apache.commons.codec.binary.Hex;

import v7db.files.aws.S3ContentStorage;

import com.mongodb.MongoException;

/**
 * Command-line entry point that uploads the contents of local files into the
 * S3 content storage and prints, for every stored file, one line with its
 * size, its name and the hex-encoded SHA digest returned by the storage.
 *
 * <p>
 * Files are processed independently: a file that cannot be uploaded is
 * reported on stderr and the remaining files are still attempted. The process
 * exits with status 1 if any file was skipped or failed.
 */
class UploadCommand {

    /**
     * @param args
     *            args[0] is not interpreted here (presumably the command
     *            name, as in the usage line); args[1..] are the paths of the
     *            files to upload
     * @throws MongoException
     *             declared for compatibility with callers; not thrown by the
     *             code visible here
     * @throws IOException
     *             declared for compatibility with callers
     */
    public static void main(String[] args) throws MongoException, IOException {

        if (args.length < 2) {
            System.err
                    .println("Upload the contents of one or more files and print their SHA digests:");
            // the original text carried a stray "by hash:" label copied from
            // the cat command, which has no meaning for uploads
            System.err.println("  upload <file> [file] [file] [...]");
            System.exit(1);
        }

        S3ContentStorage storage = S3ContentStorage.configure(Configuration
                .getProperties());

        int failures = 0;

        for (int i = 1; i < args.length; i++) {
            File f = new File(args[i]);

            if (!f.isFile() || !f.canRead()) {
                // tell the user instead of silently ignoring the argument
                System.err.println("skipping " + args[i]
                        + ": not a readable file");
                failures++;
                continue;
            }

            try {
                String sha = Hex.encodeHexString(storage.insertContents(f, f
                        .getName(), null));
                System.out.format("- %10d %80s %40s\n", f.length(), f
                        .getName(), sha);
            } catch (Exception e) {
                // command-line boundary: report with context and carry on
                // with the remaining files
                System.err.println("failed to upload " + args[i] + ": " + e);
                e.printStackTrace();
                failures++;
            }
        }

        // let scripts detect partial or total failure
        if (failures > 0) {
            System.exit(1);
        }
    }
}
178 changes: 178 additions & 0 deletions src/main/java/v7db/files/aws/S3ContentStorage.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/**
* Copyright (c) 2011-2012, Thilo Planz. All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package v7db.files.aws;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.zip.GZIPInputStream;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;

import v7db.files.Compression;

import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;

/**
 * Content storage in Amazon S3 instead of Mongo GridFS.
 *
 * <p>
 * Content is stored in the given bucket, named as the Hex-encoded SHA id of the
 * content. It can be stored either raw or gzip-compressed, which will be
 * indicated by a Content-Encoding header (all clients should be able to handle
 * this, so it does not prevent direct downloads without any special
 * v7files-aware software).
 */
public class S3ContentStorage {

    private final AmazonS3 s3;

    private final String bucketName;

    /**
     * @param s3
     *            client used for all bucket operations
     * @param bucketName
     *            name of the bucket that holds the content objects
     */
    public S3ContentStorage(AmazonS3 s3, String bucketName) {
        this.s3 = s3;
        this.bucketName = bucketName;
    }

    /**
     * Creates a storage from the properties {@code s3.accessKey},
     * {@code s3.secretKey} and {@code s3.bucket}.
     */
    public static S3ContentStorage configure(Properties props) {
        AmazonS3 s3 = new AmazonS3Client(
                new BasicAWSCredentials(props.getProperty("s3.accessKey"),
                        props.getProperty("s3.secretKey")));
        return new S3ContentStorage(s3, props.getProperty("s3.bucket"));
    }

    /**
     * Checks, by listing the bucket with the hex-encoded SHA as key prefix,
     * whether content with that digest is present.
     */
    public boolean contentAlreadyExists(byte[] sha) throws IOException {
        return !s3.listObjects(bucketName, Hex.encodeHexString(sha))
                .getObjectSummaries().isEmpty();
    }

    /**
     * Looks up content by its complete SHA digest.
     *
     * @return the stored object, or {@code null} if there is none
     */
    S3Object findContent(byte[] sha) throws IOException {
        // getObject reports a missing key with an exception rather than by
        // returning null, but callers of this class test the result for null
        // (as findContentByPrefix already allows). Probe first so that
        // "not found" is reported the same way on both lookup paths.
        if (!contentAlreadyExists(sha))
            return null;
        return s3.getObject(bucketName, Hex.encodeHexString(sha));
    }

    /**
     * Looks up content by a SHA digest or an unambiguous prefix of it.
     *
     * @param sha
     *            at most 20 bytes; exactly 20 bytes are treated as a complete
     *            digest
     * @return the stored object, or {@code null} if nothing matches
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or matches more than
     *             one object
     */
    public S3Object findContentByPrefix(byte[] sha) throws IOException {
        if (sha.length == 20)
            return findContent(sha);

        if (sha.length > 20)
            throw new IllegalArgumentException(
                    "SHA prefix must not be longer than 20 bytes, got "
                            + sha.length);

        String prefix = Hex.encodeHexString(sha);

        // two keys are enough to tell "unique" from "ambiguous"
        ObjectListing objectListing = s3.listObjects(new ListObjectsRequest()
                .withBucketName(bucketName).withPrefix(prefix).withMaxKeys(2));

        List<S3ObjectSummary> result = objectListing.getObjectSummaries();
        if (result.isEmpty())
            return null;

        if (result.size() == 1)
            return s3.getObject(bucketName, result.get(0).getKey());

        throw new IllegalArgumentException(prefix
                + " is not a unique SHA prefix");
    }

    /**
     * Builds the object metadata for an upload: content length, content type
     * (defaulting to application/octet-stream when none is given) and, if a
     * file name is given, a {@code v7-filename} user metadata entry.
     */
    private ObjectMetadata makeMetaData(long length, String filename,
            String contentType) {
        ObjectMetadata metaData = new ObjectMetadata();
        metaData.setContentLength(length);
        metaData.setContentType("application/octet-stream");
        if (StringUtils.isNotBlank(contentType))
            metaData.setContentType(contentType);
        Map<String, String> v7MetaData = new HashMap<String, String>();
        if (StringUtils.isNotBlank(filename))
            v7MetaData.put("v7-filename", filename);
        metaData.setUserMetadata(v7MetaData);
        return metaData;
    }

    /**
     * Uploads already gzip-compressed data under the key derived from
     * {@code sha}, marking it with Content-Encoding gzip. The stream is
     * always closed, whether or not the upload succeeds.
     *
     * @param length
     *            number of bytes in {@code deflatedData}
     */
    private void insertGzipContents(InputStream deflatedData, long length,
            byte[] sha, String filename, String contentType) throws IOException {
        try {
            String key = Hex.encodeHexString(sha);
            ObjectMetadata metaData = makeMetaData(length, filename,
                    contentType);
            metaData.setContentEncoding("gzip");
            s3.putObject(bucketName, key, deflatedData, metaData);
        } finally {
            closeQuietly(deflatedData);
        }
    }

    /**
     * Stores the contents of a file, unless content with the same SHA digest
     * is already present.
     *
     * @param data
     *            file whose contents are stored
     * @param filename
     *            recorded as user metadata if not blank
     * @param contentType
     *            content type to record; blank means application/octet-stream
     * @return the SHA digest of the file contents, which is also the storage
     *         key (hex-encoded)
     */
    public byte[] insertContents(File data, String filename, String contentType)
            throws IOException {

        byte[] sha;
        FileInputStream fis = new FileInputStream(data);
        try {
            sha = DigestUtils.sha(fis);
        } finally {
            // also release the handle when hashing fails
            closeQuietly(fis);
        }

        if (contentAlreadyExists(sha))
            return sha;

        // a null result means no compressed copy is available (presumably
        // because compression failed or would not save space -- confirm
        // against Compression.gzip); the raw bytes are uploaded in that case
        final File compressed = Compression.gzip(data);
        if (compressed != null) {
            try {
                insertGzipContents(new FileInputStream(compressed), compressed
                        .length(), sha, filename, contentType);
                return sha;
            } finally {
                compressed.delete();
            }
        }

        String key = Hex.encodeHexString(sha);
        ObjectMetadata metaData = makeMetaData(data.length(), filename,
                contentType);
        InputStream raw = new FileInputStream(data);
        try {
            s3.putObject(bucketName, key, raw, metaData);
        } finally {
            closeQuietly(raw);
        }
        return sha;
    }

    /**
     * Opens the (decoded) content stored under the given complete SHA digest.
     *
     * @return the content stream, or {@code null} if there is no such content
     */
    public InputStream getInputStream(byte[] sha) throws IOException {
        return getInputStream(findContent(sha));
    }

    /**
     * Opens the content of a stored object, transparently unzipping it when
     * its Content-Encoding is gzip.
     *
     * @param file
     *            may be {@code null}
     * @return the content stream, or {@code null} if {@code file} is null
     * @throws IllegalArgumentException
     *             if the object carries a content encoding other than gzip
     */
    public InputStream getInputStream(S3Object file) throws IOException {
        if (file == null)
            return null;
        String encoding = file.getObjectMetadata().getContentEncoding();
        InputStream content = file.getObjectContent();
        if ("gzip".equals(encoding)) {
            try {
                return new GZIPInputStream(content);
            } catch (IOException e) {
                // the gzip header could not be read: do not leave the S3
                // stream open behind the failure
                closeQuietly(content);
                throw e;
            }
        }
        if (StringUtils.isBlank(encoding))
            return content;
        // nobody will read (or close) this stream once we throw
        closeQuietly(content);
        throw new IllegalArgumentException("unsupported content encoding '"
                + encoding + "'");
    }

    /**
     * Closes a read-only stream, ignoring failures: nothing was written
     * through it, so a failed close cannot lose data, and it must not mask
     * the exception (if any) that is already propagating.
     */
    private static void closeQuietly(InputStream in) {
        if (in == null)
            return;
        try {
            in.close();
        } catch (IOException ignored) {
            // intentionally ignored, see method comment
        }
    }
}

0 comments on commit b5b3974

Please sign in to comment.