Skip to content

Commit

Permalink
Merge pull request #102 from SpazioDati/export-stream
Browse files Browse the repository at this point in the history
Streaming RDF export
  • Loading branch information
fadmaa committed Dec 23, 2014
2 parents 9a8e494 + 0a9490f commit 3925b93
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 86 deletions.
5 changes: 5 additions & 0 deletions module/styles/rdf-schema-alignment-dialog.css
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ div.rdf-schema-alignment-dialog-preview {
height: 375px;
}

/* Preview text: fill the full height of the enclosing preview box and show
   scrollbars when the content is larger than that box. */
div.rdf-schema-alignment-dialog-preview pre {
overflow: auto;
height: 100%;
}

/* Fixed-height area for the canvas element of the schema alignment dialog. */
div.rdf-schema-alignment-dialog-canvas {
height: 350px;
}
Expand Down
65 changes: 30 additions & 35 deletions src/org/deri/grefine/rdf/commands/PreviewRdfCommand.java
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
package org.deri.grefine.rdf.commands;

import java.io.IOException;
import java.io.StringWriter;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.google.refine.browsing.Engine;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.ParsingUtilities;
import org.deri.grefine.rdf.Node;
import org.deri.grefine.rdf.RdfSchema;
import org.deri.grefine.rdf.exporters.RdfExporter;
import org.deri.grefine.rdf.exporters.RdfExporter.RdfRowVisitor;
import org.deri.grefine.rdf.vocab.Vocabulary;
import org.json.JSONObject;
import org.json.JSONWriter;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.Rio;

import com.google.refine.browsing.Engine;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.ParsingUtilities;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.StringWriter;

public class PreviewRdfCommand extends Command {

Expand All @@ -43,8 +39,10 @@ public void doPost(HttpServletRequest request, HttpServletResponse response)
String jsonString = request.getParameter("schema");
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
final RdfSchema schema = RdfSchema.reconstruct(json);

RdfRowVisitor visitor = new RdfRowVisitor(schema) {

StringWriter sw = new StringWriter();
RDFWriter w = Rio.createWriter(RDFFormat.TURTLE, sw);
RdfRowVisitor visitor = new RdfRowVisitor(schema, w) {
final int limit = 10;
int _count;
@Override
Expand All @@ -56,29 +54,26 @@ public boolean visit(Project project, int rowIndex, Row row) {
root.createNode(baseUri, factory, con, project, row, rowIndex,blanks);
}
_count +=1;

try {
flushStatements();
} catch (RepositoryException e) {
e.printStackTrace();
return true;
} catch (RDFHandlerException e) {
e.printStackTrace();
return true;
}

return false;
}
};

Repository model = RdfExporter.buildModel(project, engine, visitor);
StringWriter sw = new StringWriter();
try{
RepositoryConnection con = model.getConnection();
try{
RDFWriter w = Rio.createWriter(RDFFormat.TURTLE, sw);
for(Vocabulary v:schema.getPrefixesMap().values()){
w.handleNamespace(v.getName(), v.getUri());
}
con.export(w);
}finally{
con.close();
}
}catch(RepositoryException ex){
throw new RuntimeException(ex);
}catch(RDFHandlerException ex){
throw new RuntimeException(ex);
}

for(Vocabulary v:schema.getPrefixesMap().values()){
w.handleNamespace(v.getName(), v.getUri());
}
RdfExporter.buildModel(project, engine, visitor);

JSONWriter writer = new JSONWriter(response.getWriter());
writer.object();
writer.key("v");
Expand Down
131 changes: 80 additions & 51 deletions src/org/deri/grefine/rdf/exporters/RdfExporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
import org.deri.grefine.rdf.app.ApplicationContext;
import org.deri.grefine.rdf.vocab.Vocabulary;
import org.deri.grefine.rdf.vocab.VocabularyIndexException;
import org.openrdf.model.BNode;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.*;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
Expand All @@ -25,18 +24,24 @@
import org.openrdf.rio.Rio;
import org.openrdf.sail.memory.MemoryStore;

import info.aduna.iteration.CloseableIteration;

import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.exporters.WriterExporter;
import com.google.refine.model.Project;
import com.google.refine.model.Row;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RdfExporter implements WriterExporter{

private RDFFormat format;
private ApplicationContext applicationContext;

final static Logger logger = LoggerFactory.getLogger("RdfExporter");

public RdfExporter(ApplicationContext ctxt, RDFFormat f){
this.format = f;
this.applicationContext = ctxt;
Expand All @@ -48,73 +53,55 @@ public ApplicationContext getApplicationContext() {

public void export(Project project, Properties options, Engine engine,
OutputStream outputStream) throws IOException {
RdfSchema schema;
try{
schema = Util.getProjectSchema(applicationContext,project);
}catch(VocabularyIndexException ve){
throw new IOException("Unable to create index for RDF schema",ve);
}
Repository model = buildModel(project, engine, schema);
try{
RepositoryConnection con = model.getConnection();
try{
RDFWriter writer = Rio.createWriter(format, outputStream);
for(Vocabulary v:schema.getPrefixesMap().values()){
writer.handleNamespace(v.getName(), v.getUri());
}
con.export(writer);
}finally{
con.close();
}
}catch(RepositoryException ex){
throw new RuntimeException(ex);
}catch(RDFHandlerException ex){
throw new RuntimeException(ex);
}
export(project, options, engine, Rio.createWriter(format, outputStream));
}


public void export(Project project, Properties options, Engine engine,
Writer writer) throws IOException {
public void export(Project project, Properties options, Engine engine,
Writer writer) throws IOException {
export(project, options, engine, Rio.createWriter(format, writer));
}

private void export(Project project, Properties options, Engine engine,
RDFWriter writer) throws IOException {
RdfSchema schema;
try{
schema = Util.getProjectSchema(applicationContext,project);
}catch(VocabularyIndexException ve){
throw new IOException("Unable to create index for RDF schema",ve);
}
Repository model = buildModel(project, engine, schema);
try{
RepositoryConnection con = model.getConnection();
try{
RDFWriter w = Rio.createWriter(format, writer);
for(Vocabulary v:schema.getPrefixesMap().values()){
w.handleNamespace(v.getName(),v.getUri());
}
con.export(w);
}finally{
con.close();
}
}catch(RepositoryException ex){
throw new RuntimeException(ex);
for(Vocabulary v:schema.getPrefixesMap().values()){
writer.handleNamespace(v.getName(), v.getUri());
}

exportModel(project, engine, schema, writer);
}catch(RDFHandlerException ex){
throw new RuntimeException(ex);
}
}

public Repository buildModel(final Project project, Engine engine, RdfSchema schema) throws IOException{
RdfRowVisitor visitor = new RdfRowVisitor(schema) {
public Repository exportModel(final Project project, Engine engine, RdfSchema schema, RDFWriter writer) throws IOException{
RdfRowVisitor visitor = new RdfRowVisitor(schema, writer) {

@Override
public boolean visit(Project project, int rowIndex, Row row) {
for(Node root:roots){
root.createNode(baseUri, factory, con, project, row, rowIndex,blanks);

try {
// flush here to preserve root ordering in the output file
flushStatements();
} catch (RepositoryException e) {
e.printStackTrace();
} catch (RDFHandlerException e) {
e.printStackTrace();
}
}
return false;

return false;
}
};
Repository model = buildModel(project, engine,visitor);

return model;
return buildModel(project, engine, visitor);
}

public static Repository buildModel(Project project, Engine engine, RdfRowVisitor visitor) {
Expand Down Expand Up @@ -146,13 +133,15 @@ public static abstract class RdfRowVisitor implements RowVisitor{

protected ValueFactory factory;
protected RepositoryConnection con;
protected RDFWriter writer;

public Repository getModel() {
return model;
}

public RdfRowVisitor(RdfSchema schema){
public RdfRowVisitor(RdfSchema schema, RDFWriter writer){
this.schema = schema;
this.writer = writer;
baseUri = schema.getBaseUri();
roots = schema.getRoots();

Expand All @@ -176,23 +165,63 @@ public RdfRowVisitor(RdfSchema schema){
}
public void end(Project project) {
try {
writer.endRDF();
if(con.isOpen()){
con.close();
}
} catch (RepositoryException e) {
throw new RuntimeException("",e);
}
} catch (RDFHandlerException e) {
throw new RuntimeException("",e);
}
}

public void start(Project project) {
try{
con = model.getConnection();
factory = con.getValueFactory();

// Open RDF output
writer.startRDF();

// Export namespace information
CloseableIteration<? extends Namespace, RepositoryException> nsIter = con.getNamespaces();
try {
while (nsIter.hasNext()) {
Namespace ns = nsIter.next();
writer.handleNamespace(ns.getPrefix(), ns.getName());
}
} finally {
nsIter.close();
}

}catch(RepositoryException ex){
throw new RuntimeException("",ex);
}
} catch (RDFHandlerException e) {
e.printStackTrace();
}
}

/**
 * Writes every statement currently held in the repository to the RDF
 * writer and then empties the repository, so the next call only sees
 * statements created after this one.
 *
 * @throws RepositoryException if reading from or clearing the repository fails
 * @throws RDFHandlerException if the writer fails to handle a statement
 */
protected void flushStatements() throws RepositoryException, RDFHandlerException{
    // No context argument is passed on purpose: with the varargs left empty
    // the query covers the entire repository, including statements that
    // have no context. Restricting the query to the IDs returned by
    // getContextIDs() would skip context-less statements whenever at least
    // one named context exists, and clear() below would then discard them
    // without ever writing them.
    CloseableIteration<? extends Statement, RepositoryException> stIter =
            con.getStatements(null, null, null, false);

    try {
        while (stIter.hasNext()) {
            this.writer.handleStatement(stIter.next());
        }
    } finally {
        stIter.close();
    }

    // empty the repository
    con.clear();
}

abstract public boolean visit(Project project, int rowIndex, Row row);
public RdfSchema getRdfSchema(){
return schema;
Expand Down

0 comments on commit 3925b93

Please sign in to comment.