Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Streaming RDF export #102

Merged
merged 4 commits into from
Dec 23, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions module/styles/rdf-schema-alignment-dialog.css
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ div.rdf-schema-alignment-dialog-preview {
height: 375px;
}

/* Make the preformatted preview text take the full height of its parent and
   scroll (instead of spilling out) when its content overflows. */
div.rdf-schema-alignment-dialog-preview pre {
overflow: auto;
height: 100%;
}

div.rdf-schema-alignment-dialog-canvas {
height: 350px;
}
Expand Down
65 changes: 30 additions & 35 deletions src/org/deri/grefine/rdf/commands/PreviewRdfCommand.java
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
package org.deri.grefine.rdf.commands;

import java.io.IOException;
import java.io.StringWriter;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.google.refine.browsing.Engine;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving these imports when they're unchanged adds noise to the diff

import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.ParsingUtilities;
import org.deri.grefine.rdf.Node;
import org.deri.grefine.rdf.RdfSchema;
import org.deri.grefine.rdf.exporters.RdfExporter;
import org.deri.grefine.rdf.exporters.RdfExporter.RdfRowVisitor;
import org.deri.grefine.rdf.vocab.Vocabulary;
import org.json.JSONObject;
import org.json.JSONWriter;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.Rio;

import com.google.refine.browsing.Engine;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.ParsingUtilities;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.StringWriter;

public class PreviewRdfCommand extends Command {

Expand All @@ -43,8 +39,10 @@ public void doPost(HttpServletRequest request, HttpServletResponse response)
String jsonString = request.getParameter("schema");
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
final RdfSchema schema = RdfSchema.reconstruct(json);

RdfRowVisitor visitor = new RdfRowVisitor(schema) {

StringWriter sw = new StringWriter();
RDFWriter w = Rio.createWriter(RDFFormat.TURTLE, sw);
RdfRowVisitor visitor = new RdfRowVisitor(schema, w) {
final int limit = 10;
int _count;
@Override
Expand All @@ -56,29 +54,26 @@ public boolean visit(Project project, int rowIndex, Row row) {
root.createNode(baseUri, factory, con, project, row, rowIndex,blanks);
}
_count +=1;

try {
flushStatements();
} catch (RepositoryException e) {
e.printStackTrace();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These errors don't look like they get returned to a place where they're visible to the user.

return true;
} catch (RDFHandlerException e) {
e.printStackTrace();
return true;
}

return false;
}
};

Repository model = RdfExporter.buildModel(project, engine, visitor);
StringWriter sw = new StringWriter();
try{
RepositoryConnection con = model.getConnection();
try{
RDFWriter w = Rio.createWriter(RDFFormat.TURTLE, sw);
for(Vocabulary v:schema.getPrefixesMap().values()){
w.handleNamespace(v.getName(), v.getUri());
}
con.export(w);
}finally{
con.close();
}
}catch(RepositoryException ex){
throw new RuntimeException(ex);
}catch(RDFHandlerException ex){
throw new RuntimeException(ex);
}

for(Vocabulary v:schema.getPrefixesMap().values()){
w.handleNamespace(v.getName(), v.getUri());
}
RdfExporter.buildModel(project, engine, visitor);

JSONWriter writer = new JSONWriter(response.getWriter());
writer.object();
writer.key("v");
Expand Down
131 changes: 80 additions & 51 deletions src/org/deri/grefine/rdf/exporters/RdfExporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
import org.deri.grefine.rdf.app.ApplicationContext;
import org.deri.grefine.rdf.vocab.Vocabulary;
import org.deri.grefine.rdf.vocab.VocabularyIndexException;
import org.openrdf.model.BNode;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.*;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
Expand All @@ -25,18 +24,24 @@
import org.openrdf.rio.Rio;
import org.openrdf.sail.memory.MemoryStore;

import info.aduna.iteration.CloseableIteration;

import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.exporters.WriterExporter;
import com.google.refine.model.Project;
import com.google.refine.model.Row;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RdfExporter implements WriterExporter{

private RDFFormat format;
private ApplicationContext applicationContext;

final static Logger logger = LoggerFactory.getLogger("RdfExporter");

public RdfExporter(ApplicationContext ctxt, RDFFormat f){
this.format = f;
this.applicationContext = ctxt;
Expand All @@ -48,73 +53,55 @@ public ApplicationContext getApplicationContext() {

public void export(Project project, Properties options, Engine engine,
OutputStream outputStream) throws IOException {
RdfSchema schema;
try{
schema = Util.getProjectSchema(applicationContext,project);
}catch(VocabularyIndexException ve){
throw new IOException("Unable to create index for RDF schema",ve);
}
Repository model = buildModel(project, engine, schema);
try{
RepositoryConnection con = model.getConnection();
try{
RDFWriter writer = Rio.createWriter(format, outputStream);
for(Vocabulary v:schema.getPrefixesMap().values()){
writer.handleNamespace(v.getName(), v.getUri());
}
con.export(writer);
}finally{
con.close();
}
}catch(RepositoryException ex){
throw new RuntimeException(ex);
}catch(RDFHandlerException ex){
throw new RuntimeException(ex);
}
export(project, options, engine, Rio.createWriter(format, outputStream));
}


public void export(Project project, Properties options, Engine engine,
Writer writer) throws IOException {
public void export(Project project, Properties options, Engine engine,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the indentation changed here?

Writer writer) throws IOException {
export(project, options, engine, Rio.createWriter(format, writer));
}

private void export(Project project, Properties options, Engine engine,
RDFWriter writer) throws IOException {
RdfSchema schema;
try{
schema = Util.getProjectSchema(applicationContext,project);
}catch(VocabularyIndexException ve){
throw new IOException("Unable to create index for RDF schema",ve);
}
Repository model = buildModel(project, engine, schema);
try{
RepositoryConnection con = model.getConnection();
try{
RDFWriter w = Rio.createWriter(format, writer);
for(Vocabulary v:schema.getPrefixesMap().values()){
w.handleNamespace(v.getName(),v.getUri());
}
con.export(w);
}finally{
con.close();
}
}catch(RepositoryException ex){
throw new RuntimeException(ex);
for(Vocabulary v:schema.getPrefixesMap().values()){
writer.handleNamespace(v.getName(), v.getUri());
}

exportModel(project, engine, schema, writer);
}catch(RDFHandlerException ex){
throw new RuntimeException(ex);
}
}

public Repository buildModel(final Project project, Engine engine, RdfSchema schema) throws IOException{
RdfRowVisitor visitor = new RdfRowVisitor(schema) {
public Repository exportModel(final Project project, Engine engine, RdfSchema schema, RDFWriter writer) throws IOException{
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if anyone has built on or extended the RDF extension, but a better engineering practice would be to deprecate the existing public method and add your new one rather than immediately deleting the old method (since it's public).

RdfRowVisitor visitor = new RdfRowVisitor(schema, writer) {

@Override
public boolean visit(Project project, int rowIndex, Row row) {
for(Node root:roots){
root.createNode(baseUri, factory, con, project, row, rowIndex,blanks);

try {
// flush here to preserve root ordering in the output file
flushStatements();
} catch (RepositoryException e) {
e.printStackTrace();
} catch (RDFHandlerException e) {
e.printStackTrace();
}
}
return false;

return false;
}
};
Repository model = buildModel(project, engine,visitor);

return model;
return buildModel(project, engine, visitor);
}

public static Repository buildModel(Project project, Engine engine, RdfRowVisitor visitor) {
Expand Down Expand Up @@ -146,13 +133,15 @@ public static abstract class RdfRowVisitor implements RowVisitor{

protected ValueFactory factory;
protected RepositoryConnection con;
protected RDFWriter writer;

/**
 * Returns the repository referenced by {@code model}, the same repository
 * from which this visitor obtains its connection.
 *
 * @return the repository backing this visitor
 */
public Repository getModel() {
return model;
}

public RdfRowVisitor(RdfSchema schema){
public RdfRowVisitor(RdfSchema schema, RDFWriter writer){
this.schema = schema;
this.writer = writer;
baseUri = schema.getBaseUri();
roots = schema.getRoots();

Expand All @@ -176,23 +165,63 @@ public RdfRowVisitor(RdfSchema schema){
}
public void end(Project project) {
try {
writer.endRDF();
if(con.isOpen()){
con.close();
}
} catch (RepositoryException e) {
throw new RuntimeException("",e);
}
} catch (RDFHandlerException e) {
throw new RuntimeException("",e);
}
}

public void start(Project project) {
try{
con = model.getConnection();
factory = con.getValueFactory();

// Open RDF output
writer.startRDF();

// Export namespace information
CloseableIteration<? extends Namespace, RepositoryException> nsIter = con.getNamespaces();
try {
while (nsIter.hasNext()) {
Namespace ns = nsIter.next();
writer.handleNamespace(ns.getPrefix(), ns.getName());
}
} finally {
nsIter.close();
}

}catch(RepositoryException ex){
throw new RuntimeException("",ex);
}
} catch (RDFHandlerException e) {
e.printStackTrace();
}
}

/**
 * Streams every explicit statement currently held by the repository
 * connection to the RDF writer and then empties the repository, so that
 * statements are emitted incrementally rather than accumulated in memory.
 *
 * <p>The statements are read without any context restriction. Restricting the
 * read to the IDs returned by {@code getContextIDs()} would skip statements
 * stored in the default (null) context whenever at least one named context
 * exists, while {@code con.clear()} below would still delete them, silently
 * dropping them from the output. Reading without contexts also avoids an
 * extra repository query on every flush.
 *
 * @throws RepositoryException if reading from or clearing the repository fails
 * @throws RDFHandlerException if the writer rejects a statement
 */
protected void flushStatements() throws RepositoryException, RDFHandlerException{
    // Export statements: no contexts supplied means "the entire repository";
    // includeInferred is false, so only explicit statements are written.
    CloseableIteration<? extends Statement, RepositoryException> stIter =
            con.getStatements(null, null, null, false);

    try {
        while (stIter.hasNext()) {
            this.writer.handleStatement(stIter.next());
        }
    } finally {
        // Always release the iteration, even if the writer throws.
        stIter.close();
    }

    // empty the repository so the next flush only sees newly created statements
    con.clear();
}

/**
 * Processes a single row of the project; implemented by concrete visitors.
 *
 * @param project  the project the row belongs to
 * @param rowIndex index of the row being visited
 * @param row      the row being visited
 * @return a boolean consumed by the caller driving the visit
 *         (NOTE(review): presumably {@code true} stops the traversal and
 *         {@code false} continues it; confirm against the RowVisitor contract)
 */
abstract public boolean visit(Project project, int rowIndex, Row row);
public RdfSchema getRdfSchema(){
return schema;
Expand Down