-
Notifications
You must be signed in to change notification settings - Fork 55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Streaming RDF export #102
Streaming RDF export #102
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,28 @@ | ||
package org.deri.grefine.rdf.commands; | ||
|
||
import java.io.IOException; | ||
import java.io.StringWriter; | ||
|
||
import javax.servlet.ServletException; | ||
import javax.servlet.http.HttpServletRequest; | ||
import javax.servlet.http.HttpServletResponse; | ||
|
||
import com.google.refine.browsing.Engine; | ||
import com.google.refine.commands.Command; | ||
import com.google.refine.model.Project; | ||
import com.google.refine.model.Row; | ||
import com.google.refine.util.ParsingUtilities; | ||
import org.deri.grefine.rdf.Node; | ||
import org.deri.grefine.rdf.RdfSchema; | ||
import org.deri.grefine.rdf.exporters.RdfExporter; | ||
import org.deri.grefine.rdf.exporters.RdfExporter.RdfRowVisitor; | ||
import org.deri.grefine.rdf.vocab.Vocabulary; | ||
import org.json.JSONObject; | ||
import org.json.JSONWriter; | ||
import org.openrdf.repository.Repository; | ||
import org.openrdf.repository.RepositoryConnection; | ||
import org.openrdf.repository.RepositoryException; | ||
import org.openrdf.rio.RDFFormat; | ||
import org.openrdf.rio.RDFHandlerException; | ||
import org.openrdf.rio.RDFWriter; | ||
import org.openrdf.rio.Rio; | ||
|
||
import com.google.refine.browsing.Engine; | ||
import com.google.refine.commands.Command; | ||
import com.google.refine.model.Project; | ||
import com.google.refine.model.Row; | ||
import com.google.refine.util.ParsingUtilities; | ||
import javax.servlet.ServletException; | ||
import javax.servlet.http.HttpServletRequest; | ||
import javax.servlet.http.HttpServletResponse; | ||
import java.io.IOException; | ||
import java.io.StringWriter; | ||
|
||
public class PreviewRdfCommand extends Command { | ||
|
||
|
@@ -43,8 +39,10 @@ public void doPost(HttpServletRequest request, HttpServletResponse response) | |
String jsonString = request.getParameter("schema"); | ||
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); | ||
final RdfSchema schema = RdfSchema.reconstruct(json); | ||
|
||
RdfRowVisitor visitor = new RdfRowVisitor(schema) { | ||
|
||
StringWriter sw = new StringWriter(); | ||
RDFWriter w = Rio.createWriter(RDFFormat.TURTLE, sw); | ||
RdfRowVisitor visitor = new RdfRowVisitor(schema, w) { | ||
final int limit = 10; | ||
int _count; | ||
@Override | ||
|
@@ -56,29 +54,26 @@ public boolean visit(Project project, int rowIndex, Row row) { | |
root.createNode(baseUri, factory, con, project, row, rowIndex,blanks); | ||
} | ||
_count +=1; | ||
|
||
try { | ||
flushStatements(); | ||
} catch (RepositoryException e) { | ||
e.printStackTrace(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These errors don't look like they get returned to a place where they're visible by the user. |
||
return true; | ||
} catch (RDFHandlerException e) { | ||
e.printStackTrace(); | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
}; | ||
|
||
Repository model = RdfExporter.buildModel(project, engine, visitor); | ||
StringWriter sw = new StringWriter(); | ||
try{ | ||
RepositoryConnection con = model.getConnection(); | ||
try{ | ||
RDFWriter w = Rio.createWriter(RDFFormat.TURTLE, sw); | ||
for(Vocabulary v:schema.getPrefixesMap().values()){ | ||
w.handleNamespace(v.getName(), v.getUri()); | ||
} | ||
con.export(w); | ||
}finally{ | ||
con.close(); | ||
} | ||
}catch(RepositoryException ex){ | ||
throw new RuntimeException(ex); | ||
}catch(RDFHandlerException ex){ | ||
throw new RuntimeException(ex); | ||
} | ||
|
||
for(Vocabulary v:schema.getPrefixesMap().values()){ | ||
w.handleNamespace(v.getName(), v.getUri()); | ||
} | ||
RdfExporter.buildModel(project, engine, visitor); | ||
|
||
JSONWriter writer = new JSONWriter(response.getWriter()); | ||
writer.object(); | ||
writer.key("v"); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,8 +13,7 @@ | |
import org.deri.grefine.rdf.app.ApplicationContext; | ||
import org.deri.grefine.rdf.vocab.Vocabulary; | ||
import org.deri.grefine.rdf.vocab.VocabularyIndexException; | ||
import org.openrdf.model.BNode; | ||
import org.openrdf.model.ValueFactory; | ||
import org.openrdf.model.*; | ||
import org.openrdf.repository.Repository; | ||
import org.openrdf.repository.RepositoryConnection; | ||
import org.openrdf.repository.RepositoryException; | ||
|
@@ -25,18 +24,24 @@ | |
import org.openrdf.rio.Rio; | ||
import org.openrdf.sail.memory.MemoryStore; | ||
|
||
import info.aduna.iteration.CloseableIteration; | ||
|
||
import com.google.refine.browsing.Engine; | ||
import com.google.refine.browsing.FilteredRows; | ||
import com.google.refine.browsing.RowVisitor; | ||
import com.google.refine.exporters.WriterExporter; | ||
import com.google.refine.model.Project; | ||
import com.google.refine.model.Row; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class RdfExporter implements WriterExporter{ | ||
|
||
private RDFFormat format; | ||
private ApplicationContext applicationContext; | ||
|
||
final static Logger logger = LoggerFactory.getLogger("RdfExporter"); | ||
|
||
public RdfExporter(ApplicationContext ctxt, RDFFormat f){ | ||
this.format = f; | ||
this.applicationContext = ctxt; | ||
|
@@ -48,73 +53,55 @@ public ApplicationContext getApplicationContext() { | |
|
||
public void export(Project project, Properties options, Engine engine, | ||
OutputStream outputStream) throws IOException { | ||
RdfSchema schema; | ||
try{ | ||
schema = Util.getProjectSchema(applicationContext,project); | ||
}catch(VocabularyIndexException ve){ | ||
throw new IOException("Unable to create index for RDF schema",ve); | ||
} | ||
Repository model = buildModel(project, engine, schema); | ||
try{ | ||
RepositoryConnection con = model.getConnection(); | ||
try{ | ||
RDFWriter writer = Rio.createWriter(format, outputStream); | ||
for(Vocabulary v:schema.getPrefixesMap().values()){ | ||
writer.handleNamespace(v.getName(), v.getUri()); | ||
} | ||
con.export(writer); | ||
}finally{ | ||
con.close(); | ||
} | ||
}catch(RepositoryException ex){ | ||
throw new RuntimeException(ex); | ||
}catch(RDFHandlerException ex){ | ||
throw new RuntimeException(ex); | ||
} | ||
export(project, options, engine, Rio.createWriter(format, outputStream)); | ||
} | ||
|
||
|
||
public void export(Project project, Properties options, Engine engine, | ||
Writer writer) throws IOException { | ||
public void export(Project project, Properties options, Engine engine, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is the indentation changed here? |
||
Writer writer) throws IOException { | ||
export(project, options, engine, Rio.createWriter(format, writer)); | ||
} | ||
|
||
private void export(Project project, Properties options, Engine engine, | ||
RDFWriter writer) throws IOException { | ||
RdfSchema schema; | ||
try{ | ||
schema = Util.getProjectSchema(applicationContext,project); | ||
}catch(VocabularyIndexException ve){ | ||
throw new IOException("Unable to create index for RDF schema",ve); | ||
} | ||
Repository model = buildModel(project, engine, schema); | ||
try{ | ||
RepositoryConnection con = model.getConnection(); | ||
try{ | ||
RDFWriter w = Rio.createWriter(format, writer); | ||
for(Vocabulary v:schema.getPrefixesMap().values()){ | ||
w.handleNamespace(v.getName(),v.getUri()); | ||
} | ||
con.export(w); | ||
}finally{ | ||
con.close(); | ||
} | ||
}catch(RepositoryException ex){ | ||
throw new RuntimeException(ex); | ||
for(Vocabulary v:schema.getPrefixesMap().values()){ | ||
writer.handleNamespace(v.getName(), v.getUri()); | ||
} | ||
|
||
exportModel(project, engine, schema, writer); | ||
}catch(RDFHandlerException ex){ | ||
throw new RuntimeException(ex); | ||
} | ||
} | ||
|
||
public Repository buildModel(final Project project, Engine engine, RdfSchema schema) throws IOException{ | ||
RdfRowVisitor visitor = new RdfRowVisitor(schema) { | ||
public Repository exportModel(final Project project, Engine engine, RdfSchema schema, RDFWriter writer) throws IOException{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't know if anyone has built on or extended the RDF extension, but a better engineering practice would be to deprecate the existing public method and add your new one rather than immediately deleting the old method (since it's public). |
||
RdfRowVisitor visitor = new RdfRowVisitor(schema, writer) { | ||
|
||
@Override | ||
public boolean visit(Project project, int rowIndex, Row row) { | ||
for(Node root:roots){ | ||
root.createNode(baseUri, factory, con, project, row, rowIndex,blanks); | ||
|
||
try { | ||
// flush here to preserve root ordering in the output file | ||
flushStatements(); | ||
} catch (RepositoryException e) { | ||
e.printStackTrace(); | ||
} catch (RDFHandlerException e) { | ||
e.printStackTrace(); | ||
} | ||
} | ||
return false; | ||
|
||
return false; | ||
} | ||
}; | ||
Repository model = buildModel(project, engine,visitor); | ||
|
||
return model; | ||
return buildModel(project, engine, visitor); | ||
} | ||
|
||
public static Repository buildModel(Project project, Engine engine, RdfRowVisitor visitor) { | ||
|
@@ -146,13 +133,15 @@ public static abstract class RdfRowVisitor implements RowVisitor{ | |
|
||
protected ValueFactory factory; | ||
protected RepositoryConnection con; | ||
protected RDFWriter writer; | ||
|
||
public Repository getModel() { | ||
return model; | ||
} | ||
|
||
public RdfRowVisitor(RdfSchema schema){ | ||
public RdfRowVisitor(RdfSchema schema, RDFWriter writer){ | ||
this.schema = schema; | ||
this.writer = writer; | ||
baseUri = schema.getBaseUri(); | ||
roots = schema.getRoots(); | ||
|
||
|
@@ -176,23 +165,63 @@ public RdfRowVisitor(RdfSchema schema){ | |
} | ||
public void end(Project project) { | ||
try { | ||
writer.endRDF(); | ||
if(con.isOpen()){ | ||
con.close(); | ||
} | ||
} catch (RepositoryException e) { | ||
throw new RuntimeException("",e); | ||
} | ||
} catch (RDFHandlerException e) { | ||
throw new RuntimeException("",e); | ||
} | ||
} | ||
|
||
public void start(Project project) { | ||
try{ | ||
con = model.getConnection(); | ||
factory = con.getValueFactory(); | ||
|
||
// Open RDF output | ||
writer.startRDF(); | ||
|
||
// Export namespace information | ||
CloseableIteration<? extends Namespace, RepositoryException> nsIter = con.getNamespaces(); | ||
try { | ||
while (nsIter.hasNext()) { | ||
Namespace ns = nsIter.next(); | ||
writer.handleNamespace(ns.getPrefix(), ns.getName()); | ||
} | ||
} finally { | ||
nsIter.close(); | ||
} | ||
|
||
}catch(RepositoryException ex){ | ||
throw new RuntimeException("",ex); | ||
} | ||
} catch (RDFHandlerException e) { | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
protected void flushStatements() throws RepositoryException, RDFHandlerException{ | ||
List<Resource> resourceList = con.getContextIDs().asList(); | ||
Resource[] resources = resourceList.toArray(new Resource[resourceList.size()]); | ||
|
||
// Export statements | ||
CloseableIteration<? extends Statement, RepositoryException> stIter = | ||
con.getStatements(null, null, null, false, resources); | ||
|
||
try { | ||
while (stIter.hasNext()) { | ||
this.writer.handleStatement(stIter.next()); | ||
} | ||
} finally { | ||
stIter.close(); | ||
} | ||
|
||
// empty the repository | ||
con.clear(); | ||
} | ||
|
||
abstract public boolean visit(Project project, int rowIndex, Row row); | ||
public RdfSchema getRdfSchema(){ | ||
return schema; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving these imports when they're unchanged adds noise to the diff