Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

processSentenceStream() now splits correctly for @prefix notation, too #91

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions src/main/java/org/acoli/conll/rdf/CoNLLRDFUpdater.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.LinkedHashMap;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -800,8 +802,7 @@ protected void processSentenceStream() throws IOException {
initThreads();
running = true;


String prefixCache = new String();
Map<String,String> prefix2declaration = new LinkedHashMap<String,String>();
String line;
String lastLine ="";
String buffer="";
Expand All @@ -818,22 +819,37 @@ protected void processSentenceStream() throws IOException {
// and the previous line did not start with @ or # or PREFIX
// check if the buffer contains a ttl prefix
if (buffer.contains("@prefix") || buffer.contains("PREFIX")) {
prefixCache = new String();
String newbuffer="";
for (String buffLine:buffer.split("\n")) {
if (buffLine.trim().startsWith("@prefix") || buffLine.trim().startsWith("PREFIX")) {
prefixCache += buffLine+"\n";
String prefix = buffLine.replaceAll("^.*(PREFIX|@prefix)\\s*","").replaceAll("\\s.*","");
prefix2declaration.put(prefix,buffLine+"\n");
} else {
newbuffer=newbuffer+buffLine+"\n";
}
}
} else {
buffer = prefixCache+buffer;
buffer=newbuffer;
}
}

if(!buffer.replaceAll("#[^\\n]*","").trim().equals("") &&
((line.startsWith("@") || line.startsWith("#")) || (line.startsWith("PREFIX")))
) {
buffer=buffer.trim();
List pfxlines=Arrays.asList(prefix2declaration.values());
Collections.reverse(pfxlines);
for(Object pfxline: pfxlines) {
buffer = pfxline.toString().replaceAll("[\\[\\],]","\n").trim()+"\n"+buffer;
}
buffer="\n\n"+buffer+"\n\n";

// GRAPH OUTPUT determine first sentence's id, if none were specified
if ((graphOutputDir != null) && (graphOutputSentences.isEmpty())) {
String sentID = readFirstSentenceID(buffer);
graphOutputSentences.add(sentID);
LOG.debug("Graph Output defaults to first sentence: " + sentID);
}

// TRIPLES OUTPUT determine first sentence's id, if none were specified
if ((triplesOutputDir != null) && (triplesOutputSentences.isEmpty())) {
String sentID = readFirstSentenceID(buffer);
Expand Down Expand Up @@ -866,10 +882,17 @@ protected void processSentenceStream() throws IOException {
}

// FINAL SENTENCE (with prefixes if necessary)
if (!(buffer.contains("@prefix") || buffer.contains("PREFIX"))) {
buffer = prefixCache+buffer;
if (! buffer.trim().equals("")) {
buffer=buffer.trim();
List pfxlines=Arrays.asList(prefix2declaration.values());
Collections.reverse(pfxlines);
for(Object pfxline: pfxlines) {
buffer = pfxline.toString().replaceAll("[\\[\\],]","\n").trim()+"\n"+buffer;
}
buffer="\n\n"+buffer+"\n\n";
}


// To address the edge case of no comments or prefixes occurring after the first sentence of a stream
// GRAPH OUTPUT determine first sentence's id, if none were specified
if ((graphOutputDir != null) && (graphOutputSentences.isEmpty())) {
Expand Down