Skip to content

Commit

Permalink
add new reader for tripletext
Browse files Browse the repository at this point in the history
  • Loading branch information
xamde committed Oct 29, 2023
1 parent 49747b3 commit 963f2bf
Show file tree
Hide file tree
Showing 13 changed files with 334 additions and 0 deletions.
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<module>reader-graphml</module>
<module>reader-tgf</module>
<module>reader-example</module>
<module>reader-tripletext</module>
<module>reader-jgrapht</module>
<module>engine</module>
<module>app-cmdline</module>
Expand Down
35 changes: 35 additions & 0 deletions reader-tripletext/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for the TripleText reader. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Inherits from the graphinout aggregator project. -->
<parent>
<groupId>com.calpano.graphinout</groupId>
<artifactId>graphinout</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>reader-tripletext</artifactId>
<name>reader-tripletext</name>
<description>TripleText Reader</description>

<dependencies>
<!-- compile: the shared base module -->
<dependency>
<groupId>com.calpano.graphinout</groupId>
<artifactId>base</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<!-- test -->
<!-- No versions given for the next two; presumably managed by the parent POM (confirm). -->
<dependency><groupId>org.junit.jupiter</groupId><artifactId>junit-jupiter</artifactId><scope>test</scope></dependency>
<dependency><groupId>io.github.classgraph</groupId><artifactId>classgraph</artifactId><scope>test</scope></dependency>
<!-- test-jar of the base module, test scope only -->
<dependency>
<groupId>com.calpano.graphinout</groupId>
<artifactId>base</artifactId>
<version>1.0-SNAPSHOT</version>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.calpano.graphinout.reader.example;

import javax.annotation.Nullable;

/**
 * Callback that receives triples one at a time.
 * <p>
 * The interface has a single abstract method and is used as a lambda target.
 *
 * @param <S> subject type
 * @param <P> predicate type
 * @param <O> object type
 */
@FunctionalInterface
public interface ITripleHandler<S, P, O> {

    /**
     * Called once for every triple.
     *
     * @param s    subject of the triple
     * @param p    predicate of the triple
     * @param o    object of the triple
     * @param meta optional meta text attached to the triple; {@code null} if there is none
     */
    void onTriple(S s, P p, O o, @Nullable String meta);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.calpano.graphinout.reader.example;

import javax.annotation.Nullable;

/**
 * Immutable holder for one triple and its optional meta text.
 * <p>
 * The fields are package-visible and read directly by code in this package.
 *
 * @param <S> subject type
 * @param <P> predicate type
 * @param <O> object type
 */
public class Triple<S, P, O> {

    /** Subject. */
    final S s;

    /** Predicate. */
    final P p;

    /** Object. */
    final O o;

    /** Meta text, or {@code null} if the triple carries none. */
    final @Nullable String meta;

    /**
     * Creates a triple from its four parts.
     *
     * @param s    subject
     * @param p    predicate
     * @param o    object
     * @param meta meta text, may be {@code null}
     */
    public Triple(S s, P p, O o, @Nullable String meta) {
        this.meta = meta;
        this.o = o;
        this.p = p;
        this.s = s;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package com.calpano.graphinout.reader.example;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

/**
* TODO sort index;
* TODO meta
*
* @param <S>
* @param <P>
* @param <O>
*/
public class TripleIndex<S, P, O> {

private Map<S, Map<P, Set<O>>> index = new HashMap<>();

public void forEach(ITripleHandler<S, P, O> handler) {
index.forEach((s, ps) -> //
ps.forEach((p, os) -> //
os.forEach(o -> //
handler.onTriple(s, p, o, null))));
}

public void index(S s, P p, O o) {
index.computeIfAbsent(s, _s -> new HashMap<>()).computeIfAbsent(p, _p -> new HashSet<>()).add(o);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.calpano.graphinout.reader.example;

import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Line parser for the TripleText syntax:
 *
 * <pre>
 * subject --predicate-- object
 * subject --predicate-- object .. meta
 * </pre>
 *
 * Spaces and tabs around each part are not part of the captured values.
 * Lines that do not have this shape are ignored.
 */
public class TripleText {

    /** Any run of spaces and tabs, matched possessively. */
    static final String SPACE = "[ \t]*+";

    static final Pattern p = Pattern.compile(SPACE //
            // subject
            + "(?<s>.*?)" //
            + SPACE
            // --predicate--
            + "--" + SPACE + "(?<p>.*?)" + SPACE + "--" + SPACE //
            // object
            + "(?<o>"
            // Negative lookahead BEFORE each character: the object never runs into a "..".
            // Reluctant, so trailing whitespace is left to the following SPACE instead of
            // ending up in the captured object.
            + "(?:(?![.][.]).)" //
            + "*?)" +
            // .. meta
            SPACE + "([.][.]" + SPACE + "(?<m>.*?)" + ")?" + SPACE);

    /**
     * Parses one line and, if it is a triple, reports it to the handler.
     * Lines that do not match the triple syntax are skipped silently.
     *
     * @param line    a single line of text, without line terminator
     * @param handler called at most once; its meta argument is {@code null} when the line has no
     *                {@code ..} part
     */
    public static void parseLine(String line, ITripleHandler<String, String, String> handler) {
        Matcher m = p.matcher(line);
        if (m.matches()) {
            handler.onTriple(m.group("s"), m.group("p"), m.group("o"), m.group("m"));
        }
    }

    /**
     * Bad code, but good for using in tests.
     *
     * @param line a single line of text
     * @return the parsed triple, or {@code null} if the line is not a triple
     */
    public static Triple<String, String, String> parseToTriple(String line) {
        // parseLine only reports through a callback, so capture the result in a holder
        AtomicReference<Triple<String, String, String>> t = new AtomicReference<>();
        parseLine(line, (s, p, o, m) -> t.set(new Triple<>(s, p, o, m)));
        return t.get();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package com.calpano.graphinout.reader.example;

import java.util.HashMap;
import java.util.Map;

/**
 * Collects the triples of one TripleText document.
 * <p>
 * Every subject becomes a {@link Node}. Triples with the predicate {@code label} set the
 * label of the subject node; all other triples are stored in a triple index.
 */
public class TripleTextModel {

    /** A subject of at least one triple. */
    public static class Node {
        final String id;
        /** Label from the most recent {@code label} triple, or {@code null} if there was none. */
        String label;

        public Node(String id) {
            this.id = id;
        }
    }

    private final TripleIndex<Node, String, String> index = new TripleIndex<>();
    /** Nodes by id. */
    private final Map<String, Node> nodes = new HashMap<>();

    /**
     * Reports all indexed (non-label) triples to the handler.
     *
     * @param handler receives each triple, with the subject as {@link Node}
     */
    public void forEachTriple(ITripleHandler<Node, String, String> handler) {
        index.forEach(handler);
    }

    /**
     * Adds one triple to the model, creating the subject node if it does not exist yet.
     *
     * @param s subject id
     * @param p predicate; {@code label} is treated as the node label and is not indexed
     * @param o object, or the label text for a {@code label} triple
     */
    public void indexTriple(String s, String p, String o) {
        // Always reuse the existing node. Node has no equals/hashCode, so replacing it
        // would split the subject's triples across two distinct index keys.
        Node subject = nodes.computeIfAbsent(s, Node::new);
        if (p.equals("label")) {
            subject.label = o;
            return;
        }
        index.index(subject, p, o);
    }

    /**
     * @return all subject nodes seen so far
     */
    public Iterable<Node> nodes() {
        return nodes.values();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package com.calpano.graphinout.reader.example;

import com.calpano.graphinout.base.gio.GioData;
import com.calpano.graphinout.base.gio.GioDataType;
import com.calpano.graphinout.base.gio.GioDocument;
import com.calpano.graphinout.base.gio.GioEdge;
import com.calpano.graphinout.base.gio.GioEndpoint;
import com.calpano.graphinout.base.gio.GioEndpointDirection;
import com.calpano.graphinout.base.gio.GioGraph;
import com.calpano.graphinout.base.gio.GioKey;
import com.calpano.graphinout.base.gio.GioKeyForType;
import com.calpano.graphinout.base.gio.GioNode;
import com.calpano.graphinout.base.gio.GioWriter;
import com.calpano.graphinout.base.input.InputSource;
import com.calpano.graphinout.base.input.SingleInputSource;
import com.calpano.graphinout.base.reader.ContentError;
import com.calpano.graphinout.base.reader.GioFileFormat;
import com.calpano.graphinout.base.reader.GioReader;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.function.Consumer;

/**
 * Reads a TripleText document line by line and writes it to a {@link GioWriter} as one graph:
 * one node per triple subject (with an optional {@code label} data element) and one edge
 * per remaining triple, pointing from the subject to the object.
 */
public class TripleTextReader implements GioReader {

    // NOTE(review): stored but not used anywhere in this class yet.
    private Consumer<ContentError> errorHandler;

    @Override
    public void errorHandler(Consumer<ContentError> errorHandler) {
        this.errorHandler = errorHandler;
    }

    @Override
    public GioFileFormat fileFormat() {
        // NOTE(review): "triple.txt" has no leading dot, unlike the other two entries - confirm intended.
        return new GioFileFormat("tripletext", "TripleText Format", ".tt", "triple.txt", ".tripletext");
    }

    /**
     * Parses the whole input into a model first, then writes document, key, graph, nodes and edges.
     *
     * @param inputSource must be a single (non-multi) input source; read as UTF-8
     * @param writer      receives the graph
     * @throws IllegalArgumentException if the input source is a multi-source
     * @throws IOException              if reading the input or writing to the writer fails
     */
    @Override
    public void read(InputSource inputSource, GioWriter writer) throws IOException {
        if (inputSource.isMulti()) {
            throw new IllegalArgumentException("Cannot handle multi-sources");
        }
        assert inputSource instanceof SingleInputSource;
        SingleInputSource sis = (SingleInputSource) inputSource;
        TripleTextModel tripleTextModel = new TripleTextModel();
        try (InputStreamReader isr = new InputStreamReader(sis.inputStream(), StandardCharsets.UTF_8); BufferedReader reader = new BufferedReader(isr)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // lines that are not triples are skipped by the parser; meta is dropped here
                TripleText.parseLine(line, (s, p, o, m) -> tripleTextModel.indexTriple(s, p, o));
            }
        }

        // and write the graph to our GioWriter
        writer.startDocument(GioDocument.builder().build());

        // declare keys
        writer.key(GioKey.builder().id("label").forType(GioKeyForType.Node).attributeName("label").attributeType(GioDataType.typeString).build());

        writer.startGraph(GioGraph.builder().build());

        // write nodes
        // NOTE(review): only subjects are written as nodes; objects appear solely as edge targets - confirm intended.
        for (TripleTextModel.Node node : tripleTextModel.nodes()) {
            writer.startNode(GioNode.builder().id(node.id).build());
            if (node.label != null) {
                writer.data(GioData.builder().id("label").value(node.label).build());
            }
            writer.endNode(null);
        }

        // write edges
        // NOTE(review): the predicate is not written to the edge - confirm intended.
        try {
            tripleTextModel.forEachTriple((sNode, p, o, meta) -> {
                assert sNode != null;
                assert sNode.id != null;
                assert p != null;
                assert o != null;
                try {
                    GioEndpoint sEndpoint = GioEndpoint.builder().node(sNode.id).type(GioEndpointDirection.Out).build();
                    GioEndpoint oEndpoint = GioEndpoint.builder().node(o).type(GioEndpointDirection.In).build();
                    writer.startEdge(GioEdge.builder().endpoints(Arrays.asList(sEndpoint, oEndpoint)).build());
                    // TODO meta
                    writer.endEdge();
                } catch (IOException e) {
                    // the handler cannot throw checked exceptions; tunnel it out of the lambda
                    throw new java.io.UncheckedIOException(e);
                }
            });
        } catch (java.io.UncheckedIOException e) {
            // surface the original IOException, as declared by this method
            throw e.getCause();
        }

        writer.endGraph(null);
        writer.endDocument();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.calpano.graphinout.reader.example;

import com.calpano.graphinout.base.GioService;
import com.calpano.graphinout.base.reader.GioReader;

import java.util.Arrays;
import java.util.List;

/**
 * Service entry point of the TripleText reader module; provides a {@link TripleTextReader}.
 */
public class TripleTextService implements GioService {

    /**
     * @return the id of this service, named after its module
     */
    @Override
    public String id() {
        return "reader-tripletext";
    }

    /**
     * @return a list with one new {@link TripleTextReader}
     */
    @Override
    public List<GioReader> readers() {
        return List.of(new TripleTextReader());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
com.calpano.graphinout.reader.example.TripleTextService
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.calpano.graphinout.reader.example;

import com.calpano.graphinout.base.AbstractReaderTest;
import com.calpano.graphinout.base.reader.GioReader;
import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Runs the tests inherited from {@link AbstractReaderTest} against the {@link TripleTextReader}.
 */
class TripleTextReaderTest extends AbstractReaderTest {

    /**
     * @return a list containing one {@link TripleTextReader}
     */
    @Override
    protected List<GioReader> readersToTest() {
        GioReader tripleTextReader = new TripleTextReader();
        return Arrays.asList(tripleTextReader);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.calpano.graphinout.reader.example;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for parsing single TripleText lines with {@link TripleText#parseToTriple(String)}.
 */
class TripleTextTest {

    /** A line without a {@code ..} part yields subject, predicate, object and no meta. */
    @Test
    void testNoMeta() {
        Triple<String, String, String> triple = TripleText.parseToTriple("A --is-- B");
        assertNotNull(triple);
        assertEquals("A", triple.s);
        assertEquals("is", triple.p);
        assertEquals("B", triple.o);
        assertNull(triple.meta);
    }

    /** The text after {@code ..} is reported as meta. */
    @Test
    void testMeta() {
        Triple<String, String, String> triple = TripleText.parseToTriple("A --is-- B .. meta");
        assertNotNull(triple);
        assertEquals("A", triple.s);
        assertEquals("is", triple.p);
        assertEquals("B", triple.o);
        assertEquals("meta", triple.meta);
    }

}
6 changes: 6 additions & 0 deletions reader-tripletext/src/test/resources/sample1.triple.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Hello!
Welcome to the World of TripleText.
TripleText --has type-- Embedded Syntax
Embedded Syntax --is subtype of-- Syntax
TripleText --created in-- 2019
And on goes the normal text.

0 comments on commit 963f2bf

Please sign in to comment.