Skip to content

Commit

Permalink
Move test fixtures into TSV plain text files.
Browse files Browse the repository at this point in the history
Test data for RelEx and Stanford are moved into TSV files.

The test classes (`TestRelEx` and `TestStanford`), in addition to having most of their content factored out, also had unnormalized line endings, so to Git these classes look as if they were replaced entirely.

The entirety of the test data now resides in these `.tsv` files. I think the format should be self-explanatory just by looking at the files.

Ant build and tests still work.

@linas I hope this is acceptable.

Fixed opencog#103.
  • Loading branch information
ceefour committed Jul 15, 2014
1 parent a5939b9 commit 2cec5cd
Show file tree
Hide file tree
Showing 12 changed files with 1,580 additions and 1,490 deletions.
34 changes: 20 additions & 14 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<property name="debuglevel" value="source,lines,vars"/>
<property name="target" value="1.5"/> <!-- this 1.5 is java, not relex -->
<property name="source" value="1.5"/> <!-- this 1.5 is java, not relex -->
<property name="maven.profile" value="java5"/>

<path id="relex.classpath">
<pathelement location="bin"/>
Expand Down Expand Up @@ -77,7 +78,7 @@
</not>
</condition>
</fail>
<artifact:settings file="java5.settings.xml"/>
<artifact:settings file="${maven.profile}.settings.xml"/>
<artifact:pom file="pom.xml"/>
<artifact:resolve>
<path refid="cp.compile" classpath="compile"/>
Expand Down Expand Up @@ -180,22 +181,27 @@
<target name="realclean" depends="clean"/>

<!-- Run tests -->
<target name="test" depends="build-tests">
<java classname="relex.test.TestStanford" failonerror="true" fork="yes">
<jvmarg line="-Xmx1024m"/>
<jvmarg line="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<target name="test-stanford" depends="build-tests">
<junit fork="true" printsummary="true" showoutput="true" maxmemory="1024m">
<jvmarg value="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<classpath refid="relex.classpath"/>
<arg line=""/>
</java>
<java classname="relex.test.TestRelEx" failonerror="true" fork="yes">
<jvmarg line="-Xmx1024m"/>
<jvmarg line="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<classpath path="src/test/resources"/>
<formatter type="brief" usefile="false"/>
<test name="relex.test.TestStanford"/>
</junit>
</target>
<target name="test-relex" depends="build-tests">
<junit fork="true" printsummary="true" showoutput="true" maxmemory="1024m">
<jvmarg value="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<classpath refid="relex.classpath"/>
<arg line=""/>
</java>
<classpath path="src/test/resources"/>
<formatter type="brief" usefile="false"/>
<test name="relex.test.TestRelEx"/>
</junit>
</target>
<target name="test" depends="test-stanford, test-relex"/>

<!-- Build a jar file, for public consumption -->
<target name="jar" depends="build-project">
Expand Down
4 changes: 4 additions & 0 deletions java7.settings.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Intentionally empty Maven settings file. build.xml loads
  "${maven.profile}.settings.xml", so this file is the one picked up when the
  maven.profile property is set to "java7".
-->
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
</settings>
41 changes: 24 additions & 17 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,17 @@
</resources>
</build>
<dependencies>
<dependency>
<groupId>net.sf.jwordnet</groupId>
<artifactId>jwnl</artifactId>
<version>1.4_rc3</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>net.sf.opencsv</groupId>
Expand Down Expand Up @@ -239,6 +250,12 @@
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>17.0</version>
</dependency>

<dependency>
<groupId>org.apache.odftoolkit</groupId>
<artifactId>odfdom-java</artifactId>
Expand Down Expand Up @@ -317,17 +334,6 @@
<version>3.2</version>
</dependency>

<dependency>
<groupId>net.sf.jwordnet</groupId>
<artifactId>jwnl</artifactId>
<version>1.4_rc3</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.linkgrammar</groupId>
<artifactId>linkgrammar</artifactId>
Expand All @@ -349,12 +355,6 @@
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-jdk5</artifactId>
<version>17.0</version>
</dependency>
</dependencies>
</profile>
<profile>
Expand All @@ -371,6 +371,13 @@
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-jdk5</artifactId>
<version>17.0</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
135 changes: 135 additions & 0 deletions src/java_test/relex/test/RelExCases.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package relex.test;

import java.io.InputStreamReader;
import java.net.URL;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;

import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;

/**
 * Loads RelEx / Stanford test fixtures from tab-separated files on the classpath.
 *
 * <p>The first row of a fixture file is treated as a header and skipped. After
 * that, each row is interpreted by its first two columns:
 * <ul>
 * <li>first column starts with {@code //}: a description; it closes the pending
 *     case (if any) and applies to every case that follows until the next
 *     description row;</li>
 * <li>first column non-blank: a sentence; it closes the pending case (if any)
 *     and starts a new one;</li>
 * <li>first column blank and second column non-blank: a relation expected for
 *     the current sentence;</li>
 * <li>anything else is ignored.</li>
 * </ul>
 *
 * @author ceefour
 */
public class RelExCases {

    private static final Logger log = LoggerFactory
            .getLogger(RelExCases.class);

    /** One fixture: a sentence, its expected relations and an optional description. */
    static class RelExCase {
        String sentence;
        Set<String> relations;
        Optional<String> description;

        RelExCase(String sentence, Set<String> relations, Optional<String> description) {
            this.description = description;
            this.sentence = sentence;
            this.relations = relations;
        }
    }

    /** Converts a {@link RelExCase} into a {@code {sentence, relations, description}} array. */
    static class RelExCaseToObjectArray implements Function<RelExCase, Object[]> {
        public Object[] apply(RelExCase input) {
            return new Object[] { input.sentence, input.relations, input.description };
        }
    }

    /**
     * Mutable parse state: the cases completed so far plus the case currently
     * being assembled.
     */
    private static final class CaseAccumulator {
        private final ImmutableList.Builder<RelExCase> cases = ImmutableList.builder();
        private Optional<String> description = Optional.absent();
        private Optional<String> sentence = Optional.absent();
        private ImmutableSet.Builder<String> relations = ImmutableSet.builder();

        /**
         * Emits the pending case, if a sentence has been seen, and resets the
         * sentence and relation state. The description is kept on purpose so it
         * carries over to the following cases.
         *
         * <p>When no sentence is pending nothing is reset, so relation rows that
         * appear before a sentence stay accumulated and end up on the next case.
         */
        void flush() {
            if (!sentence.isPresent()) {
                return;
            }
            cases.add(new RelExCase(sentence.get(), relations.build(), description));
            sentence = Optional.absent();
            relations = ImmutableSet.builder();
        }
    }

    /**
     * Parses a fixture file into a list of cases.
     *
     * @param casesPath classpath resource path of the TSV file, resolved relative
     *        to this class (e.g. {@code "/relex-comparatives.tsv"})
     * @return the cases in file order
     * @throws NullPointerException if the resource cannot be found on the classpath
     * @throws RuntimeException if the resource cannot be read or parsed
     */
    protected static ImmutableList<RelExCase> parseTsv(String casesPath) {
        URL casesTsv = Preconditions.checkNotNull(RelExCases.class.getResource(casesPath),
                "Cannot load '%s' from classpath", casesPath);
        log.info("Loading '{}'...", casesTsv);
        try {
            // Decode explicitly as UTF-8 so the parsed fixtures do not depend on
            // the platform default charset of the machine running the tests.
            CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream(), "UTF-8"),
                    '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER);
            try {
                CaseAccumulator acc = new CaseAccumulator();
                reader.readNext(); // skip header line
                String[] row;
                while ((row = reader.readNext()) != null) {
                    if (row.length == 0) {
                        continue;
                    }
                    String first = row[0];
                    if (first.startsWith("//")) {
                        // description row: closes the pending case
                        acc.flush();
                        acc.description = Optional.of(first.substring(2).trim());
                    } else if (!first.trim().isEmpty()) {
                        // sentence row: closes the pending case and starts a new one
                        acc.flush();
                        acc.sentence = Optional.of(first.trim());
                    } else if (row.length >= 2 && !row[1].trim().isEmpty()) {
                        // relation row
                        acc.relations.add(row[1].trim());
                    }
                }
                // the last case has no following row to trigger its flush
                acc.flush();
                final ImmutableList<RelExCase> caseList = acc.cases.build();
                log.info("Got {} cases from '{}'", caseList.size(), casesPath);
                return caseList;
            } finally {
                reader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot read " + casesTsv, e);
        }
    }

    /**
     * Loads a fixture file and converts every case into a
     * {@code {sentence, relations, description}} array.
     */
    private static Object[] provide(String casesPath) {
        return FluentIterable.from(parseTsv(casesPath))
                .transform(new RelExCaseToObjectArray()).toArray(Object[].class);
    }

    /** @return the cases from {@code /relex-comparatives.tsv}, one {@code Object[]} per case */
    public static Object[] provideComparatives() {
        return provide("/relex-comparatives.tsv");
    }

    /** @return the cases from {@code /relex-conjunction.tsv}, one {@code Object[]} per case */
    public static Object[] provideConjunction() {
        return provide("/relex-conjunction.tsv");
    }

    /** @return the cases from {@code /relex-extraposition.tsv}, one {@code Object[]} per case */
    public static Object[] provideExtraposition() {
        return provide("/relex-extraposition.tsv");
    }

    /** @return the cases from {@code /stanford-untagged.tsv}, one {@code Object[]} per case */
    public static Object[] provideStanfordUntagged() {
        return provide("/stanford-untagged.tsv");
    }

    /** @return the cases from {@code /stanford-tagged.tsv}, one {@code Object[]} per case */
    public static Object[] provideStanfordTagged() {
        return provide("/stanford-tagged.tsv");
    }

}
Loading

0 comments on commit 2cec5cd

Please sign in to comment.