Skip to content
This repository has been archived by the owner on Aug 11, 2020. It is now read-only.

Commit

Permalink
Implement basic public domain switch, #6
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Nov 14, 2014
1 parent 710a8f6 commit a296205
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 17 deletions.
85 changes: 77 additions & 8 deletions src/main/java/dm2e2edm/Dm2e2Edm.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.Properties;
import java.util.Set;

import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -94,6 +95,9 @@ private SparqlQueries(String resName) {
public static final Map<Resource,LinkedHashSet<Resource>> dm2eSuperClasses = new HashMap<Resource, LinkedHashSet<Resource>>();

private final static Resource OWL_THING = edmModel.createResource(NS.OWL.THING);
// Sentinel cutoff (year 0, month 1, day 1, 00:00) used by the constructors that take no explicit
// cutoff date. The public domain rewrite of edm:rights only runs when the configured cutoff is
// strictly after this value, so this date effectively means "switch disabled".
private final static DateTime EARLIEST_DATE = new DateTime(0, 1, 1, 0, 0);
// Resource for the public domain mark URI; added as the object of the rights statement when a
// work is determined to be in the public domain.
private static final Resource PUBLIC_DOMAIN_MARK = edmModel.createResource(NS.LICENSE.PUBLIC_DOMAIN_MARK);

private static final Resource[] prettyTypes = {
edmModel.createResource(NS.ORE.CLASS_AGGREGATION),
edmModel.createResource(NS.EDM.CLASS_AGENT),
Expand Down Expand Up @@ -247,7 +251,22 @@ private static LinkedHashSet<Resource> findSuperClassesIn(Model source, Resource
private final Properties configProps;
private final Set<Resource> skipSet = new HashSet<>();
private final Set<String> skosPrefLabelCache = new HashSet<>();

private final DateTime publicDomainCutoffDate;

/**
 * Creates a converter that works directly on in-memory models.
 *
 * <p>Both models receive the shared namespace prefix map. No input/output file or
 * serialization is associated with the instance, and the configuration is an empty
 * {@link Properties} object.
 *
 * @param inputModel             model containing the statements to convert
 * @param outputModel            model the converted statements are added to
 * @param publicDomainCutoffDate cutoff date for the public domain switch; {@code null} is
 *                               treated like {@code EARLIEST_DATE}, i.e. the switch stays
 *                               disabled, instead of failing later with a
 *                               {@link NullPointerException} when a rights statement is
 *                               processed
 */
public Dm2e2Edm(Model inputModel, Model outputModel, DateTime publicDomainCutoffDate) {
    this.inputModel = inputModel;
    this.outputModel = outputModel;
    inputModel.setNsPrefixes(nsPrefixes);
    outputModel.setNsPrefixes(nsPrefixes);
    // Model-based instances have no backing files.
    this.outputFile = null;
    this.outputSerialization = null;
    this.inputFile = null;
    this.inputSerialization = null;
    this.configProps = new Properties();
    // The cutoff is dereferenced unconditionally while converting edm:rights statements,
    // so normalize a missing value to the "disabled" sentinel here.
    this.publicDomainCutoffDate = (publicDomainCutoffDate != null) ? publicDomainCutoffDate : EARLIEST_DATE;
}


public Dm2e2Edm(Model inputModel, Model outputModel) {
this.inputModel = inputModel;
this.outputModel = outputModel;
Expand All @@ -258,10 +277,10 @@ public Dm2e2Edm(Model inputModel, Model outputModel) {
this.inputFile = null;
this.inputSerialization = null;
this.configProps = new Properties();
this.publicDomainCutoffDate = EARLIEST_DATE;
}

public Dm2e2Edm(Path inputFile, String inputSerialization,
Path outputFile, String outputSerialization, Properties configProps) {
public Dm2e2Edm(Path inputFile, String inputSerialization, Path outputFile, String outputSerialization, Properties configProps, DateTime publicDomainCutoffDate) {
super();
this.inputModel = ModelFactory.createDefaultModel();
this.outputModel = ModelFactory.createDefaultModel();
Expand All @@ -272,11 +291,15 @@ public Dm2e2Edm(Path inputFile, String inputSerialization,
this.outputFile = outputFile;
this.outputSerialization = outputSerialization;
this.configProps = configProps;
this.publicDomainCutoffDate = publicDomainCutoffDate;
}

public Dm2e2Edm(Path inputFile, String inputSerialization,
Path outputFile, String outputSerialization) {
this(inputFile, inputSerialization, outputFile, outputSerialization, new Properties());
/**
 * Creates a file-based converter with an explicit public domain cutoff date and an empty
 * configuration.
 *
 * <p>Delegates to the six-argument constructor, passing a fresh {@link Properties} instance.
 *
 * @param inputFile              path of the file to read
 * @param inputSerialization     serialization name of the input
 * @param outputFile             path of the file to write
 * @param outputSerialization    serialization name of the output
 * @param publicDomainCutoffDate cutoff date for the public domain switch
 */
public Dm2e2Edm(Path inputFile, String inputSerialization, Path outputFile, String outputSerialization, DateTime publicDomainCutoffDate) {
this(inputFile, inputSerialization, outputFile, outputSerialization, new Properties(), publicDomainCutoffDate);
}

/**
 * Creates a file-based converter with an empty configuration and no public domain cutoff.
 *
 * <p>Delegates to the six-argument constructor with a fresh {@link Properties} instance and
 * {@code EARLIEST_DATE} as cutoff. Because the rights rewrite only applies when the cutoff is
 * after {@code EARLIEST_DATE}, the public domain switch is inactive for instances created here.
 *
 * @param inputFile           path of the file to read
 * @param inputSerialization  serialization name of the input
 * @param outputFile          path of the file to write
 * @param outputSerialization serialization name of the output
 */
public Dm2e2Edm(Path inputFile, String inputSerialization, Path outputFile, String outputSerialization) {
this(inputFile, inputSerialization, outputFile, outputSerialization, new Properties(), EARLIEST_DATE);
}

private synchronized void convertResourceInInputModel(Resource res) {
Expand Down Expand Up @@ -355,7 +378,7 @@ private synchronized void convertResourceInInputModel(Resource res) {
}

/**
 * Returns the property with the given URI, created against the input model.
 *
 * <p>Delegates to {@code prop(String)}, which calls {@code inputModel.createProperty}; the
 * previous body carried a second, unreachable {@code return} statement that does not compile.
 *
 * @param uri URI of the property
 * @return the property for {@code uri}
 */
private final synchronized Property res(String uri) {
    return prop(uri);
}

private final synchronized String getLiteralString(RDFNode res, Property prop) {
Expand Down Expand Up @@ -413,7 +436,7 @@ private synchronized void addToTarget(Resource targetSubject, Property targetPro
// log.debug(" S: {}", targetSubject);
// log.debug(" P: {}", targetProp);
// log.debug(" O: {}", targetObject);
log.debug("ORIGINAL PROPERTY {} ", origProp);
// log.debug("ORIGINAL PROPERTY {} ", origProp);


// If this flag is set, skip adding the statement using the generic solution
Expand Down Expand Up @@ -590,6 +613,48 @@ else if (targetProp.getURI().equals(NS.SKOS.PROP_PREF_LABEL) && targetObject.isL
}
}
}


//
// Public Domain cutoff date
//
} else if (targetProp.equals(inputModel.createProperty(NS.EDM.PROP_RIGHTS))
&& this.publicDomainCutoffDate.isAfter(EARLIEST_DATE)) {

boolean workIsInPublicDomain = false;

// get dct:issued
DateTime dctIssued = null;
StmtIterator iterDctIssued = targetSubject.asResource().listProperties(prop(NS.DCTERMS.PROP_ISSUED));
while (iterDctIssued.hasNext()) {
dctIssued = DateTime.parse(iterDctIssued.next().getObject().toString());
break;
}

// get dm2e:sentOn
DateTime dm2eSentOn = null;
StmtIterator iterDm2eSentOn = targetSubject.asResource().listProperties(prop(NS.DM2E.PROP_SENT_ON));
while (iterDm2eSentOn.hasNext()) {
dm2eSentOn = DateTime.parse(iterDm2eSentOn.next().getObject().toString());
break;
}

if (dctIssued != null && dctIssued.isBefore(this.publicDomainCutoffDate)) {
log.debug("PD because dct:issued {} < {}", dctIssued, this.publicDomainCutoffDate);
workIsInPublicDomain = true;
}
if (dm2eSentOn != null && dm2eSentOn.isBefore(this.publicDomainCutoffDate)) {
log.debug("PD because dm2e:sentOn {} < {}", dm2eSentOn, this.publicDomainCutoffDate);
workIsInPublicDomain = true;
}

if (workIsInPublicDomain) {
log.info("Changing rights statement to Public Domain.");
outputModel.add(targetSubject, targetProp, PUBLIC_DOMAIN_MARK);
skipGeneric = true;
}


}

log.debug("PROP: {}", targetProp.getURI());
Expand All @@ -614,6 +679,10 @@ else if (targetProp.getURI().equals(NS.SKOS.PROP_PREF_LABEL) && targetObject.isL
}

}

/**
 * Convenience shortcut for obtaining a property from the input model.
 *
 * @param theProp URI of the property
 * @return the result of {@code inputModel.createProperty(theProp)}
 */
private Property prop(String theProp) {
    final Property property = inputModel.createProperty(theProp);
    return property;
}
// private void addToTarget(Resource targetSubject, Property targetProp, String targetObject) {
// outputModel.add(targetSubject, targetProp, targetObject);
// }
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/eu/dm2e/NS.java
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ public static final class DM2E {
public static final String PROP_SUBTITLE = BASE + "subtitle";
public static final String PROP_WATERMARK = BASE + "watermark";
public static final String PROP_WRITER = BASE + "writer";
public static final String PROP_SENT_ON = BASE + "sentOn";
public static final String CLASS_WORK = BASE + "Work";
public static final String CLASS_PARAGRAPH = BASE + "Paragraph";
public static final String CLASS_PUBLICATION = BASE + "Publication";
Expand Down Expand Up @@ -776,4 +777,7 @@ public static final class DAMOVA {
public static final String BASE = "http://www.mozajka.co/LOD/MM/";
}

/**
 * URIs of licenses / rights statements.
 */
public static final class LICENSE {
/** URI of the Creative Commons Public Domain Mark 1.0. */
public static final String PUBLIC_DOMAIN_MARK = "http://creativecommons.org/publicdomain/mark/1.0/";
}
}
48 changes: 39 additions & 9 deletions src/main/java/eu/dm2e/utils/Dm2e2EdmCLI.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
package eu.dm2e.utils;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
Expand All @@ -18,8 +14,7 @@
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.joda.time.DateTime;

import dm2e2edm.Dm2e2Edm;

Expand Down Expand Up @@ -81,6 +76,8 @@ public static void main(String[] args) throws ParseException {
// ExecutorService threadPool = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
// ExecutorService threadPool = Executors.newCachedThreadPool();

String cutoffDateVal = line.getOptionValue("public_domain_date");

// Run !
if (line.hasOption("input_dir")) {
Path inputDir = Paths.get(line.getOptionValue("input_dir"));
Expand All @@ -100,7 +97,10 @@ public static void main(String[] args) throws ParseException {
}
Path curOut = Paths.get(outputDir.toString(), curIn.getFileName() + suffix );
System.out.print(String.format("[%d/%d] Converting %s -> %s.\r", ++cur, total, curIn, curOut));
Dm2e2Edm worker = new Dm2e2Edm(curIn, inFormat, curOut, outFormat);
Dm2e2Edm worker =
null == cutoffDateVal
? new Dm2e2Edm(curIn, inFormat, curOut, outFormat)
: new Dm2e2Edm(curIn, inFormat, curOut, outFormat, DateTime.parse(cutoffDateVal));
// threadPool.execute(worker);
worker.run();
}
Expand All @@ -110,7 +110,10 @@ public static void main(String[] args) throws ParseException {
Path curIn = Paths.get(filename);
Path curOut = Paths.get(outputDir.toString(), curIn.getFileName() + suffix );
System.out.print(String.format("Converting %s -> %s.\r", curIn, curOut));
Dm2e2Edm worker = new Dm2e2Edm(curIn, inFormat, curOut, outFormat);
Dm2e2Edm worker =
null == cutoffDateVal
? new Dm2e2Edm(curIn, inFormat, curOut, outFormat)
: new Dm2e2Edm(curIn, inFormat, curOut, outFormat, DateTime.parse(cutoffDateVal));
worker.run();
}
}
Expand Down Expand Up @@ -156,7 +159,16 @@ private static CommandLine parseOptions(String[] args)
}
}

if (! line.hasOption("input_dir") && ! line.hasOption("input_file")) {
if (line.hasOption("help")) {
HelpFormatter formatter = new HelpFormatter();
formatter.setWidth(100);
formatter.printHelp("java -jar dm2e2edm.jar --input_file <input_file> | --input_dir <input_dir> [options]",
"Convert DM2E to EDM",
getOptions(),
""
);
System.exit(0);
} else if (! line.hasOption("input_dir") && ! line.hasOption("input_file")) {
dieHelpfully("Must set either 'input_dir' or 'input_file'");
} else if (line.hasOption("input_dir")) {
// --input_dir
Expand Down Expand Up @@ -194,6 +206,16 @@ private static CommandLine parseOptions(String[] args)
dieHelpfully("Error creating output directory: " + DEFAULT_OUTPUT_DIR, e, true);
}
}
// --public_domain_date
final String cutoffDateVal = line.getOptionValue("public_domain_date");
if (null != cutoffDateVal) {
try {
DateTime.parse(cutoffDateVal);
} catch (Exception e) {
dieHelpfully("Error parsing date value for 'public_domain_date'");
}
}

} catch (ParseException e) {
dieHelpfully("Error parsing command line options: ", e, true);
}
Expand All @@ -204,6 +226,9 @@ private static CommandLine parseOptions(String[] args)
private static Options getOptions() {
Options options = new Options();

options.addOption(OptionBuilder
.withDescription("Show help")
.create("help"));
options.addOption(OptionBuilder
.hasArgs()
.withDescription("Input RDF file")
Expand All @@ -212,6 +237,11 @@ private static Options getOptions() {
.hasArgs(1)
.withDescription("Input directory of RDF files")
.create("input_dir"));
options.addOption(OptionBuilder
.hasArgs(1)
.withArgName("YYYY-MM-DD")
.withDescription("Cutoff Date for works in the public domain [Default: 0000-01-01]")
.create("public_domain_date"));
options.addOption(OptionBuilder
.hasArgs(1)
.withArgName("directory")
Expand Down
28 changes: 28 additions & 0 deletions src/test/java/dm2e2edm/Dm2e2EdmTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;

import org.joda.time.DateTime;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
Expand Down Expand Up @@ -117,6 +118,7 @@ public void testHasMetInAggregation1()
assertThat(outputModel.contains(agg, outputModel.createProperty(NS.DC.PROP_DATE))).isFalse();
}

@SuppressWarnings("deprecation")
@Test
public void testHasMetInAggregation2() throws URISyntaxException, MalformedURLException, IOException {
Path inFile = Paths.get(Dm2e2Edm.class.getResource("/onbcodices2BZ9671240X.xml").toURI());
Expand Down Expand Up @@ -295,6 +297,32 @@ public void testHoldingInst() throws Exception {
out.write(sw, "TURTLE");
log.debug(sw.toString());
}

/**
 * Checks the public domain switch on a single provided CHO that was issued on 1933-01-01
 * and carries a restrictive rights statement.
 *
 * <ul>
 * <li>With a cutoff date of 1934-01-01 the rights statement must be replaced by the
 * public domain mark.</li>
 * <li>Without a cutoff date the original rights statement must be kept untouched.</li>
 * </ul>
 */
@Test
public void testPublicDomain() throws Exception {
    Model m = ModelFactory.createDefaultModel();
    Model out1934 = ModelFactory.createDefaultModel();
    Model outNone = ModelFactory.createDefaultModel();

    final Property dctIssued = prop(m, NS.DCTERMS.PROP_ISSUED);
    final Property edmRights = prop(m, NS.EDM.PROP_RIGHTS);
    final Resource restrictiveLicense = res(m, "http://example.org/more-restrictive-license");
    final Resource pdLicense = res(m, NS.LICENSE.PUBLIC_DOMAIN_MARK);

    // Input: one CHO issued before the cutoff, with a non-public-domain license.
    final Resource someCHO = m.createResource("http://example.org/someCHO");
    m.add(someCHO, prop(m, NS.RDF.PROP_TYPE), res(m, NS.EDM.CLASS_PROVIDED_CHO));
    m.add(someCHO, dctIssued, "1933-01-01");
    m.add(someCHO, edmRights, restrictiveLicense);

    // Convert the same input once with and once without a cutoff date.
    new Dm2e2Edm(m, out1934, DateTime.parse("1934-01-01")).run();
    new Dm2e2Edm(m, outNone).run();

    // Cutoff after the issue date: license is switched to the public domain mark.
    assertThat(out1934.contains(someCHO, edmRights, pdLicense)).isTrue();
    assertThat(out1934.contains(someCHO, edmRights, restrictiveLicense)).isFalse();
    // No cutoff: license is passed through unchanged.
    assertThat(outNone.contains(someCHO, edmRights, pdLicense)).isFalse();
    assertThat(outNone.contains(someCHO, edmRights, restrictiveLicense)).isTrue();

}
/*
@Test
public void testInversePartOf() throws Exception {
Expand Down

0 comments on commit a296205

Please sign in to comment.