From 3593dd149d1e8b367199294fead9ad75f0d6206d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sat, 15 Nov 2014 15:07:20 +0100 Subject: [PATCH] Public Domain calculation: No default date, extensible properties, but still lots of TODOS, #6 --- src/main/java/dm2e2edm/Dm2e2Edm.java | 92 ++++++++++++------- .../java/eu/dm2e/utils/Dm2eValidationCLI.java | 52 ++++------- src/test/java/dm2e2edm/Dm2e2EdmTest.java | 21 ++++- 3 files changed, 93 insertions(+), 72 deletions(-) diff --git a/src/main/java/dm2e2edm/Dm2e2Edm.java b/src/main/java/dm2e2edm/Dm2e2Edm.java index 7418ee6..144449c 100644 --- a/src/main/java/dm2e2edm/Dm2e2Edm.java +++ b/src/main/java/dm2e2edm/Dm2e2Edm.java @@ -95,7 +95,7 @@ private SparqlQueries(String resName) { public static final Map> dm2eSuperClasses = new HashMap>(); private final static Resource OWL_THING = edmModel.createResource(NS.OWL.THING); - private final static DateTime EARLIEST_DATE = new DateTime(0, 1, 1, 0, 0); +// private final static DateTime EARLIEST_DATE = new DateTime(0, 1, 1, 0, 0); private static final Resource PUBLIC_DOMAIN_MARK = edmModel.createResource(NS.LICENSE.PUBLIC_DOMAIN_MARK); private static final Resource[] prettyTypes = { @@ -277,7 +277,7 @@ public Dm2e2Edm(Model inputModel, Model outputModel) { this.inputFile = null; this.inputSerialization = null; this.configProps = new Properties(); - this.publicDomainCutoffDate = EARLIEST_DATE; + this.publicDomainCutoffDate = null; } public Dm2e2Edm(Path inputFile, String inputSerialization, Path outputFile, String outputSerialization, Properties configProps, DateTime publicDomainCutoffDate) { @@ -299,7 +299,7 @@ public Dm2e2Edm(Path inputFile, String inputSerialization, Path outputFile, Stri } public Dm2e2Edm(Path inputFile, String inputSerialization, Path outputFile, String outputSerialization) { - this(inputFile, inputSerialization, outputFile, outputSerialization, new Properties(), EARLIEST_DATE); + this(inputFile, inputSerialization, outputFile, outputSerialization, new Properties(), null); } private synchronized void convertResourceInInputModel(Resource res) { @@ -618,43 +618,67 @@ else if (targetProp.getURI().equals(NS.SKOS.PROP_PREF_LABEL) && targetObject.isL // // Public Domain cutoff date // - } else if (targetProp.equals(inputModel.createProperty(NS.EDM.PROP_RIGHTS)) - && this.publicDomainCutoffDate.isAfter(EARLIEST_DATE)) { + } else if (this.publicDomainCutoffDate != null && targetProp.equals(prop(NS.EDM.PROP_RIGHTS))) { - boolean workIsInPublicDomain = false; - - // get dct:issued - DateTime dctIssued = null; - StmtIterator iterDctIssued = targetSubject.asResource().listProperties(prop(NS.DCTERMS.PROP_ISSUED)); - while (iterDctIssued.hasNext()) { - dctIssued = DateTime.parse(iterDctIssued.next().getObject().toString()); - break; + boolean subjectIsAggregation = getRdfTypes(targetSubject).contains(res(NS.ORE.CLASS_AGGREGATION)); + boolean subjectIsWebResource = getRdfTypes(targetSubject).contains(res(NS.EDM.CLASS_WEBRESOURCE)); + + Resource agg = null; + Resource cho = null; + + if (! subjectIsWebResource && ! subjectIsAggregation) { + log.error("<{}> has edm:rights but is neither aggregation nor webresource", targetSubject); + throw new RuntimeException(); + } else if (subjectIsAggregation) { + log.debug("it's an aggregation."); + agg = targetSubject; + } else if (subjectIsWebResource) { + StmtIterator iterAgg = inputModel.listStatements(null, null, targetSubject); + while (iterAgg.hasNext()) { + Resource possibleAgg = iterAgg.next().getSubject(); + log.debug("possibleAgg: {}", possibleAgg); + if (getRdfTypes(possibleAgg).contains(res(NS.ORE.CLASS_AGGREGATION))) { + agg = possibleAgg; + // TODO find the right aggregation. this is complicated and must probably + // be handled with global state. We need to know the right aggregation and + // mustn't produce contradicting right statements + break; + } + } + } + StmtIterator iterCHO = agg.asResource().listProperties(prop(NS.EDM.PROP_AGGREGATED_CHO)); + if (iterCHO.hasNext()) { + cho = iterCHO.next().getObject().asResource(); } - - // get dm2e:sentOn - DateTime dm2eSentOn = null; - StmtIterator iterDm2eSentOn = targetSubject.asResource().listProperties(prop(NS.DM2E.PROP_SENT_ON)); - while (iterDm2eSentOn.hasNext()) { - dm2eSentOn = DateTime.parse(iterDm2eSentOn.next().getObject().toString()); - break; + if (null == agg || null == cho) { + log.error("Couldnt find CHO or Agg for <{}> (CHO: <{}>)", targetSubject, cho); + throw new RuntimeException(); } - if (dctIssued != null && dctIssued.isBefore(this.publicDomainCutoffDate)) { - log.debug("PD because dct:issued {} < {}", dctIssued, this.publicDomainCutoffDate); - workIsInPublicDomain = true; - } - if (dm2eSentOn != null && dm2eSentOn.isBefore(this.publicDomainCutoffDate)) { - log.debug("PD because dm2e:sentOn {} < {}", dm2eSentOn, this.publicDomainCutoffDate); - workIsInPublicDomain = true; - } - if (workIsInPublicDomain) { - log.info("Changing rights statement to Public Domain."); - outputModel.add(targetSubject, targetProp, PUBLIC_DOMAIN_MARK); - skipGeneric = true; + // Check properties + List publicDomainProperties = new ArrayList<>(); + publicDomainProperties.add(prop(NS.DM2E.PROP_SENT_ON)); + publicDomainProperties.add(prop(NS.DCTERMS.PROP_ISSUED)); + for (Property pdProp : publicDomainProperties) { + StmtIterator iter = cho.listProperties(pdProp); + DateTime dateToCheck = null; + while (iter.hasNext()) { + dateToCheck = DateTime.parse(iter.next().getObject().toString()); + break; + } + if (dateToCheck != null && dateToCheck.isBefore(this.publicDomainCutoffDate)) { + log.debug(String.format("Public Domain because '%s' on <%s> was before '%s'", dateToCheck, pdProp, this.publicDomainCutoffDate)); + log.info("Changing rights statement of <{}> to Public Domain.", targetSubject); + skipGeneric = true; + outputModel.add(targetSubject, targetProp, PUBLIC_DOMAIN_MARK); + // TODO +// if (subjectIsWebResource) { +// log.info("Skipping edm:rights for this Public Domain WebResource <{}>", targetSubject); +// outputModel.add(targetSubject, targetProp, PUBLIC_DOMAIN_MARK); +// } + } } - - } log.debug("PROP: {}", targetProp.getURI()); diff --git a/src/main/java/eu/dm2e/utils/Dm2eValidationCLI.java b/src/main/java/eu/dm2e/utils/Dm2eValidationCLI.java index 4ae8dad..e73d799 100644 --- a/src/main/java/eu/dm2e/utils/Dm2eValidationCLI.java +++ b/src/main/java/eu/dm2e/utils/Dm2eValidationCLI.java @@ -19,17 +19,6 @@ import eu.dm2e.validation.ValidationLevel; import eu.dm2e.validation.validator.Dm2eValidatorVersion; -/** - * @author Konstantin Baierer - * - * Algorithm: - * Get data out of triplestore - * Translate properties and classes - * Fit additional requirements - * Translate datetime to lower granularity (strip the day part) - * Add dc:source to CHO, Feed.The.Pundit ... - * - */ public class Dm2eValidationCLI { @@ -72,7 +61,7 @@ private static void executeMain(String[] args) throws Exception { : line.getOptionValue("format"); // Minimum level - final ValidationLevel minLevel = (null == line.getOptionValue("level")) + final ValidationLevel level = (null == line.getOptionValue("level")) ? DEFAULT_LEVEL : ValidationLevel.valueOf(line.getOptionValue("level")); @@ -87,9 +76,6 @@ private static void executeMain(String[] args) throws Exception { // Whether to write to stdout final boolean writeToStdout = line.hasOption("stdout"); - // Skip okay results - boolean skipOk = line.hasOption("skipOk"); - // Input files final List fileList = line.getArgList(); @@ -112,11 +98,16 @@ private static void executeMain(String[] args) throws Exception { } if (writeToStdout) { - if (report.getHighestLevel().ordinal() >= minLevel.ordinal()) { - System.out.println(report.exportToString(minLevel, true, terse)); - } + System.out.println(report.exportToString(level, true, terse)); System.err.println("DONE validating " + fileName); } else { + final String outputFileName = fileName + outputFileSuffix; + File outfile = new File(outputFileName); + try { + FileUtils.writeStringToFile(outfile, report.exportToString(level, true, terse)); + } catch (IOException e) { + dieHelpfully("Error writing file to output file", e); + } StringBuilder sb = new StringBuilder(); sb.append("DONE ["); sb.append(++currentFile); @@ -125,19 +116,9 @@ private static void executeMain(String[] args) throws Exception { sb.append("]"); sb.append("["); sb.append(report.getHighestLevel().name()); - sb.append("]"); - if (! skipOk || report.getHighestLevel().ordinal() >= minLevel.ordinal()) { - final String outputFileName = fileName + outputFileSuffix; - File outfile = new File(outputFileName); - try { - FileUtils.writeStringToFile(outfile, report.exportToString(minLevel, true, terse)); - } catch (IOException e) { - dieHelpfully("Error writing file to output file", e); - } - sb.append(" See '"); - sb.append(outputFileName); - sb.append("'."); - } + sb.append("] See '"); + sb.append(outputFileName); + sb.append("'."); System.err.println(sb.toString()); } } @@ -213,16 +194,21 @@ private static void dieHelpfully(String msg, Exception e, boolean showUsage) { private static Options getOptions() { Options options = new Options(); + StringBuilder versionSB = new StringBuilder(); + for (Dm2eValidatorVersion validatorVersion : Dm2eValidatorVersion.values()) { + versionSB.append(validatorVersion.getVersionString()); + if (validatorVersion.ordinal() < Dm2eValidatorVersion.values().length -1) + versionSB.append(" | "); + } StringBuilder levelSB = new StringBuilder(); for (ValidationLevel thisLevel : ValidationLevel.values()) { levelSB.append(thisLevel.name()); if (thisLevel.ordinal() < ValidationLevel.values().length -1) levelSB.append(" | "); } - options.addOption("skipOk", false, "Skip writing results for valid results"); options.addOption(OptionBuilder .hasArgs(1) - .withArgName(Dm2eValidatorVersion.valuesAsTerseString()) + .withArgName(versionSB.toString()) .withDescription("DM2E Data Model version [REQUIRED]") .isRequired() .create("version")); diff --git a/src/test/java/dm2e2edm/Dm2e2EdmTest.java b/src/test/java/dm2e2edm/Dm2e2EdmTest.java index 11c85d4..869fa6b 100644 --- a/src/test/java/dm2e2edm/Dm2e2EdmTest.java +++ b/src/test/java/dm2e2edm/Dm2e2EdmTest.java @@ -306,21 +306,32 @@ public void testPublicDomain() throws Exception { final Property dctIssued = prop(m, NS.DCTERMS.PROP_ISSUED); final Property edmRights = prop(m, NS.EDM.PROP_RIGHTS); + final Property edmAggregatedCHO = prop(m, NS.EDM.PROP_AGGREGATED_CHO); final Resource restrictiveLicense = res(m, "http://example.org/more-restrictive-license"); final Resource pdLicense = res(m, NS.LICENSE.PUBLIC_DOMAIN_MARK); + final Resource someAgg = m.createResource("http://example.org/someAgg"); final Resource someCHO = m.createResource("http://example.org/someCHO"); + final Resource someWR = m.createResource("http://example.org/someWebResource"); m.add(someCHO, prop(m, NS.RDF.PROP_TYPE), res(m, NS.EDM.CLASS_PROVIDED_CHO)); + m.add(someAgg, prop(m, NS.RDF.PROP_TYPE), res(m, NS.ORE.CLASS_AGGREGATION)); + m.add(someWR, prop(m, NS.RDF.PROP_TYPE), res(m, NS.EDM.CLASS_WEBRESOURCE)); + m.add(someAgg, prop(m, NS.EDM.PROP_AGGREGATED_CHO), someCHO); + m.add(someAgg, prop(m, NS.EDM.PROP_IS_SHOWN_AT), someWR); m.add(someCHO, dctIssued, "1933-01-01"); - m.add(someCHO, edmRights, restrictiveLicense); + m.add(someAgg, edmRights, restrictiveLicense); + m.add(someWR, edmRights, restrictiveLicense); new Dm2e2Edm(m, out1934, DateTime.parse("1934-01-01")).run();; new Dm2e2Edm(m, outNone).run(); - assertThat(out1934.contains(someCHO, edmRights, pdLicense)).isTrue(); - assertThat(out1934.contains(someCHO, edmRights, restrictiveLicense)).isFalse(); - assertThat(outNone.contains(someCHO, edmRights, pdLicense)).isFalse(); - assertThat(outNone.contains(someCHO, edmRights, restrictiveLicense)).isTrue(); + assertThat(out1934.contains(someAgg, edmRights, pdLicense)).isTrue(); + assertThat(out1934.contains(someWR, edmRights, pdLicense)).isTrue(); + assertThat(out1934.contains(someAgg, edmRights, restrictiveLicense)).isFalse(); + + assertThat(outNone.contains(someAgg, edmRights, pdLicense)).isFalse(); + assertThat(outNone.contains(someAgg, edmRights, restrictiveLicense)).isTrue(); + assertThat(outNone.contains(someWR, edmRights, restrictiveLicense)).isTrue(); } /*