From 5cb2a28f84fd0a9d4e013c10b009e8ae88ee8ede Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 1 Jul 2019 16:32:10 -0700 Subject: [PATCH 01/16] Before, we hard-coded the selection of the first resource map after querying solr. Now we go through the list and choose the one that doesn't have the obsoletedBy field. Ref: https://github.com/NCEAS/metacat/issues/1363 --- src/edu/ucsb/nceas/metacat/dataone/MNodeService.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index 5dd4e565e..7ba4c95b6 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -2159,7 +2159,16 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws // try the SOLR index List potentialOreIdentifiers = this.lookupOreFor(session, originalIdentifier); if (potentialOreIdentifiers != null) { - potentialOreIdentifier = potentialOreIdentifiers.get(0); + for(Identifier id :potentialOreIdentifiers) { + if (id != null && id.getValue() != null && !id.getValue().trim().equals("")) { + SystemMetadata sys = this.getSystemMetadata(session, id); + if(sys != null && sys.getObsoletedBy() == null) { + //found the non-obsotetedBy ore document. + potentialOreIdentifier = id; + break; + } + } + } try { oreInputStream = this.get(session, potentialOreIdentifier); } catch (NotFound nf2) { From 311ef1c3f755f82f103da3e200540ce725c47338 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 5 Jul 2019 17:54:16 -0700 Subject: [PATCH 02/16] Add a new class and its junit test to modify the existing resource map file by Jena. The changes aren't finished yet. 
Ref: https://github.com/NCEAS/metacat/issues/1366 --- .../resourcemap/ResourceMapModifier.java | 172 ++++++++++++++++++ .../resourcemap/ResourceMapModifierTest.java | 89 +++++++++ test/resourcemap-with-prov.xml | 48 +++++ 3 files changed, 309 insertions(+) create mode 100644 src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java create mode 100644 test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java create mode 100644 test/resourcemap-with-prov.xml diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java new file mode 100644 index 000000000..8f2a19984 --- /dev/null +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -0,0 +1,172 @@ +/** + * '$RCSfile$' + * Copyright: 2000-2019 Regents of the University of California and the + * National Center for Ecological Analysis and Synthesis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package edu.ucsb.nceas.metacat.dataone.resourcemap; + +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLEncoder; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.Map; +import java.util.TimeZone; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.service.types.v1.Identifier; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdf.model.Selector; +import com.hp.hpl.jena.rdf.model.SimpleSelector; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; + +import edu.ucsb.nceas.metacat.properties.PropertyService; + +/** + * This class will create a new resource map by modifying a given resourceMap input stream. 
+ * @author tao + * + */ +public class ResourceMapModifier { + private final static String DEFAULT_CN_URI = "https://cn.dataone.org/cn"; + private final static String SLASH = "/"; + private final static String RESOLVE = "cn/v2/resolve/"; + private final static String TERM_NAMESPACE = "http://purl.org/dc/terms/"; + private final static String TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; + private final static String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + + private static Log log = LogFactory.getLog(ResourceMapModifier.class); + private Identifier oldResourceMapId = null; + private Identifier newResourceMapId = null; + private static String baseURI = null; + static { + try { + String cnUrl = PropertyService.getProperty("D1Client.CN_URL"); + if(cnUrl.endsWith(SLASH)) { + baseURI = cnUrl + RESOLVE; + } else { + baseURI = cnUrl + SLASH + RESOLVE; + } + } catch (Exception e) { + log.warn("ResourceMapModifier.ResourceMapModifier - couldn't get the value of the property D1Client.CN_URL and Metacat will the default production cn url as the URI base"); + baseURI = DEFAULT_CN_URI + SLASH + RESOLVE; + } + } + + /** + * Constructor + * @param oldResourceMapId the identifier of the old resource map which will be modified + * @param newResourceMapId the identifier of the new resource map which will be generated + */ + public ResourceMapModifier(Identifier oldResourceMapId, Identifier newResourceMapId) { + this.oldResourceMapId = oldResourceMapId; + this.newResourceMapId = newResourceMapId; + + + } + + + /** + * Create new resource map by replacing obsoleted ids by new ids. 
+ * @param obsoletedBys a map represents the ids' with the obsoletedBy relationship - the keys are the one need to be obsoleted (replaced); value are the new ones need to be used + * @param originalResourceMap the content of original resource map + * @param newResourceMap the place where the created new resource map will be written + * @throws UnsupportedEncodingException + */ + public void replaceObsoletedIds(MapobsoletedBys, InputStream originalResourceMap, OutputStream newResourceMap ) throws UnsupportedEncodingException { + //create an empty model + Model model = ModelFactory.createDefaultModel(); + //read the RDF/XML file + model.read(originalResourceMap, null); + //generate a new resource for the new resource map identifier + Resource subject = null; + Property predicate = null; + RDFNode object = null; + Selector selector = new SimpleSelector(subject, predicate, object); + //StmtIterator iterator = model.listStatements(selector); + + Resource originalOre = model.getResource("https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3Ae62c781c-643b-41f3-a0b0-9f6cbd80a708"); + StmtIterator iterator = originalOre.listProperties(); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + Resource sub = statement.getSubject(); + System.out.println("the subject is "+sub.getURI());; + Property pred = statement.getPredicate(); + System.out.println("the predicate is "+pred.getLocalName()); + RDFNode obj = statement.getObject(); + System.out.println("the object "+obj.toString()); + if(obj.isResource()) { + Resource res = (Resource)obj; + System.out.println("namespace "+res.getNameSpace()); + System.out.println("local name "+res.getLocalName()); + + } + } + + //write it to standard out + generateNewOREId(model); + model.write(newResourceMap); + } + + /* + * This method generates a Resource object for the new ore id in the given model + */ + private void generateNewOREId(Model model) throws UnsupportedEncodingException { + String escaptedNewOreId = 
URLEncoder.encode(newResourceMapId.getValue(), "UTF-8"); + String uri = baseURI + escaptedNewOreId; + Resource resource = model.createResource(uri); + //create a identifier property (statement) + Property identifierPred = ResourceFactory.createProperty(TERM_NAMESPACE, "identifier"); + Literal identifierObj = ResourceFactory.createPlainLiteral(newResourceMapId.getValue()); + Statement state = ResourceFactory.createStatement(resource, identifierPred, identifierObj); + model.add(state); + //create a modification time statement + Property modificationPred = ResourceFactory.createProperty(TERM_NAMESPACE, "modified"); + Date date = new Date(); + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.US); + format.setTimeZone(TimeZone.getTimeZone("UTC")); + Literal modificationObj = ResourceFactory.createTypedLiteral(format.format(date), XSDDatatype.XSDdateTime); + Statement state2 = ResourceFactory.createStatement(resource, modificationPred, modificationObj); + model.add(state2); + //create a describes statement + Property describesPred = ResourceFactory.createProperty(TER_NAMESPACE, "describes"); + Resource describesObj = ResourceFactory.createResource(uri + "#aggregation"); + Statement state3 = ResourceFactory.createStatement(resource, describesPred, describesObj); + model.add(state3); + //create a type + Property typePred = ResourceFactory.createProperty(RDF_NAMESPACE, "type"); + Resource typeObj = ResourceFactory.createResource("http://www.openarchives.org/ore/terms/ResourceMap"); + Statement state4 = ResourceFactory.createStatement(resource, typePred, typeObj); + model.add(state4); + //TODO: create a creator statement + + } + +} diff --git a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java new file mode 100644 index 000000000..54fcc8904 --- /dev/null +++ 
b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java @@ -0,0 +1,89 @@ +/** + * '$RCSfile$' + * Copyright: 2019 Regents of the University of California and the + * National Center for Ecological Analysis and Synthesis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package edu.ucsb.nceas.metacat.dataone.resourcemap; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.util.HashMap; + +import org.dataone.service.types.v1.Identifier; + +import edu.ucsb.nceas.MCTestCase; +import junit.framework.Test; +import junit.framework.TestSuite; + +/** + * Junit test class for the class of ResourceMapModifier + * @author tao + * + */ +public class ResourceMapModifierTest extends MCTestCase { + + private static String RESOURCEMAP_FILEPATH = "test/resourcemap-with-prov.xml"; + private static String ORIGINAL_RESOURCEMAP_PID = "urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a708"; + private static String NEW_RESOURCEMAP_PID = "urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a719"; + private static String ORIGNAL_METADATA_PID = "urn:uuid:c0e0d342-7cc1-4eaa-9648-c6d9f7ed8b1f"; + private static String NEW_METADATA_PID = "doi:10.5072/FK27D2ZR71"; + + /** + * Constructor + * @param name + */ + public ResourceMapModifierTest (String name) { + 
super(name); + } + + /** + * Test suite + * @return + */ + public static Test suite() { + TestSuite suite = new TestSuite(); + suite.addTest(new ResourceMapModifierTest("testReplaceObsoletedIds")); + return suite; + } + + /** + * Test the method of replaceObsoletedIds + * @throws Exception + */ + public void testReplaceObsoletedIds() throws Exception { + File resourceMapFile = new File(RESOURCEMAP_FILEPATH); + assertTrue(resourceMapFile.exists()); + FileInputStream resourceMapInputStream = new FileInputStream(resourceMapFile); + Identifier origin_resourceMap_id = new Identifier(); + origin_resourceMap_id.setValue(ORIGINAL_RESOURCEMAP_PID); + Identifier new_resourceMap_id = new Identifier(); + new_resourceMap_id.setValue(NEW_RESOURCEMAP_PID); + ResourceMapModifier modifier = new ResourceMapModifier(origin_resourceMap_id, new_resourceMap_id); + HashMap obsoletedBys = new HashMap(); + Identifier origin_metadata_id = new Identifier(); + origin_metadata_id.setValue(ORIGNAL_METADATA_PID); + Identifier new_metadata_id = new Identifier(); + new_metadata_id.setValue(NEW_METADATA_PID); + obsoletedBys.put(origin_metadata_id, new_metadata_id); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + modifier.replaceObsoletedIds(obsoletedBys, resourceMapInputStream, out); + String outStr = out.toString("UTF-8"); + System.out.println(outStr); + resourceMapInputStream.close(); + } +} diff --git a/test/resourcemap-with-prov.xml b/test/resourcemap-with-prov.xml new file mode 100644 index 000000000..b4fb837be --- /dev/null +++ b/test/resourcemap-with-prov.xml @@ -0,0 +1,48 @@ + + + urn:uuid:c0e0d342-7cc1-4eaa-9648-c6d9f7ed8b1f + + + + + + + + urn:uuid:e8960a65-8748-4552-b1cf-fdcab171540a + + + + + + + urn:uuid:326e21d5-c961-46ed-a85c-28eeedd980de + + + + + + + + + + + + + + + Jing Tao + + + urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a708 + 2019-07-05T17:37:52.546Z + + + + From 29556f3610f3f88661d4cd2e4271886f96bb6ffc Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 8 Jul 2019 
14:40:04 -0700 Subject: [PATCH 03/16] Add code to replace the relationship of documents/isDocumentedBy Ref: https://github.com/NCEAS/metacat/issues/1366 --- .../resourcemap/ResourceMapModifier.java | 148 ++++++++++++++---- 1 file changed, 115 insertions(+), 33 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 8f2a19984..149f03bfc 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -28,11 +28,15 @@ import java.util.Date; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.TimeZone; +import java.util.Vector; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.service.types.v1.Identifier; +import org.dataone.vocabulary.CITO; +import org.dataone.vocabulary.DC_TERMS; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.rdf.model.Literal; @@ -58,7 +62,8 @@ public class ResourceMapModifier { private final static String DEFAULT_CN_URI = "https://cn.dataone.org/cn"; private final static String SLASH = "/"; private final static String RESOLVE = "cn/v2/resolve/"; - private final static String TERM_NAMESPACE = "http://purl.org/dc/terms/"; + private final static String TERM_NAMESPACE = DC_TERMS.namespace; + private final static String CITO_NAMESPACE = CITO.namespace; private final static String TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; private final static String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; @@ -95,7 +100,7 @@ public ResourceMapModifier(Identifier oldResourceMapId, Identifier newResourceMa /** * Create new resource map by replacing obsoleted ids by new ids. 
- * @param obsoletedBys a map represents the ids' with the obsoletedBy relationship - the keys are the one need to be obsoleted (replaced); value are the new ones need to be used + * @param obsoletedBys a map represents the ids' with the obsoletedBy relationship - the keys are the one need to be obsoleted (replaced); value are the new ones need to be used. They are all science metadata objects * @param originalResourceMap the content of original resource map * @param newResourceMap the place where the created new resource map will be written * @throws UnsupportedEncodingException @@ -106,49 +111,81 @@ public void replaceObsoletedIds(MapobsoletedBys, InputSt //read the RDF/XML file model.read(originalResourceMap, null); //generate a new resource for the new resource map identifier - Resource subject = null; - Property predicate = null; - RDFNode object = null; - Selector selector = new SimpleSelector(subject, predicate, object); - //StmtIterator iterator = model.listStatements(selector); - - Resource originalOre = model.getResource("https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3Ae62c781c-643b-41f3-a0b0-9f6cbd80a708"); - StmtIterator iterator = originalOre.listProperties(); - while (iterator.hasNext()) { - Statement statement = iterator.nextStatement(); - Resource sub = statement.getSubject(); - System.out.println("the subject is "+sub.getURI());; - Property pred = statement.getPredicate(); - System.out.println("the predicate is "+pred.getLocalName()); - RDFNode obj = statement.getObject(); - System.out.println("the object "+obj.toString()); - if(obj.isResource()) { - Resource res = (Resource)obj; - System.out.println("namespace "+res.getNameSpace()); - System.out.println("local name "+res.getLocalName()); - + generateNewOREId(model); + + //replace ids + if(obsoletedBys != null) { + Set ids = obsoletedBys.keySet(); + for (Identifier obsoletedId : ids) { + Vector needToRemove = new Vector(); + Identifier newId = obsoletedBys.get(obsoletedId); + Resource newResource 
= getResource(model, newId.getValue()); + if(newResource == null) { + newResource = generateNewComponent(model, newId.getValue()); + } + Resource oldResource = getResource(model, obsoletedId.getValue()); + if(oldResource != null) { + //replace the documents relationship + RDFNode node = null; + Selector selector = new SimpleSelector(oldResource, CITO.documents, node); + StmtIterator iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + RDFNode object = statement.getObject(); + //handle the case - oldId documents oldId + if(object.isResource()) { + Resource objResource = (Resource) object; + if(objResource.getURI().equals(oldResource.getURI())) { + object = newResource; + } + } + Statement newStatement = ResourceFactory.createStatement(newResource, CITO.documents, object); + needToRemove.add(statement); + model.add(newStatement); + } + //replace the documentedBy relationship + Resource nullSubject = null; + selector = new SimpleSelector(nullSubject, CITO.isDocumentedBy, oldResource); + iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + Resource subject = statement.getSubject(); + //handle the case - oldId isDocumentBy oldId + if(subject.getURI().equals(oldResource.getURI())) { + subject = newResource; + } + Statement newStatement = ResourceFactory.createStatement(subject, CITO.isDocumentedBy, newResource); + needToRemove.add(statement); + model.add(newStatement); + } + //remove those old documents/isDocumentedBy relationships + for(Statement oldStatement : needToRemove) { + model.remove(oldStatement); + } + } } } - //write it to standard out - generateNewOREId(model); model.write(newResourceMap); } - - /* + + /** * This method generates a Resource object for the new ore id in the given model + * @param model the model where the new generated Resource object will be attached + * @return the uri of the generated new 
Resource object + * @throws UnsupportedEncodingException */ - private void generateNewOREId(Model model) throws UnsupportedEncodingException { - String escaptedNewOreId = URLEncoder.encode(newResourceMapId.getValue(), "UTF-8"); - String uri = baseURI + escaptedNewOreId; + private String generateNewOREId(Model model) throws UnsupportedEncodingException { + String escapedNewOreId = URLEncoder.encode(newResourceMapId.getValue(), "UTF-8"); + String uri = baseURI + escapedNewOreId; Resource resource = model.createResource(uri); //create a identifier property (statement) - Property identifierPred = ResourceFactory.createProperty(TERM_NAMESPACE, "identifier"); + Property identifierPred = DC_TERMS.identifier; Literal identifierObj = ResourceFactory.createPlainLiteral(newResourceMapId.getValue()); Statement state = ResourceFactory.createStatement(resource, identifierPred, identifierObj); model.add(state); //create a modification time statement - Property modificationPred = ResourceFactory.createProperty(TERM_NAMESPACE, "modified"); + Property modificationPred = DC_TERMS.modified; Date date = new Date(); SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.US); format.setTimeZone(TimeZone.getTimeZone("UTC")); @@ -165,8 +202,53 @@ private void generateNewOREId(Model model) throws UnsupportedEncodingException { Resource typeObj = ResourceFactory.createResource("http://www.openarchives.org/ore/terms/ResourceMap"); Statement state4 = ResourceFactory.createStatement(resource, typePred, typeObj); model.add(state4); + return uri; //TODO: create a creator statement - } + + /** + * Create a Resource object for the given id. 
+ * @param model the model where the new Resource object will be attached + * @param id the identifier of the new Resource object will have + * @return the uri of the new generated Resource object + * @throws UnsupportedEncodingException + */ + private Resource generateNewComponent(Model model, String id) throws UnsupportedEncodingException { + String escapedNewId = URLEncoder.encode(id, "UTF-8"); + String uri = baseURI + escapedNewId; + Resource resource = model.createResource(uri); + //create a identifier property (statement) + Property identifierPred = DC_TERMS.identifier; + Literal identifierObj = ResourceFactory.createPlainLiteral(id); + Statement state = ResourceFactory.createStatement(resource, identifierPred, identifierObj); + model.add(state); + return resource; + } + + /** + * Get the Resource object which has the given identifier + * @param model the model where the query will be applied + * @param id the identifier of the Resource object has + * @return the Resource object with the given identifier. It can return null if not found. + */ + private Resource getResource(Model model, String id) { + Resource resource = null; + if(id != null && !id.trim().equals("")) { + Resource subject = null; + Property predicate = DC_TERMS.identifier;; + RDFNode object = ResourceFactory.createPlainLiteral(id); + Selector selector = new SimpleSelector(subject, predicate, object); + StmtIterator iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + resource = statement.getSubject(); + if(resource != null) { + break; + } + } + } + return resource; + } + } From d107ec15005d7743ed7091935c44fd93d2529124 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 8 Jul 2019 16:55:06 -0700 Subject: [PATCH 04/16] Add code the handle aggregation relationship. 
--- .../resourcemap/ResourceMapModifier.java | 111 ++++++++++++++++-- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 149f03bfc..96c1a5b24 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -64,8 +64,9 @@ public class ResourceMapModifier { private final static String RESOLVE = "cn/v2/resolve/"; private final static String TERM_NAMESPACE = DC_TERMS.namespace; private final static String CITO_NAMESPACE = CITO.namespace; - private final static String TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; + private final static String ORE_TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; private final static String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + private final static String AGGREGATION = "#aggregation"; private static Log log = LogFactory.getLog(ResourceMapModifier.class); private Identifier oldResourceMapId = null; @@ -110,10 +111,10 @@ public void replaceObsoletedIds(MapobsoletedBys, InputSt Model model = ModelFactory.createDefaultModel(); //read the RDF/XML file model.read(originalResourceMap, null); - //generate a new resource for the new resource map identifier - generateNewOREId(model); - + //replace ids + Vector oldURIs = new Vector(); //those uris (resource) shouldn't be aggregated into the new ore since they are obsoleted + Vector newURIs = new Vector(); //those uris (resource) should be added into the new aggregation if(obsoletedBys != null) { Set ids = obsoletedBys.keySet(); for (Identifier obsoletedId : ids) { @@ -123,7 +124,9 @@ public void replaceObsoletedIds(MapobsoletedBys, InputSt if(newResource == null) { newResource = generateNewComponent(model, newId.getValue()); } + newURIs.add(newResource.getURI()); Resource oldResource = 
getResource(model, obsoletedId.getValue()); + oldURIs.add(oldResource.getURI()); if(oldResource != null) { //replace the documents relationship RDFNode node = null; @@ -165,6 +168,11 @@ public void replaceObsoletedIds(MapobsoletedBys, InputSt } } } + + //generate a new resource for the new resource map identifier + Resource newOreResource = generateNewOREResource(model); + Resource oldOreResource = getResource(model,oldResourceMapId.getValue()); + replaceAggregations(model, oldOreResource.getURI(), newOreResource, oldURIs, newURIs); //write it to standard out model.write(newResourceMap); } @@ -172,10 +180,10 @@ public void replaceObsoletedIds(MapobsoletedBys, InputSt /** * This method generates a Resource object for the new ore id in the given model * @param model the model where the new generated Resource object will be attached - * @return the uri of the generated new Resource object + * @return the generated new ORE Resource object * @throws UnsupportedEncodingException */ - private String generateNewOREId(Model model) throws UnsupportedEncodingException { + private Resource generateNewOREResource(Model model) throws UnsupportedEncodingException { String escapedNewOreId = URLEncoder.encode(newResourceMapId.getValue(), "UTF-8"); String uri = baseURI + escapedNewOreId; Resource resource = model.createResource(uri); @@ -193,8 +201,8 @@ private String generateNewOREId(Model model) throws UnsupportedEncodingException Statement state2 = ResourceFactory.createStatement(resource, modificationPred, modificationObj); model.add(state2); //create a describes statement - Property describesPred = ResourceFactory.createProperty(TER_NAMESPACE, "describes"); - Resource describesObj = ResourceFactory.createResource(uri + "#aggregation"); + Property describesPred = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "describes"); + Resource describesObj = ResourceFactory.createResource(uri + AGGREGATION); Statement state3 = ResourceFactory.createStatement(resource, describesPred, 
describesObj); model.add(state3); //create a type @@ -202,8 +210,92 @@ private String generateNewOREId(Model model) throws UnsupportedEncodingException Resource typeObj = ResourceFactory.createResource("http://www.openarchives.org/ore/terms/ResourceMap"); Statement state4 = ResourceFactory.createStatement(resource, typePred, typeObj); model.add(state4); - return uri; //TODO: create a creator statement + return resource; + } + + /** + * Replace the old aggregation relationship by the new ore id. + * This method will be called after calling generateNewOREResource + * @param model the model will be modified + * @param oldOREUri the uri of the old resource map + * @param newOREResource the uri of the new resource map + * @param oldURIs the uri of old ids shouldn't included in the new aggregation + * @param newURIs the uri of new ids should be added into the new aggregation + */ + private void replaceAggregations(Model model, String oldOREUri, Resource newOREResource, Vector oldURIs, Vector newURIs) { + //create a aggregation resource for the new ore id + Resource newAggregation = ResourceFactory.createResource(newOREResource.getURI() + AGGREGATION); + Property predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "isDescribedBy"); + Statement statement = ResourceFactory.createStatement(newAggregation, predicate, newOREResource); + model.add(statement); + + Vector needToRemove = new Vector(); + Resource oldOreAggregation = model.getResource(oldOREUri+AGGREGATION); + //replace the aggregates relationship + RDFNode node = null; + predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "aggregates"); + Selector selector = new SimpleSelector(oldOreAggregation, predicate, node); + StmtIterator iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement aggregatesState = iterator.nextStatement(); + RDFNode object = aggregatesState.getObject(); + needToRemove.add(aggregatesState); + if(object.isResource() && oldURIs != null) { + //the 
object is an obsoleted id, we don't need to add it into the new aggregation + Resource objResource = (Resource)object; + if(oldURIs.contains(objResource.getURI())) { + continue; + } + } + Statement newStatement = ResourceFactory.createStatement(newAggregation, predicate, object); + model.add(newStatement); + } + //add new ids + if(newURIs != null) { + for(String uri : newURIs) { + Resource newResource = model.getResource(uri); + if(newResource != null) { + Statement newStatement = ResourceFactory.createStatement(newAggregation, predicate, newResource); + model.add(newStatement); + } + } + } + + //replace the documentedBy relationship + Resource nullSubject = null; + predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "isAggregatedBy"); + selector = new SimpleSelector(nullSubject, predicate, oldOreAggregation); + iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement aggregatedBystatement = iterator.nextStatement(); + Resource subject = aggregatedBystatement.getSubject(); + needToRemove.add(aggregatedBystatement); + if(subject.isResource() && oldURIs != null) { + //the object is an obsoleted id, we don't need to add it into the new aggregation + Resource subjResource = (Resource)subject; + if(oldURIs.contains(subjResource.getURI())) { + continue; + } + } + Statement newStatement = ResourceFactory.createStatement(subject, predicate, newAggregation); + model.add(newStatement); + } + //add new ids + if(newURIs != null) { + for(String uri : newURIs) { + Resource newResource = model.getResource(uri); + if(newResource != null) { + Statement newStatement = ResourceFactory.createStatement(newResource, predicate, newAggregation); + model.add(newStatement); + } + } + } + + //remove those old aggregates/isAggregatedBy relationships + for(Statement oldStatement : needToRemove) { + model.remove(oldStatement); + } } /** @@ -243,6 +335,7 @@ private Resource getResource(Model model, String id) { Statement statement = 
iterator.nextStatement(); resource = statement.getSubject(); if(resource != null) { + log.debug("ResourceMapModifier.getResource - get the resource "+resource.getURI()+" with the identifier "+id); break; } } From 67a761481973f12dbeccd991e2cce67c4d6df46f Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 9 Jul 2019 14:21:34 -0700 Subject: [PATCH 05/16] Completed the junit test. Ref: https://github.com/NCEAS/metacat/issues/1366 --- .../resourcemap/ResourceMapModifier.java | 22 +-- .../resourcemap/ResourceMapModifierTest.java | 140 +++++++++++++++++- 2 files changed, 146 insertions(+), 16 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 96c1a5b24..6949763fd 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -59,19 +59,19 @@ * */ public class ResourceMapModifier { - private final static String DEFAULT_CN_URI = "https://cn.dataone.org/cn"; - private final static String SLASH = "/"; - private final static String RESOLVE = "cn/v2/resolve/"; - private final static String TERM_NAMESPACE = DC_TERMS.namespace; - private final static String CITO_NAMESPACE = CITO.namespace; - private final static String ORE_TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; - private final static String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; - private final static String AGGREGATION = "#aggregation"; + public final static String DEFAULT_CN_URI = "https://cn.dataone.org/cn"; + public final static String SLASH = "/"; + public final static String RESOLVE = "v2/resolve/"; + public final static String TERM_NAMESPACE = DC_TERMS.namespace; + public final static String CITO_NAMESPACE = CITO.namespace; + public final static String ORE_TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; + public final static String RDF_NAMESPACE 
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + public final static String AGGREGATION = "#aggregation"; private static Log log = LogFactory.getLog(ResourceMapModifier.class); private Identifier oldResourceMapId = null; private Identifier newResourceMapId = null; - private static String baseURI = null; + public static String baseURI = null; static { try { String cnUrl = PropertyService.getProperty("D1Client.CN_URL"); @@ -323,11 +323,11 @@ private Resource generateNewComponent(Model model, String id) throws Unsupported * @param id the identifier of the Resource object has * @return the Resource object with the given identifier. It can return null if not found. */ - private Resource getResource(Model model, String id) { + public static Resource getResource(Model model, String id) { Resource resource = null; if(id != null && !id.trim().equals("")) { Resource subject = null; - Property predicate = DC_TERMS.identifier;; + Property predicate = DC_TERMS.identifier; RDFNode object = ResourceFactory.createPlainLiteral(id); Selector selector = new SimpleSelector(subject, predicate, object); StmtIterator iterator = model.listStatements(selector); diff --git a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java index 54fcc8904..1f9b29845 100644 --- a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java +++ b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java @@ -19,12 +19,28 @@ */ package edu.ucsb.nceas.metacat.dataone.resourcemap; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; +import java.net.URLEncoder; import java.util.HashMap; +import java.util.Vector; import org.dataone.service.types.v1.Identifier; +import org.dataone.vocabulary.CITO; +import org.dataone.vocabulary.DC_TERMS; + +import com.hp.hpl.jena.rdf.model.Model; +import 
com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdf.model.Selector; +import com.hp.hpl.jena.rdf.model.SimpleSelector; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; import edu.ucsb.nceas.MCTestCase; import junit.framework.Test; @@ -37,11 +53,15 @@ */ public class ResourceMapModifierTest extends MCTestCase { - private static String RESOURCEMAP_FILEPATH = "test/resourcemap-with-prov.xml"; - private static String ORIGINAL_RESOURCEMAP_PID = "urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a708"; - private static String NEW_RESOURCEMAP_PID = "urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a719"; - private static String ORIGNAL_METADATA_PID = "urn:uuid:c0e0d342-7cc1-4eaa-9648-c6d9f7ed8b1f"; - private static String NEW_METADATA_PID = "doi:10.5072/FK27D2ZR71"; + private static final String RESOURCEMAP_FILEPATH = "test/resourcemap-with-prov.xml"; + private static final String ORIGINAL_RESOURCEMAP_PID = "urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a708"; + private static final String NEW_RESOURCEMAP_PID = "urn:uuid:e62c781c-643b-41f3-a0b0-9f6cbd80a719"; + private static final String ORIGNAL_METADATA_PID = "urn:uuid:c0e0d342-7cc1-4eaa-9648-c6d9f7ed8b1f"; + private static final String NEW_METADATA_PID = "doi:10.5072/FK27D2ZR71"; + private static final String DATA_1_URI = "https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3A326e21d5-c961-46ed-a85c-28eeedd980de"; + private static final String DATA_1_PID = "urn:uuid:326e21d5-c961-46ed-a85c-28eeedd980de"; + private static final String DATA_2_URI = "https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3Ae8960a65-8748-4552-b1cf-fdcab171540a"; + private static final String DATA_2_PID = "urn:uuid:e8960a65-8748-4552-b1cf-fdcab171540a"; /** * Constructor @@ -57,15 +77,36 @@ public ResourceMapModifierTest (String name) { 
*/ public static Test suite() { TestSuite suite = new TestSuite(); + suite.addTest(new ResourceMapModifierTest("testGetResource")); suite.addTest(new ResourceMapModifierTest("testReplaceObsoletedIds")); return suite; } + public void testGetResource() throws Exception { + File resourceMapFile = new File(RESOURCEMAP_FILEPATH); + assertTrue(resourceMapFile.exists()); + FileInputStream resourceMapInputStream = new FileInputStream(resourceMapFile); + Model model = ModelFactory.createDefaultModel(); + //read the RDF/XML file + model.read(resourceMapInputStream, null); + Resource resource = ResourceMapModifier.getResource(model, DATA_1_PID); + assertTrue(resource.getURI().equals(DATA_1_URI)); + resource = ResourceMapModifier.getResource(model, DATA_2_PID); + assertTrue(resource.getURI().equals(DATA_2_URI)); + } + /** * Test the method of replaceObsoletedIds * @throws Exception */ public void testReplaceObsoletedIds() throws Exception { + String newMetadataURI = "https://cn.dataone.org/cn/v2/resolve/" + "doi%3A10.5072%2FFK27D2ZR71"; + Vector dataURI = new Vector(); + dataURI.add(DATA_1_URI); + dataURI.add(DATA_2_URI); + dataURI.add(newMetadataURI); + String newOREUri = "https://cn.dataone.org/cn/v2/resolve/" + "urn%3Auuid%3Ae62c781c-643b-41f3-a0b0-9f6cbd80a719"; + String newAggreOREUri = newOREUri + "#aggregation"; File resourceMapFile = new File(RESOURCEMAP_FILEPATH); assertTrue(resourceMapFile.exists()); FileInputStream resourceMapInputStream = new FileInputStream(resourceMapFile); @@ -84,6 +125,95 @@ public void testReplaceObsoletedIds() throws Exception { modifier.replaceObsoletedIds(obsoletedBys, resourceMapInputStream, out); String outStr = out.toString("UTF-8"); System.out.println(outStr); + ByteArrayInputStream in = new ByteArrayInputStream(outStr.getBytes("UTF-8")); + Model model = ModelFactory.createDefaultModel(); + //read the RDF/XML file + model.read(in, null); + //check documents relationship + Resource subject = null; + Property predicate = CITO.documents; + 
RDFNode object = null; + Selector selector = new SimpleSelector(subject, predicate, object); + StmtIterator iterator = model.listStatements(selector); + int i = 0; + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + subject = statement.getSubject(); + assertTrue(subject.getURI().equals(newMetadataURI)); + object = statement.getObject(); + Resource obj = (Resource) object; + assertTrue(dataURI.contains(obj.getURI())); + i++; + } + assertTrue(i==3); + //check documents relationship + subject = null; + predicate = CITO.isDocumentedBy; + object = null; + selector = new SimpleSelector(subject, predicate, object); + iterator = model.listStatements(selector); + i = 0; + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + subject = statement.getSubject(); + assertTrue(dataURI.contains(subject.getURI())); + object = statement.getObject(); + Resource obj = (Resource) object; + assertTrue(obj.getURI().equals(newMetadataURI)); + i++; + } + assertTrue(i==3); + //check aggregate + subject = null; + predicate = ResourceFactory.createProperty("http://www.openarchives.org/ore/terms/", "aggregates"); + object = null; + selector = new SimpleSelector(subject, predicate, object); + iterator = model.listStatements(selector); + i = 0; + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + subject = statement.getSubject(); + assertTrue(subject.getURI().equals(newAggreOREUri)); + object = statement.getObject(); + Resource obj = (Resource) object; + assertTrue(dataURI.contains(obj.getURI())); + i++; + } + assertTrue(i==3); + //check aggregateBy + subject = null; + predicate = ResourceFactory.createProperty("http://www.openarchives.org/ore/terms/", "isAggregatedBy"); + object = null; + selector = new SimpleSelector(subject, predicate, object); + iterator = model.listStatements(selector); + i = 0; + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + subject = 
statement.getSubject(); + assertTrue(dataURI.contains(subject.getURI())); + object = statement.getObject(); + Resource obj = (Resource) object; + assertTrue(obj.getURI().equals(newAggreOREUri)); + i++; + } + assertTrue(i==3); + //check the provenance relationship + subject = null; + predicate = ResourceFactory.createProperty("http://www.w3.org/ns/prov#", "wasDerivedFrom"); + object = null; + selector = new SimpleSelector(subject, predicate, object); + iterator = model.listStatements(selector); + i = 0; + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + subject = statement.getSubject(); + assertTrue(subject.getURI().equals(DATA_2_URI)); + object = statement.getObject(); + Resource obj = (Resource) object; + assertTrue(obj.getURI().equals(DATA_1_URI)); + i++; + } + assertTrue(i==1); resourceMapInputStream.close(); } } From 7d8b846dea57fc5d65e0881cce92d8fbd3863f22 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 9 Jul 2019 20:12:33 -0700 Subject: [PATCH 06/16] Modify the publish method to preserve other relationships besides the documents/documentedBy ones.
Ref: https://github.com/NCEAS/metacat/issues/1366 --- .../nceas/metacat/dataone/MNodeService.java | 41 ++++++++----------- .../resourcemap/ResourceMapModifier.java | 20 ++++----- .../resourcemap/ResourceMapModifierTest.java | 4 +- 3 files changed, 25 insertions(+), 40 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index 7ba4c95b6..7954a02bd 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -158,6 +158,7 @@ import edu.ucsb.nceas.metacat.common.query.EnabledQueryEngines; import edu.ucsb.nceas.metacat.common.query.stream.ContentTypeByteArrayInputStream; import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService; +import edu.ucsb.nceas.metacat.dataone.resourcemap.ResourceMapModifier; import edu.ucsb.nceas.metacat.index.MetacatSolrEngineDescriptionHandler; import edu.ucsb.nceas.metacat.index.MetacatSolrIndex; import edu.ucsb.nceas.metacat.properties.PropertyService; @@ -2158,12 +2159,15 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws logMetacat.warn("No potential ORE map found for: " + potentialOreIdentifier.getValue()+" by the name convention."); // try the SOLR index List potentialOreIdentifiers = this.lookupOreFor(session, originalIdentifier); - if (potentialOreIdentifiers != null) { - for(Identifier id :potentialOreIdentifiers) { + if (potentialOreIdentifiers != null && potentialOreIdentifiers.size() >0) { + int size = potentialOreIdentifiers.size(); + for (int i = size-1; i>=0; i--) { + Identifier id = potentialOreIdentifiers.get(i); if (id != null && id.getValue() != null && !id.getValue().trim().equals("")) { SystemMetadata sys = this.getSystemMetadata(session, id); if(sys != null && sys.getObsoletedBy() == null) { //found the non-obsotetedBy ore document. 
+ System.out.println("============ found the ore map from the list when the index is "+i); potentialOreIdentifier = id; break; } @@ -2182,22 +2186,12 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws if (oreInputStream != null) { logMetacat.info("MNodeService.publish - we find the old ore document "+potentialOreIdentifier+" for the metacat object "+originalIdentifier); Identifier newOreIdentifier = MNodeService.getInstance(request).generateIdentifier(session, MNodeService.UUID_SCHEME, null); - - Map>> resourceMapStructure = ResourceMapFactory.getInstance().parseResourceMap(oreInputStream); - Map> sciMetaMap = resourceMapStructure.get(potentialOreIdentifier); - List dataIdentifiers = sciMetaMap.get(originalIdentifier); - - // reconstruct the ORE with the new identifiers - //the original identifier can be in the data object list, we should replace it if does exist. - if(dataIdentifiers.contains(originalIdentifier)) { - dataIdentifiers.remove(originalIdentifier); - dataIdentifiers.add(newIdentifier); - } - sciMetaMap.remove(originalIdentifier); - sciMetaMap.put(newIdentifier, dataIdentifiers); - - ResourceMap resourceMap = ResourceMapFactory.getInstance().createResourceMap(newOreIdentifier, sciMetaMap); - String resourceMapString = ResourceMapFactory.getInstance().serializeResourceMap(resourceMap); + ResourceMapModifier modifier = new ResourceMapModifier(potentialOreIdentifier, oreInputStream, newOreIdentifier); + Map obsoletedBys = new HashMap(); + obsoletedBys.put(originalIdentifier, newIdentifier); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + modifier.replaceObsoletedIds(obsoletedBys, out); + String resourceMapString = out.toString("UTF-8"); // get the original ORE SM and update the values SystemMetadata originalOreSysMeta = this.getSystemMetadata(session, potentialOreIdentifier); @@ -2219,9 +2213,11 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws 
oreSysMeta.setSize(BigInteger.valueOf(resourceMapString.getBytes("UTF-8").length)); oreSysMeta.setChecksum(ChecksumUtil.checksum(resourceMapString.getBytes("UTF-8"), oreSysMeta.getChecksum().getAlgorithm())); + Map>> resourceMapStructure = ResourceMapFactory.getInstance().parseResourceMap(new ByteArrayInputStream(resourceMapString.getBytes("UTF-8"))); + Map> sciMetaMap = resourceMapStructure.get(newOreIdentifier); + List dataIdentifiers = sciMetaMap.get(newIdentifier); // ensure ORE is publicly readable - oreSysMeta = makePublicIfNot(oreSysMeta, potentialOreIdentifier); - + oreSysMeta = makePublicIfNot(oreSysMeta, potentialOreIdentifier); // ensure all data objects allow public read List pidsToSync = new ArrayList(); for (Identifier dataId: dataIdentifiers) { @@ -2288,11 +2284,6 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); sf.initCause(e); throw sf; - } catch (ORESerialiserException e) { - // report as service failure - ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); - sf.initCause(e); - throw sf; } catch (NoSuchAlgorithmException e) { // report as service failure ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 6949763fd..f853d34b5 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -22,7 +22,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; -import java.net.URL; import java.net.URLEncoder; import java.text.SimpleDateFormat; import java.util.Date; @@ -71,6 +70,7 @@ public class ResourceMapModifier { private static Log log = LogFactory.getLog(ResourceMapModifier.class); private Identifier 
oldResourceMapId = null; private Identifier newResourceMapId = null; + private Model model = ModelFactory.createDefaultModel(); public static String baseURI = null; static { try { @@ -89,29 +89,23 @@ public class ResourceMapModifier { /** * Constructor * @param oldResourceMapId the identifier of the old resource map which will be modified + * @param originalResourceMap the content of original resource map * @param newResourceMapId the identifier of the new resource map which will be generated */ - public ResourceMapModifier(Identifier oldResourceMapId, Identifier newResourceMapId) { + public ResourceMapModifier(Identifier oldResourceMapId, InputStream originalResourceMap, Identifier newResourceMapId) { this.oldResourceMapId = oldResourceMapId; this.newResourceMapId = newResourceMapId; - - + //read the RDF/XML file + model.read(originalResourceMap, null); } - /** * Create new resource map by replacing obsoleted ids by new ids. * @param obsoletedBys a map represents the ids' with the obsoletedBy relationship - the keys are the one need to be obsoleted (replaced); value are the new ones need to be used. 
They are all science metadata objects - * @param originalResourceMap the content of original resource map * @param newResourceMap the place where the created new resource map will be written * @throws UnsupportedEncodingException */ - public void replaceObsoletedIds(MapobsoletedBys, InputStream originalResourceMap, OutputStream newResourceMap ) throws UnsupportedEncodingException { - //create an empty model - Model model = ModelFactory.createDefaultModel(); - //read the RDF/XML file - model.read(originalResourceMap, null); - + public void replaceObsoletedIds(MapobsoletedBys, OutputStream newResourceMap ) throws UnsupportedEncodingException { //replace ids Vector oldURIs = new Vector(); //those uris (resource) shouldn't be aggregated into the new ore since they are obsoleted Vector newURIs = new Vector(); //those uris (resource) should be added into the new aggregation @@ -176,7 +170,7 @@ public void replaceObsoletedIds(MapobsoletedBys, InputSt //write it to standard out model.write(newResourceMap); } - + /** * This method generates a Resource object for the new ore id in the given model * @param model the model where the new generated Resource object will be attached diff --git a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java index 1f9b29845..4be150d88 100644 --- a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java +++ b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java @@ -114,7 +114,7 @@ public void testReplaceObsoletedIds() throws Exception { origin_resourceMap_id.setValue(ORIGINAL_RESOURCEMAP_PID); Identifier new_resourceMap_id = new Identifier(); new_resourceMap_id.setValue(NEW_RESOURCEMAP_PID); - ResourceMapModifier modifier = new ResourceMapModifier(origin_resourceMap_id, new_resourceMap_id); + ResourceMapModifier modifier = new ResourceMapModifier(origin_resourceMap_id, resourceMapInputStream, 
new_resourceMap_id); HashMap obsoletedBys = new HashMap(); Identifier origin_metadata_id = new Identifier(); origin_metadata_id.setValue(ORIGNAL_METADATA_PID); @@ -122,7 +122,7 @@ public void testReplaceObsoletedIds() throws Exception { new_metadata_id.setValue(NEW_METADATA_PID); obsoletedBys.put(origin_metadata_id, new_metadata_id); ByteArrayOutputStream out = new ByteArrayOutputStream(); - modifier.replaceObsoletedIds(obsoletedBys, resourceMapInputStream, out); + modifier.replaceObsoletedIds(obsoletedBys, out); String outStr = out.toString("UTF-8"); System.out.println(outStr); ByteArrayInputStream in = new ByteArrayInputStream(outStr.getBytes("UTF-8")); From 3d19af19ee69b910993ee2b42483aa2d4b03415d Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 10 Jul 2019 10:21:16 -0700 Subject: [PATCH 07/16] Add a new method to get the data object ids list for the given metadata id. Ref: https://github.com/NCEAS/metacat/issues/1366 --- .../resourcemap/ResourceMapModifier.java | 40 +++++++++++++++++++ .../resourcemap/ResourceMapModifierTest.java | 21 ++++++---- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index f853d34b5..612a1f678 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -24,7 +24,9 @@ import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -337,5 +339,43 @@ public static Resource getResource(Model model, String id) { return resource; } + + /** + * Get all subjects of the triple - * is documentedBy metadataId on the resource map + * @param metadataId the id of object on the triple (it always be 
a metadata id). If it is null, it will be anything. + * @return the all the identifiers of the subjects match the query + */ + public List getSubjectsOfDocumentedBy(Identifier metadataId) { + List subjects = new ArrayList(); + Resource nullSubject = null; + Resource object = null; + String objectId = null; + if(metadataId != null) { + objectId = metadataId.getValue(); + object = getResource(model, objectId); + log.debug("ResourceMapModifier.getSubjectsOfDocumentedBy - the object's uri is " + object.getURI() + " for the id " + objectId); + } + Selector selector = new SimpleSelector(nullSubject, CITO.isDocumentedBy, object); + StmtIterator iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + Resource subject = statement.getSubject(); + Statement idStatement = subject.getProperty(DC_TERMS.identifier); + RDFNode idResource = idStatement.getObject(); + log.debug("ResourceMapModifier.getSubjectsOfDocumentedBy - get the identifier RDF " + idResource.toString() + " . Is the RDF literal? 
" + idResource.isLiteral()); + if (idResource != null && idResource.isLiteral()) { + Literal idValue = (Literal) idResource; + String idStr = idValue.getString(); + if(idStr != null) { + log.debug("ResourceMapModifier.getSubjectsOfDocumentedBy - add the " + idStr + " into the return list for given metadata id " + objectId); + Identifier identifier = new Identifier(); + identifier.setValue(idStr); + subjects.add(identifier); + } + } + } + return subjects; + } + } diff --git a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java index 4be150d88..b3e409d67 100644 --- a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java +++ b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java @@ -23,13 +23,12 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; -import java.net.URLEncoder; import java.util.HashMap; +import java.util.List; import java.util.Vector; import org.dataone.service.types.v1.Identifier; import org.dataone.vocabulary.CITO; -import org.dataone.vocabulary.DC_TERMS; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -145,7 +144,7 @@ public void testReplaceObsoletedIds() throws Exception { assertTrue(dataURI.contains(obj.getURI())); i++; } - assertTrue(i==3); + assertTrue(i == 3); //check documents relationship subject = null; predicate = CITO.isDocumentedBy; @@ -162,7 +161,7 @@ public void testReplaceObsoletedIds() throws Exception { assertTrue(obj.getURI().equals(newMetadataURI)); i++; } - assertTrue(i==3); + assertTrue(i == 3); //check aggregate subject = null; predicate = ResourceFactory.createProperty("http://www.openarchives.org/ore/terms/", "aggregates"); @@ -179,7 +178,7 @@ public void testReplaceObsoletedIds() throws Exception { assertTrue(dataURI.contains(obj.getURI())); i++; } - assertTrue(i==3); + assertTrue(i == 3); //check 
aggregateBy subject = null; predicate = ResourceFactory.createProperty("http://www.openarchives.org/ore/terms/", "isAggregatedBy"); @@ -196,7 +195,7 @@ public void testReplaceObsoletedIds() throws Exception { assertTrue(obj.getURI().equals(newAggreOREUri)); i++; } - assertTrue(i==3); + assertTrue(i == 3); //check the provenance relationship subject = null; predicate = ResourceFactory.createProperty("http://www.w3.org/ns/prov#", "wasDerivedFrom"); @@ -213,7 +212,15 @@ public void testReplaceObsoletedIds() throws Exception { assertTrue(obj.getURI().equals(DATA_1_URI)); i++; } - assertTrue(i==1); + assertTrue(i == 1); + //Test the method of getSubjectsOfDocumentedBy + List dataFileIds = modifier.getSubjectsOfDocumentedBy(new_metadata_id); + assertTrue(dataFileIds.size() == 3); + for(Identifier id : dataFileIds) { + assertTrue(id.getValue().equals(DATA_1_PID) || id.getValue().equals(DATA_2_PID) || id.getValue().equals(NEW_METADATA_PID)); + } resourceMapInputStream.close(); } + + } From 8e928771d44db04d27b87e94a9f332fe37038117 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 10 Jul 2019 12:13:13 -0700 Subject: [PATCH 08/16] Using a new method to get data ids. 
--- .../nceas/metacat/dataone/MNodeService.java | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index 7954a02bd..5fdba224f 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -2213,18 +2213,21 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws oreSysMeta.setSize(BigInteger.valueOf(resourceMapString.getBytes("UTF-8").length)); oreSysMeta.setChecksum(ChecksumUtil.checksum(resourceMapString.getBytes("UTF-8"), oreSysMeta.getChecksum().getAlgorithm())); - Map>> resourceMapStructure = ResourceMapFactory.getInstance().parseResourceMap(new ByteArrayInputStream(resourceMapString.getBytes("UTF-8"))); - Map> sciMetaMap = resourceMapStructure.get(newOreIdentifier); - List dataIdentifiers = sciMetaMap.get(newIdentifier); // ensure ORE is publicly readable oreSysMeta = makePublicIfNot(oreSysMeta, potentialOreIdentifier); + List dataIdentifiers = modifier.getSubjectsOfDocumentedBy(newIdentifier); // ensure all data objects allow public read List pidsToSync = new ArrayList(); for (Identifier dataId: dataIdentifiers) { - SystemMetadata dataSysMeta = this.getSystemMetadata(session, dataId); - dataSysMeta = makePublicIfNot(dataSysMeta, dataId); - this.updateSystemMetadata(dataSysMeta); - pidsToSync.add(dataId.getValue()); + try { + SystemMetadata dataSysMeta = this.getSystemMetadata(session, dataId); + dataSysMeta = makePublicIfNot(dataSysMeta, dataId); + this.updateSystemMetadata(dataSysMeta); + pidsToSync.add(dataId.getValue()); + } catch (Exception e) { + // ignore + logMetacat.warn("Error attempting to set data object " + dataId.getValue() + " public readable when publishing package"); + } } SyncAccessPolicy sap = new SyncAccessPolicy(); try { @@ -2269,21 +2272,6 @@ public Identifier publish(Session session, Identifier 
originalIdentifier) throws ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); sf.initCause(e); throw sf; - } catch (OREException e) { - // report as service failure - ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); - sf.initCause(e); - throw sf; - } catch (URISyntaxException e) { - // report as service failure - ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); - sf.initCause(e); - throw sf; - } catch (OREParserException e) { - // report as service failure - ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); - sf.initCause(e); - throw sf; } catch (NoSuchAlgorithmException e) { // report as service failure ServiceFailure sf = new ServiceFailure("1030", e.getMessage()); From 5a22b7bfa6d902f4ef00e79dbc92358178f5eb6e Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 10 Jul 2019 13:53:54 -0700 Subject: [PATCH 09/16] Set a file name on the system metadata of the new generated rdf. Delete the old rdf. --- .../nceas/metacat/dataone/MNodeService.java | 3 ++- .../resourcemap/ResourceMapModifier.java | 22 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index 5fdba224f..d3b5e6fd2 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -2167,7 +2167,7 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws SystemMetadata sys = this.getSystemMetadata(session, id); if(sys != null && sys.getObsoletedBy() == null) { //found the non-obsotetedBy ore document. 
- System.out.println("============ found the ore map from the list when the index is "+i); + logMetacat.debug("MNodeService.publish - found the ore map from the list when the index is " + i); potentialOreIdentifier = id; break; } @@ -2212,6 +2212,7 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws oreSysMeta.setObsoletedBy(null); oreSysMeta.setSize(BigInteger.valueOf(resourceMapString.getBytes("UTF-8").length)); oreSysMeta.setChecksum(ChecksumUtil.checksum(resourceMapString.getBytes("UTF-8"), oreSysMeta.getChecksum().getAlgorithm())); + oreSysMeta.setFileName("resourceMap_" + newOreIdentifier.getValue() + ".rdf.xml"); // ensure ORE is publicly readable oreSysMeta = makePublicIfNot(oreSysMeta, potentialOreIdentifier); diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 612a1f678..c71e8fed3 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -168,7 +168,7 @@ public void replaceObsoletedIds(MapobsoletedBys, Output //generate a new resource for the new resource map identifier Resource newOreResource = generateNewOREResource(model); Resource oldOreResource = getResource(model,oldResourceMapId.getValue()); - replaceAggregations(model, oldOreResource.getURI(), newOreResource, oldURIs, newURIs); + replaceAggregations(model, oldOreResource, newOreResource, oldURIs, newURIs); //write it to standard out model.write(newResourceMap); } @@ -214,12 +214,12 @@ private Resource generateNewOREResource(Model model) throws UnsupportedEncodingE * Replace the old aggregation relationship by the new ore id. 
* This method will be called after calling generateNewOREResource * @param model the model will be modified - * @param oldOREUri the uri of the old resource map - * @param newOREResource the uri of the new resource map + * @param oldOREResource the Resource object of the old ore + * @param newOREResource the Resource object the new ore * @param oldURIs the uri of old ids shouldn't included in the new aggregation * @param newURIs the uri of new ids should be added into the new aggregation */ - private void replaceAggregations(Model model, String oldOREUri, Resource newOREResource, Vector oldURIs, Vector newURIs) { + private void replaceAggregations(Model model, Resource oldOREResource, Resource newOREResource, Vector oldURIs, Vector newURIs) { //create a aggregation resource for the new ore id Resource newAggregation = ResourceFactory.createResource(newOREResource.getURI() + AGGREGATION); Property predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "isDescribedBy"); @@ -227,11 +227,11 @@ private void replaceAggregations(Model model, String oldOREUri, Resource newORER model.add(statement); Vector needToRemove = new Vector(); - Resource oldOreAggregation = model.getResource(oldOREUri+AGGREGATION); + Resource oldOreAggregation = model.getResource(oldOREResource.getURI()+AGGREGATION); //replace the aggregates relationship - RDFNode node = null; + final RDFNode nullNode = null; predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "aggregates"); - Selector selector = new SimpleSelector(oldOreAggregation, predicate, node); + Selector selector = new SimpleSelector(oldOreAggregation, predicate, nullNode); StmtIterator iterator = model.listStatements(selector); while (iterator.hasNext()) { Statement aggregatesState = iterator.nextStatement(); @@ -258,7 +258,7 @@ private void replaceAggregations(Model model, String oldOREUri, Resource newORER } } - //replace the documentedBy relationship + //replace the isAggregatedBy relationship Resource nullSubject = 
null; predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "isAggregatedBy"); selector = new SimpleSelector(nullSubject, predicate, oldOreAggregation); @@ -292,6 +292,12 @@ private void replaceAggregations(Model model, String oldOREUri, Resource newORER for(Statement oldStatement : needToRemove) { model.remove(oldStatement); } + Selector delSelector = new SimpleSelector(oldOreAggregation, null, nullNode); + StmtIterator delIterator = model.listStatements(delSelector); + model.remove(delIterator); + delSelector = new SimpleSelector(oldOREResource, null, nullNode); + delIterator = model.listStatements(delSelector); + model.remove(delIterator); } /** From 8b919afd4ece82aa0e67e8db461a35eabbea8719 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 11 Jul 2019 09:14:57 -0700 Subject: [PATCH 10/16] Change the version to 2.10.4. --- README.md | 7 ++++++- build.properties | 2 +- build.xml | 2 +- lib/metacat.properties | 3 ++- metacat-common/pom.xml | 2 +- metacat-index/pom.xml | 4 ++-- pom.xml | 4 ++-- src/loaddtdschema-postgres.sql | 2 +- src/upgrade-db-to-2.10.4-postgres.sql | 12 ++++++++++++ 9 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 src/upgrade-db-to-2.10.4-postgres.sql diff --git a/README.md b/README.md index ab72cfab1..6fbee1668 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Metacat: Data Preservation and Discovery System -Version: 2.10.3 Release +Version: 2.10.4 Release Send feedback and bugs to: metacat-dev@ecoinformatics.org http://github.com/NCEAS/metacat @@ -67,6 +67,11 @@ for the next release. 
## Release Notes +### Release Notes for 2.10.4: +Bugs fixed in this release: +* Publishing dataset with provenance loses all provenance relationships +* Publish call fails after adding provenance information + ### Release Notes for 2.10.3: New features and bugs fixed in this release: * Update DOI registrations for EML objects with complete metadata diff --git a/build.properties b/build.properties index 326f3e512..87ddeb64c 100755 --- a/build.properties +++ b/build.properties @@ -2,7 +2,7 @@ #Version of this build. This needs to be a dotted numeric version. For #instance 1.9.1 is okay. 1.9.1_rc1 is not. -metacat.version=2.10.3 +metacat.version=2.10.4 #This is for packaging purposes. leave it blank for final production release. metacat.releaseCandidate= diff --git a/build.xml b/build.xml index 93a00e2e3..759177f5e 100755 --- a/build.xml +++ b/build.xml @@ -32,7 +32,7 @@ - + diff --git a/lib/metacat.properties b/lib/metacat.properties index 194318618..291025cb7 100755 --- a/lib/metacat.properties +++ b/lib/metacat.properties @@ -25,7 +25,7 @@ server.internalPort=80 ############### Application Values ############ ## one of the few places where we use ANT tokens -application.metacatVersion=2.10.3 +application.metacatVersion=2.10.4 application.metacatReleaseInfo=-1 application.readOnlyMode=false @@ -113,6 +113,7 @@ database.upgradeVersion.2.10.0=upgrade-db-to-2.10.0 database.upgradeVersion.2.10.1=upgrade-db-to-2.10.1 database.upgradeVersion.2.10.2=upgrade-db-to-2.10.2 database.upgradeVersion.2.10.3=upgrade-db-to-2.10.3 +database.upgradeVersion.2.10.4=upgrade-db-to-2.10.4 ## for running java-based utilities database.upgradeUtility.1.5.0=edu.ucsb.nceas.metacat.admin.upgrade.Upgrade1_5_0 database.upgradeUtility.2.0.0=edu.ucsb.nceas.metacat.admin.upgrade.Upgrade2_0_0 diff --git a/metacat-common/pom.xml b/metacat-common/pom.xml index 5db1e783c..ab0c571c2 100644 --- a/metacat-common/pom.xml +++ b/metacat-common/pom.xml @@ -4,7 +4,7 @@ edu.ucsb.nceas.metacat.common 
metacat-common jar - 2.10.3 + 2.10.4 metacat-common http://maven.apache.org diff --git a/metacat-index/pom.xml b/metacat-index/pom.xml index 2a6ea017f..02d88fb69 100644 --- a/metacat-index/pom.xml +++ b/metacat-index/pom.xml @@ -4,13 +4,13 @@ edu.ucsb.nceas.metacat.index metacat-index war - 2.10.3 + 2.10.4 metacat-index http://maven.apache.org 2.3.8 - 2.10.3 + 2.10.4 diff --git a/pom.xml b/pom.xml index 77c222f2c..8be55a62d 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 org.ecoinformatics metacat - 2.10.3 + 2.10.4 metacat war http://maven.apache.org @@ -12,7 +12,7 @@ UTF-8 2.3.1 2.3.1 - 2.10.3 + 2.10.4 diff --git a/src/loaddtdschema-postgres.sql b/src/loaddtdschema-postgres.sql index 3a91ad744..aa25d5b01 100755 --- a/src/loaddtdschema-postgres.sql +++ b/src/loaddtdschema-postgres.sql @@ -206,4 +206,4 @@ INSERT INTO xml_catalog (entry_type, public_id, format_id, system_id) INSERT INTO xml_catalog (entry_type, public_id, system_id) SELECT 'Schema', 'http://www.openarchives.org/OAI/2.0/oai_dc/', '/schema/oai_dc/oai_dc.xsd' WHERE NOT EXISTS (SELECT * FROM xml_catalog WHERE public_id='http://www.openarchives.org/OAI/2.0/oai_dc/'); INSERT INTO db_version (version, status, date_created) - VALUES ('2.10.3',1,CURRENT_DATE); + VALUES ('2.10.4',1,CURRENT_DATE); diff --git a/src/upgrade-db-to-2.10.4-postgres.sql b/src/upgrade-db-to-2.10.4-postgres.sql new file mode 100644 index 000000000..959b05966 --- /dev/null +++ b/src/upgrade-db-to-2.10.4-postgres.sql @@ -0,0 +1,12 @@ +/* + * Ensure xml_catalog sequence is at table max + */ +SELECT setval('xml_catalog_id_seq', (SELECT max(catalog_id) from xml_catalog)); + +/* + * update the database version + */ +UPDATE db_version SET status=0; + +INSERT INTO db_version (version, status, date_created) + VALUES ('2.10.4', 1, CURRENT_DATE); From e5295d04067d63613221ff096353b37696555aa3 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 11 Jul 2019 12:09:21 -0700 Subject: [PATCH 11/16] Add the creator trip on the resource map 
resource. --- .../nceas/metacat/dataone/MNodeService.java | 2 +- .../resourcemap/ResourceMapModifier.java | 48 +++++++++++++++---- .../resourcemap/ResourceMapModifierTest.java | 5 +- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index d3b5e6fd2..3f6c7b7ac 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -2190,7 +2190,7 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws Map obsoletedBys = new HashMap(); obsoletedBys.put(originalIdentifier, newIdentifier); ByteArrayOutputStream out = new ByteArrayOutputStream(); - modifier.replaceObsoletedIds(obsoletedBys, out); + modifier.replaceObsoletedIds(obsoletedBys, out, session.getSubject()); String resourceMapString = out.toString("UTF-8"); // get the original ORE SM and update the values diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index c71e8fed3..f476f3176 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -36,10 +36,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.service.types.v1.Identifier; +import org.dataone.service.types.v1.Subject; import org.dataone.vocabulary.CITO; import org.dataone.vocabulary.DC_TERMS; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.rdf.model.AnonId; import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -105,9 +107,10 @@ public ResourceMapModifier(Identifier oldResourceMapId, InputStream originalReso * Create new resource map by replacing obsoleted ids by new ids. 
* @param obsoletedBys a map represents the ids' with the obsoletedBy relationship - the keys are the one need to be obsoleted (replaced); value are the new ones need to be used. They are all science metadata objects * @param newResourceMap the place where the created new resource map will be written + * @param subject the subject who generates the resource map * @throws UnsupportedEncodingException */ - public void replaceObsoletedIds(MapobsoletedBys, OutputStream newResourceMap ) throws UnsupportedEncodingException { + public void replaceObsoletedIds(MapobsoletedBys, OutputStream newResourceMap, Subject subject) throws UnsupportedEncodingException { //replace ids Vector oldURIs = new Vector(); //those uris (resource) shouldn't be aggregated into the new ore since they are obsoleted Vector newURIs = new Vector(); //those uris (resource) should be added into the new aggregation @@ -148,12 +151,12 @@ public void replaceObsoletedIds(MapobsoletedBys, Output iterator = model.listStatements(selector); while (iterator.hasNext()) { Statement statement = iterator.nextStatement(); - Resource subject = statement.getSubject(); + Resource subj = statement.getSubject(); //handle the case - oldId isDocumentBy oldId - if(subject.getURI().equals(oldResource.getURI())) { - subject = newResource; + if(subj.getURI().equals(oldResource.getURI())) { + subj = newResource; } - Statement newStatement = ResourceFactory.createStatement(subject, CITO.isDocumentedBy, newResource); + Statement newStatement = ResourceFactory.createStatement(subj, CITO.isDocumentedBy, newResource); needToRemove.add(statement); model.add(newStatement); } @@ -166,20 +169,23 @@ public void replaceObsoletedIds(MapobsoletedBys, Output } //generate a new resource for the new resource map identifier - Resource newOreResource = generateNewOREResource(model); + Resource newOreResource = generateNewOREResource(model, subject); Resource oldOreResource = getResource(model,oldResourceMapId.getValue()); 
replaceAggregations(model, oldOreResource, newOreResource, oldURIs, newURIs); //write it to standard out model.write(newResourceMap); } + + /** * This method generates a Resource object for the new ore id in the given model * @param model the model where the new generated Resource object will be attached + * @param subject name of the creator of this resource map * @return the generated new ORE Resource object * @throws UnsupportedEncodingException */ - private Resource generateNewOREResource(Model model) throws UnsupportedEncodingException { + private Resource generateNewOREResource(Model model, Subject subject) throws UnsupportedEncodingException { String escapedNewOreId = URLEncoder.encode(newResourceMapId.getValue(), "UTF-8"); String uri = baseURI + escapedNewOreId; Resource resource = model.createResource(uri); @@ -206,9 +212,35 @@ private Resource generateNewOREResource(Model model) throws UnsupportedEncodingE Resource typeObj = ResourceFactory.createResource("http://www.openarchives.org/ore/terms/ResourceMap"); Statement state4 = ResourceFactory.createStatement(resource, typePred, typeObj); model.add(state4); - //TODO: create a creator statement + //create a creator statement + Property creator = ResourceFactory.createProperty("http://purl.org/dc/elements/1.1/", "creator"); + Resource agent = generateAgentResource(subject); + Statement creatorState = ResourceFactory.createStatement(resource, creator, agent); + model.add(creatorState); return resource; } + + /** + * Generate an agent resource + * @param subject the name of the agent resource + * @return the agent resource + */ + private Resource generateAgentResource(Subject subject) { + String name = "Metacat"; + if (subject != null && subject.getValue() != null && !subject.getValue().trim().equals("")) { + name = subject.getValue(); + } + Resource creator = model.createResource(AnonId.create()); + Property type = ResourceFactory.createProperty(RDF_NAMESPACE, "type"); + Resource typeObj = 
ResourceFactory.createResource("http://purl.org/dc/terms/Agent"); + Statement statement = ResourceFactory.createStatement(creator, type, typeObj); + model.add(statement); + Property namePred = ResourceFactory.createProperty("http://xmlns.com/foaf/0.1/", "name"); + Literal nameObj = ResourceFactory.createPlainLiteral(name); + Statement nameState = ResourceFactory.createStatement(creator, namePred, nameObj); + model.add(nameState); + return creator; + } /** * Replace the old aggregation relationship by the new ore id. diff --git a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java index b3e409d67..6d806689c 100644 --- a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java +++ b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java @@ -28,6 +28,7 @@ import java.util.Vector; import org.dataone.service.types.v1.Identifier; +import org.dataone.service.types.v1.Subject; import org.dataone.vocabulary.CITO; import com.hp.hpl.jena.rdf.model.Model; @@ -121,7 +122,9 @@ public void testReplaceObsoletedIds() throws Exception { new_metadata_id.setValue(NEW_METADATA_PID); obsoletedBys.put(origin_metadata_id, new_metadata_id); ByteArrayOutputStream out = new ByteArrayOutputStream(); - modifier.replaceObsoletedIds(obsoletedBys, out); + Subject subj = new Subject(); + subj.setValue("foo"); + modifier.replaceObsoletedIds(obsoletedBys, out, subj); String outStr = out.toString("UTF-8"); System.out.println(outStr); ByteArrayInputStream in = new ByteArrayInputStream(outStr.getBytes("UTF-8")); From 19adacc491010e9af33bd1b88b8bb38324420da5 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 11 Jul 2019 13:57:12 -0700 Subject: [PATCH 12/16] Add code to avoid adding the duplicated agent if it already exists. 
--- .../resourcemap/ResourceMapModifier.java | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index f476f3176..87e550aa0 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -70,6 +70,7 @@ public class ResourceMapModifier { public final static String ORE_TER_NAMESPACE = "http://www.openarchives.org/ore/terms/"; public final static String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; public final static String AGGREGATION = "#aggregation"; + public final static String AGENT_URI = "http://purl.org/dc/terms/Agent"; private static Log log = LogFactory.getLog(ResourceMapModifier.class); private Identifier oldResourceMapId = null; @@ -221,7 +222,7 @@ private Resource generateNewOREResource(Model model, Subject subject) throws Uns } /** - * Generate an agent resource + * Generate an agent resource. If there is an existing agent with the same name and the type, the existing agent will be returned. 
* @param subject the name of the agent resource * @return the agent resource */ @@ -230,15 +231,41 @@ private Resource generateAgentResource(Subject subject) { if (subject != null && subject.getValue() != null && !subject.getValue().trim().equals("")) { name = subject.getValue(); } - Resource creator = model.createResource(AnonId.create()); + Resource creator = null; + //the type should be http://purl.org/dc/terms/Agent Property type = ResourceFactory.createProperty(RDF_NAMESPACE, "type"); - Resource typeObj = ResourceFactory.createResource("http://purl.org/dc/terms/Agent"); - Statement statement = ResourceFactory.createStatement(creator, type, typeObj); - model.add(statement); + Resource typeObj = ResourceFactory.createResource(AGENT_URI); + //check if the agent with the same name already exists + Resource nullSubject = null; Property namePred = ResourceFactory.createProperty("http://xmlns.com/foaf/0.1/", "name"); Literal nameObj = ResourceFactory.createPlainLiteral(name); - Statement nameState = ResourceFactory.createStatement(creator, namePred, nameObj); - model.add(nameState); + Selector selector = new SimpleSelector(nullSubject, namePred, nameObj); + StmtIterator iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + Resource subj = statement.getSubject(); + if(subj != null) { + //find an existing resource which has the same name + //then we need check the resource map has the type - http://purl.org/dc/terms/Agent + log.debug("ResourceMapModifier.generateAgentResource - found an existing agent with the name " + name + ". 
But we need to check if it has the type - http://purl.org/dc/terms/Agent."); + if(subj.hasProperty(type)) { + log.debug("ResourceMapModifier.generateAgentResource - found an existing agent with the name " + name + " and it has the type we want."); + creator = subj; + break; + } + } + } + if( creator == null ) { + log.debug("ResourceMapModifier.generateAgentResource - didn't find an existing agent with the name " + name + " and a new one will be generated."); + //we didn't find an existing agent with the same name, so generate a new one + creator = model.createResource(AnonId.create()); + //set the type + Statement statement = ResourceFactory.createStatement(creator, type, typeObj); + model.add(statement); + //set the name + Statement nameState = ResourceFactory.createStatement(creator, namePred, nameObj); + model.add(nameState); + } return creator; } From 644ad3562d82c1fbca3cfde8b44fe544142466e0 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 11 Jul 2019 14:31:11 -0700 Subject: [PATCH 13/16] Remove the instruction to reindex the solr since this release doesn't change anything on the schema file. --- lib/admin/metacat-configuration.jsp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/admin/metacat-configuration.jsp b/lib/admin/metacat-configuration.jsp index 23e8f9611..c6dcfa73b 100755 --- a/lib/admin/metacat-configuration.jsp +++ b/lib/admin/metacat-configuration.jsp @@ -349,8 +349,6 @@ %>
Configuration of Metacat is complete. Please restart Tomcat so that the webapps are initialized with these settings. Note that this may take some time while the system initializes with the new configuration values.
-
All objects on this server need to be re-indexed because this Metacat release modified the schema file of the Solr server. - The details of re-indexing can be found in the documentation.
<% } } From 4253aa5aac0aa06e6d22fb0b30790edc7e2543a9 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 12 Jul 2019 11:17:36 -0700 Subject: [PATCH 14/16] In the publish method, don't swallow the exception if making data file components public readable fails. --- .../ucsb/nceas/metacat/dataone/MNodeService.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index 3f6c7b7ac..216920905 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -2166,7 +2166,7 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws if (id != null && id.getValue() != null && !id.getValue().trim().equals("")) { SystemMetadata sys = this.getSystemMetadata(session, id); if(sys != null && sys.getObsoletedBy() == null) { - //found the non-obsotetedBy ore document. + //found the non-obsoletedBy ore document. 
logMetacat.debug("MNodeService.publish - found the ore map from the list when the index is " + i); potentialOreIdentifier = id; break; @@ -2220,15 +2220,11 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws // ensure all data objects allow public read List pidsToSync = new ArrayList(); for (Identifier dataId: dataIdentifiers) { - try { - SystemMetadata dataSysMeta = this.getSystemMetadata(session, dataId); - dataSysMeta = makePublicIfNot(dataSysMeta, dataId); - this.updateSystemMetadata(dataSysMeta); - pidsToSync.add(dataId.getValue()); - } catch (Exception e) { - // ignore - logMetacat.warn("Error attempting to set data object " + dataId.getValue() + " public readable when publishing package"); - } + SystemMetadata dataSysMeta = this.getSystemMetadata(session, dataId); + dataSysMeta = makePublicIfNot(dataSysMeta, dataId); + this.updateSystemMetadata(dataSysMeta); + pidsToSync.add(dataId.getValue()); + } SyncAccessPolicy sap = new SyncAccessPolicy(); try { From 6bd6fa5eaf33a48c36288b94abc13a10456ab20e Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 12 Jul 2019 11:33:08 -0700 Subject: [PATCH 15/16] Change some format issues. 
--- .../resourcemap/ResourceMapModifier.java | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 87e550aa0..768258db5 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -80,7 +80,7 @@ public class ResourceMapModifier { static { try { String cnUrl = PropertyService.getProperty("D1Client.CN_URL"); - if(cnUrl.endsWith(SLASH)) { + if (cnUrl.endsWith(SLASH)) { baseURI = cnUrl + RESOLVE; } else { baseURI = cnUrl + SLASH + RESOLVE; @@ -115,19 +115,19 @@ public void replaceObsoletedIds(MapobsoletedBys, Output //replace ids Vector oldURIs = new Vector(); //those uris (resource) shouldn't be aggregated into the new ore since they are obsoleted Vector newURIs = new Vector(); //those uris (resource) should be added into the new aggregation - if(obsoletedBys != null) { + if (obsoletedBys != null) { Set ids = obsoletedBys.keySet(); for (Identifier obsoletedId : ids) { Vector needToRemove = new Vector(); Identifier newId = obsoletedBys.get(obsoletedId); Resource newResource = getResource(model, newId.getValue()); - if(newResource == null) { + if (newResource == null) { newResource = generateNewComponent(model, newId.getValue()); } newURIs.add(newResource.getURI()); Resource oldResource = getResource(model, obsoletedId.getValue()); oldURIs.add(oldResource.getURI()); - if(oldResource != null) { + if (oldResource != null) { //replace the documents relationship RDFNode node = null; Selector selector = new SimpleSelector(oldResource, CITO.documents, node); @@ -136,9 +136,9 @@ public void replaceObsoletedIds(MapobsoletedBys, Output Statement statement = iterator.nextStatement(); RDFNode object = statement.getObject(); //handle the case - oldId documents oldId - if(object.isResource()) { + 
if (object.isResource()) { Resource objResource = (Resource) object; - if(objResource.getURI().equals(oldResource.getURI())) { + if (objResource.getURI().equals(oldResource.getURI())) { object = newResource; } } @@ -154,7 +154,7 @@ public void replaceObsoletedIds(MapobsoletedBys, Output Statement statement = iterator.nextStatement(); Resource subj = statement.getSubject(); //handle the case - oldId isDocumentBy oldId - if(subj.getURI().equals(oldResource.getURI())) { + if (subj.getURI().equals(oldResource.getURI())) { subj = newResource; } Statement newStatement = ResourceFactory.createStatement(subj, CITO.isDocumentedBy, newResource); @@ -162,7 +162,7 @@ public void replaceObsoletedIds(MapobsoletedBys, Output model.add(newStatement); } //remove those old documents/isDocumentedBy relationships - for(Statement oldStatement : needToRemove) { + for (Statement oldStatement : needToRemove) { model.remove(oldStatement); } } @@ -244,18 +244,18 @@ private Resource generateAgentResource(Subject subject) { while (iterator.hasNext()) { Statement statement = iterator.nextStatement(); Resource subj = statement.getSubject(); - if(subj != null) { + if (subj != null) { //find an existing resource which has the same name //then we need check the resource map has the type - http://purl.org/dc/terms/Agent log.debug("ResourceMapModifier.generateAgentResource - found an existing agent with the name " + name + ". 
But we need to check if it has the type - http://purl.org/dc/terms/Agent."); - if(subj.hasProperty(type)) { + if (subj.hasProperty(type)) { log.debug("ResourceMapModifier.generateAgentResource - found an existing agent with the name " + name + " and it has the type we want."); creator = subj; break; } } } - if( creator == null ) { + if (creator == null) { log.debug("ResourceMapModifier.generateAgentResource - didn't find an existing agent with the name " + name + " and a new one will be generated."); //we didn't find an existing agent with the same name, so generate a new one creator = model.createResource(AnonId.create()); @@ -286,7 +286,7 @@ private void replaceAggregations(Model model, Resource oldOREResource, Resource model.add(statement); Vector needToRemove = new Vector(); - Resource oldOreAggregation = model.getResource(oldOREResource.getURI()+AGGREGATION); + Resource oldOreAggregation = model.getResource(oldOREResource.getURI() + AGGREGATION); //replace the aggregates relationship final RDFNode nullNode = null; predicate = ResourceFactory.createProperty(ORE_TER_NAMESPACE, "aggregates"); @@ -296,10 +296,10 @@ private void replaceAggregations(Model model, Resource oldOREResource, Resource Statement aggregatesState = iterator.nextStatement(); RDFNode object = aggregatesState.getObject(); needToRemove.add(aggregatesState); - if(object.isResource() && oldURIs != null) { + if (object.isResource() && oldURIs != null) { //the object is an obsoleted id, we don't need to add it into the new aggregation Resource objResource = (Resource)object; - if(oldURIs.contains(objResource.getURI())) { + if (oldURIs.contains(objResource.getURI())) { continue; } } @@ -307,10 +307,10 @@ private void replaceAggregations(Model model, Resource oldOREResource, Resource model.add(newStatement); } //add new ids - if(newURIs != null) { - for(String uri : newURIs) { + if (newURIs != null) { + for (String uri : newURIs) { Resource newResource = model.getResource(uri); - if(newResource != 
null) { + if (newResource != null) { Statement newStatement = ResourceFactory.createStatement(newAggregation, predicate, newResource); model.add(newStatement); } @@ -326,10 +326,10 @@ private void replaceAggregations(Model model, Resource oldOREResource, Resource Statement aggregatedBystatement = iterator.nextStatement(); Resource subject = aggregatedBystatement.getSubject(); needToRemove.add(aggregatedBystatement); - if(subject.isResource() && oldURIs != null) { + if (subject.isResource() && oldURIs != null) { //the object is an obsoleted id, we don't need to add it into the new aggregation Resource subjResource = (Resource)subject; - if(oldURIs.contains(subjResource.getURI())) { + if (oldURIs.contains(subjResource.getURI())) { continue; } } @@ -337,10 +337,10 @@ private void replaceAggregations(Model model, Resource oldOREResource, Resource model.add(newStatement); } //add new ids - if(newURIs != null) { - for(String uri : newURIs) { + if (newURIs != null) { + for (String uri : newURIs) { Resource newResource = model.getResource(uri); - if(newResource != null) { + if (newResource != null) { Statement newStatement = ResourceFactory.createStatement(newResource, predicate, newAggregation); model.add(newStatement); } @@ -348,7 +348,7 @@ private void replaceAggregations(Model model, Resource oldOREResource, Resource } //remove those old aggregates/isAggregatedBy relationships - for(Statement oldStatement : needToRemove) { + for (Statement oldStatement : needToRemove) { model.remove(oldStatement); } Selector delSelector = new SimpleSelector(oldOreAggregation, null, nullNode); @@ -386,7 +386,7 @@ private Resource generateNewComponent(Model model, String id) throws Unsupported */ public static Resource getResource(Model model, String id) { Resource resource = null; - if(id != null && !id.trim().equals("")) { + if (id != null && !id.trim().equals("")) { Resource subject = null; Property predicate = DC_TERMS.identifier; RDFNode object = 
ResourceFactory.createPlainLiteral(id); @@ -395,7 +395,7 @@ public static Resource getResource(Model model, String id) { while (iterator.hasNext()) { Statement statement = iterator.nextStatement(); resource = statement.getSubject(); - if(resource != null) { + if (resource != null) { log.debug("ResourceMapModifier.getResource - get the resource "+resource.getURI()+" with the identifier "+id); break; } @@ -415,7 +415,7 @@ public List getSubjectsOfDocumentedBy(Identifier metadataId) { Resource nullSubject = null; Resource object = null; String objectId = null; - if(metadataId != null) { + if (metadataId != null) { objectId = metadataId.getValue(); object = getResource(model, objectId); log.debug("ResourceMapModifier.getSubjectsOfDocumentedBy - the object's uri is " + object.getURI() + " for the id " + objectId); @@ -431,7 +431,7 @@ public List getSubjectsOfDocumentedBy(Identifier metadataId) { if (idResource != null && idResource.isLiteral()) { Literal idValue = (Literal) idResource; String idStr = idValue.getString(); - if(idStr != null) { + if (idStr != null) { log.debug("ResourceMapModifier.getSubjectsOfDocumentedBy - add the " + idStr + " into the return list for given metadata id " + objectId); Identifier identifier = new Identifier(); identifier.setValue(idStr); From b0fcc64a43c3616d86a9ff6706b15848a10c6035 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 15 Jul 2019 11:47:36 -0700 Subject: [PATCH 16/16] We will remove the old metadata resource as well. 
--- .../nceas/metacat/dataone/MNodeService.java | 4 +- .../resourcemap/ResourceMapModifier.java | 93 ++++++++++--------- .../resourcemap/ResourceMapModifierTest.java | 24 ++++- 3 files changed, 72 insertions(+), 49 deletions(-) diff --git a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java index 216920905..f4f25e427 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java +++ b/src/edu/ucsb/nceas/metacat/dataone/MNodeService.java @@ -2187,10 +2187,8 @@ public Identifier publish(Session session, Identifier originalIdentifier) throws logMetacat.info("MNodeService.publish - we find the old ore document "+potentialOreIdentifier+" for the metacat object "+originalIdentifier); Identifier newOreIdentifier = MNodeService.getInstance(request).generateIdentifier(session, MNodeService.UUID_SCHEME, null); ResourceMapModifier modifier = new ResourceMapModifier(potentialOreIdentifier, oreInputStream, newOreIdentifier); - Map obsoletedBys = new HashMap(); - obsoletedBys.put(originalIdentifier, newIdentifier); ByteArrayOutputStream out = new ByteArrayOutputStream(); - modifier.replaceObsoletedIds(obsoletedBys, out, session.getSubject()); + modifier.replaceObsoletedId(originalIdentifier, newIdentifier, out, session.getSubject()); String resourceMapString = out.toString("UTF-8"); // get the original ORE SM and update the values diff --git a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java index 768258db5..d3e9b312b 100644 --- a/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java +++ b/src/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifier.java @@ -58,6 +58,7 @@ /** * This class will create a new resource map by modifying a given resourceMap input stream. + * Note: this class can only be used on the mn.publish method since it will replace the all old pid existing places by the new id. 
It is only safe for the the mn.publish method. * @author tao * */ @@ -106,65 +107,71 @@ public ResourceMapModifier(Identifier oldResourceMapId, InputStream originalReso /** * Create new resource map by replacing obsoleted ids by new ids. - * @param obsoletedBys a map represents the ids' with the obsoletedBy relationship - the keys are the one need to be obsoleted (replaced); value are the new ones need to be used. They are all science metadata objects + * Note: this method can only be used on the mn.publish method since it will replace the all old pid existing places by the new id. It is only safe for the the mn.publish method. + * @param obsoletedId the pid will be replaced + * @param newId the pid will be used to replace the old pid * @param newResourceMap the place where the created new resource map will be written * @param subject the subject who generates the resource map * @throws UnsupportedEncodingException */ - public void replaceObsoletedIds(MapobsoletedBys, OutputStream newResourceMap, Subject subject) throws UnsupportedEncodingException { + public void replaceObsoletedId(Identifier obsoletedId, Identifier newId, OutputStream newResourceMap, Subject subject) throws UnsupportedEncodingException { //replace ids Vector oldURIs = new Vector(); //those uris (resource) shouldn't be aggregated into the new ore since they are obsoleted Vector newURIs = new Vector(); //those uris (resource) should be added into the new aggregation - if (obsoletedBys != null) { - Set ids = obsoletedBys.keySet(); - for (Identifier obsoletedId : ids) { - Vector needToRemove = new Vector(); - Identifier newId = obsoletedBys.get(obsoletedId); - Resource newResource = getResource(model, newId.getValue()); - if (newResource == null) { - newResource = generateNewComponent(model, newId.getValue()); - } - newURIs.add(newResource.getURI()); - Resource oldResource = getResource(model, obsoletedId.getValue()); - oldURIs.add(oldResource.getURI()); - if (oldResource != null) { - //replace the 
documents relationship - RDFNode node = null; - Selector selector = new SimpleSelector(oldResource, CITO.documents, node); - StmtIterator iterator = model.listStatements(selector); - while (iterator.hasNext()) { - Statement statement = iterator.nextStatement(); - RDFNode object = statement.getObject(); - //handle the case - oldId documents oldId + if (obsoletedId != null && newId != null) { + Vector needToRemove = new Vector(); + Resource newResource = getResource(model, newId.getValue()); + if (newResource == null) { + newResource = generateNewComponent(model, newId.getValue()); + } + newURIs.add(newResource.getURI()); + Resource oldResource = getResource(model, obsoletedId.getValue()); + oldURIs.add(oldResource.getURI()); + if (oldResource != null) { + //replace all subjects having the old pid resource + RDFNode node = null; + Property nullPredicate = null; + Selector selector = new SimpleSelector(oldResource, nullPredicate, node); + StmtIterator iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + RDFNode object = statement.getObject(); + Property predicate = statement.getPredicate(); + log.debug("ResourceMapModifer.replaceObsoletedIds - the statement with the predicate " + predicate.getLocalName() + " before replace"); + //we don't need to replace the relationship - DC_TERMS.identifier, just remove it + //handle the case - oldId predicates oldId + if(predicate == null || !predicate.equals(DC_TERMS.identifier)) { if (object.isResource()) { Resource objResource = (Resource) object; if (objResource.getURI().equals(oldResource.getURI())) { object = newResource; } } - Statement newStatement = ResourceFactory.createStatement(newResource, CITO.documents, object); - needToRemove.add(statement); - model.add(newStatement); - } - //replace the documentedBy relationship - Resource nullSubject = null; - selector = new SimpleSelector(nullSubject, CITO.isDocumentedBy, oldResource); - iterator = 
model.listStatements(selector); - while (iterator.hasNext()) { - Statement statement = iterator.nextStatement(); - Resource subj = statement.getSubject(); - //handle the case - oldId isDocumentBy oldId - if (subj.getURI().equals(oldResource.getURI())) { - subj = newResource; - } - Statement newStatement = ResourceFactory.createStatement(subj, CITO.isDocumentedBy, newResource); - needToRemove.add(statement); + log.debug("ResourceMapModifer.replaceObsoletedIds - the statement with the predicate " + predicate.getLocalName() + " has been replaced"); + Statement newStatement = ResourceFactory.createStatement(newResource, predicate, object); model.add(newStatement); } - //remove those old documents/isDocumentedBy relationships - for (Statement oldStatement : needToRemove) { - model.remove(oldStatement); + needToRemove.add(statement); + } + //replace all objects having the old pid resource + Resource nullSubject = null; + selector = new SimpleSelector(nullSubject, nullPredicate, oldResource); + iterator = model.listStatements(selector); + while (iterator.hasNext()) { + Statement statement = iterator.nextStatement(); + Resource subj = statement.getSubject(); + Property predicate = statement.getPredicate(); + //handle the case - oldId predicates oldId + if (subj.getURI().equals(oldResource.getURI())) { + subj = newResource; } + Statement newStatement = ResourceFactory.createStatement(subj, predicate, newResource); + model.add(newStatement); + needToRemove.add(statement); + } + //remove those old relationships + for (Statement oldStatement : needToRemove) { + model.remove(oldStatement); } } } diff --git a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java index 6d806689c..eada5f1e8 100644 --- a/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java +++ b/test/edu/ucsb/nceas/metacat/dataone/resourcemap/ResourceMapModifierTest.java @@ -115,16 +115,14 
@@ public void testReplaceObsoletedIds() throws Exception { Identifier new_resourceMap_id = new Identifier(); new_resourceMap_id.setValue(NEW_RESOURCEMAP_PID); ResourceMapModifier modifier = new ResourceMapModifier(origin_resourceMap_id, resourceMapInputStream, new_resourceMap_id); - HashMap obsoletedBys = new HashMap(); Identifier origin_metadata_id = new Identifier(); origin_metadata_id.setValue(ORIGNAL_METADATA_PID); Identifier new_metadata_id = new Identifier(); new_metadata_id.setValue(NEW_METADATA_PID); - obsoletedBys.put(origin_metadata_id, new_metadata_id); ByteArrayOutputStream out = new ByteArrayOutputStream(); Subject subj = new Subject(); subj.setValue("foo"); - modifier.replaceObsoletedIds(obsoletedBys, out, subj); + modifier.replaceObsoletedId(origin_metadata_id, new_metadata_id, out, subj); String outStr = out.toString("UTF-8"); System.out.println(outStr); ByteArrayInputStream in = new ByteArrayInputStream(outStr.getBytes("UTF-8")); @@ -222,6 +220,26 @@ public void testReplaceObsoletedIds() throws Exception { for(Identifier id : dataFileIds) { assertTrue(id.getValue().equals(DATA_1_PID) || id.getValue().equals(DATA_2_PID) || id.getValue().equals(NEW_METADATA_PID)); } + + //no old ore triples + Resource oldOreResource = ResourceFactory.createResource("https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3Ae62c781c-643b-41f3-a0b0-9f6cbd80a708"); + subject = null; + predicate = null; + object = null; + selector = new SimpleSelector(oldOreResource, predicate, object); + iterator = model.listStatements(selector); + assertFalse(iterator.hasNext()); + selector = new SimpleSelector(subject, predicate, oldOreResource); + iterator = model.listStatements(selector); + assertFalse(iterator.hasNext()); + //no old metadata triples + Resource oldMetadataResource = ResourceFactory.createResource("https://cn.dataone.org/cn/v2/resolve/urn%3Auuid%3Ac0e0d342-7cc1-4eaa-9648-c6d9f7ed8b1f"); + selector = new SimpleSelector(oldMetadataResource, predicate, object); + iterator = 
model.listStatements(selector); + assertFalse(iterator.hasNext()); + selector = new SimpleSelector(subject, predicate, oldMetadataResource); + iterator = model.listStatements(selector); + assertFalse(iterator.hasNext()); resourceMapInputStream.close(); }