From 44efed55dfe56800853adf28b7593b0eb9050008 Mon Sep 17 00:00:00 2001 From: IgorRodchenkov Date: Sun, 22 Apr 2018 11:18:13 -0400 Subject: [PATCH] Refs #263, #243 (SMPDB data cleaner removes sub-pathways, steps, base/dummy interactions) --- .../main/java/cpath/cleaner/SmpdbCleaner.java | 86 ++++++++++--------- .../java/cpath/cleaner/SmpdbCleanerTest.java | 40 ++------- 2 files changed, 54 insertions(+), 72 deletions(-) diff --git a/cpath-cli/src/main/java/cpath/cleaner/SmpdbCleaner.java b/cpath-cli/src/main/java/cpath/cleaner/SmpdbCleaner.java index 7b1ee80b2..a2d329f8d 100644 --- a/cpath-cli/src/main/java/cpath/cleaner/SmpdbCleaner.java +++ b/cpath-cli/src/main/java/cpath/cleaner/SmpdbCleaner.java @@ -1,13 +1,12 @@ package cpath.cleaner; -//import cpath.service.CPathUtils; import cpath.service.Cleaner; -//import org.biopax.paxtools.controller.ModelUtils; +import org.biopax.paxtools.controller.ModelUtils; import org.biopax.paxtools.io.SimpleIOHandler; import org.biopax.paxtools.model.BioPAXLevel; import org.biopax.paxtools.model.Model; import org.biopax.paxtools.model.level3.*; -//import org.biopax.paxtools.util.ClassFilterSet; +import org.biopax.paxtools.model.level3.Process; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,33 +30,51 @@ public void clean(InputStream data, OutputStream cleanedData) // create bp model from dataFile SimpleIOHandler simpleReader = new SimpleIOHandler(BioPAXLevel.L3); Model model = simpleReader.convertFromOWL(data); - log.info("Cleaning SMPDB biopax file..."); // As we managed to get only human data archive from SMPDB there is no need for filtering by organism anymore - -// /* -// Fail shortly if there is no TAXONOMY:9606 unif. xref, -// but there are other (non-human) BioSource objects -// (some human data files out there have no human BioSource defined) -// */ -// if(!model.containsID(model.getXmlBase() + "Reference/TAXONOMY_9606") -// && !model.getObjects(BioSource.class).isEmpty()) -// throw new RuntimeException("Highly likely non-human datafile (skip)."); + if(!model.containsID(model.getXmlBase() + "Reference/TAXONOMY_9606") + && !model.containsID(model.getXmlBase() + "Reference/Taxonomy_9606") + && !model.getObjects(BioSource.class).isEmpty()) + throw new RuntimeException("Highly likely non-human datafile (skip)."); // Normalize Pathway URIs KEGG stable id, where possible Set pathways = new HashSet<>(model.getObjects(Pathway.class)); // final Map replacements = new HashMap<>(); for(Pathway pw : pathways) { - //since 1-Apr-2018 - skip normalized pathways - if(!pw.getUri().startsWith("http://identifiers.org/smpdb/")) - { - throw new RuntimeException("Unexpected (malformed) SMPDB pathway URI: " + pw.getUri()); + //since Apr-2018, there are normalized pathway URIs +// if(!pw.getUri().startsWith("http://identifiers.org/smpdb/")) +// throw new RuntimeException("Unexpected (malformed) SMPDB pathway URI: " + pw.getUri()); + + for (PathwayStep step : new HashSet<>(pw.getPathwayOrder())) { + if(step.getNextStep().isEmpty() && step.getNextStepOf().isEmpty()) { + for (Process process : step.getStepProcess()) + if(process instanceof Interaction && !Interaction.class.equals(process.getModelInterface())) + pw.addPathwayComponent(process); + pw.removePathwayOrder(step); + } + } + + //remove all Interaction.class (base) objects + for(Interaction it : new HashSet<>(model.getObjects(Interaction.class))) { + if(Interaction.class.equals(it.getModelInterface())) + model.remove(it); + } + + //remove sub-pathways + for(Pathway pathway : new HashSet<>(model.getObjects(Pathway.class))) { + if(pathway.getName().contains("SubPathway")) { + model.remove(pathway); + for(Pathway pp : new HashSet<>(pathway.getPathwayComponentOf())) + pp.removePathwayComponent(pathway); + } + } + // Set uxrefs = new ClassFilterSet<>(new HashSet<>(pw.getXref()), UnificationXref.class); // //normally there are two unif. xrefs, e.g., SMP00016 and PW000149, per pathway // for (UnificationXref x : uxrefs) { // if (x.getId() == null) // continue; -// ; -// if (x.getId().startsWith("SMP")) { // SMPDB 07-Jul-2015 +// if (x.getId().startsWith("SMP")) { // 15-Apr-2018 // String uri = "http://identifiers.org/smpdb/" + x.getId(); // if (!model.containsID(uri)) { // CPathUtils.replaceID(model, pw, uri); @@ -67,35 +84,24 @@ public void clean(InputStream data, OutputStream cleanedData) // model.remove(pw); // } // break; -// } else if (x.getId().startsWith("http://identifiers.org/smpdb/")) { //SMPDB 05-Jun-2016 -// String uri = x.getId(); -// if (!model.containsID(uri)) { -// CPathUtils.replaceID(model, pw, uri); -// } else { -// //collect to replace the duplicate with equivalent, normalized URI pathway -// replacements.put(pw, (Pathway) model.getByID(uri)); -// model.remove(pw); -// } -// String id = uri.replaceFirst("http://identifiers.org/smpdb/", ""); -// x.setId(id); -// break; //there must be only one such xref // } // } + } + + for(Named o : model.getObjects(Named.class)) { + //move bogus dummy names to comments + for(String name : new HashSet<>(o.getName())) { + if(name.startsWith("SubPathway")) { + o.removeName(name); + o.addComment(name); + } } - //replace shortened ugly displayName with standardName - pw.removeName("SubPathway"); - pw.removeName("SubPathwayOutput"); - pw.removeName("SubPathwayInput"); } // ModelUtils.replace(model, replacements); -// ModelUtils.removeObjectsIfDangling(model, UtilityClass.class); + ModelUtils.removeObjectsIfDangling(model, UtilityClass.class); // convert model back to OutputStream for return - try { - simpleReader.convertToOWL(model, cleanedData); - } catch (Exception e) { - throw new RuntimeException("clean(), Exception thrown while saving cleaned data", e); - } + simpleReader.convertToOWL(model, cleanedData); } } diff --git a/cpath-cli/src/test/java/cpath/cleaner/SmpdbCleanerTest.java b/cpath-cli/src/test/java/cpath/cleaner/SmpdbCleanerTest.java index 287c2a276..dc443e468 100644 --- a/cpath-cli/src/test/java/cpath/cleaner/SmpdbCleanerTest.java +++ b/cpath-cli/src/test/java/cpath/cleaner/SmpdbCleanerTest.java @@ -7,6 +7,7 @@ import org.biopax.paxtools.model.BioPAXLevel; import org.biopax.paxtools.model.Model; import org.biopax.paxtools.model.level3.Pathway; +import org.biopax.paxtools.model.level3.PathwayStep; import org.junit.Test; import java.io.File; @@ -38,38 +39,10 @@ public final void testClean() throws IOException { cleaner.clean(new FileInputStream(getClass().getResource("/PW000005.owl").getFile()), new FileOutputStream(f57)); Model m57 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f57)); - assertTrue(m57.containsID(uri57)); - assertTrue(m57.containsID(uri40)); - //Test whether the simple merging of these two files does not depend on the order of sub-models - Model model = BioPAXLevel.L3.getDefaultFactory().createModel(); - model.merge(m40); //contains full definition of SMP00040 pathway - model.merge(m57); //contains a trivial version of SMP00040 as sub-pathway - assertTrue(model.containsID(uri40)); - assertTrue(model.containsID(uri57)); - new SimpleIOHandler().convertToOWL(model, new FileOutputStream( - getClass().getClassLoader().getResource("").getPath() + File.separator - + "testCleanSmpdbMerge_40_57.owl")); - Pathway pw = (Pathway) model.getByID(uri40); - assertEquals(47, pw.getPathwayComponent().size()); - - //Merge again in reverse order - model = BioPAXLevel.L3.getDefaultFactory().createModel(); - model.merge(m57); //contains a trivial version of SMP00040 as sub-pathway - model.merge(m40); //contains full definition of SMP00040 pathway - assertTrue(model.containsID(uri40)); - assertTrue(model.containsID(uri57)); - new SimpleIOHandler().convertToOWL(model, new FileOutputStream( - getClass().getClassLoader().getResource("").getPath() - + File.separator + "testCleanSmpdbMerge_57_40.owl")); - - pw = (Pathway) model.getByID(uri40); - //with SimpleMerger only, pathways with the same URI do not merge properly... - assertEquals(1, pw.getPathwayComponent().size()); // sub-pathway replaced the full pathway! - - //It works properly when using SimpleMerger with a Filter argument - + //Using SimpleMerger with Filter makes merging by URI work properly (regardless order of sub-models)- SimpleMerger merger = new SimpleMerger(SimpleEditorMap.L3, (o)-> o instanceof Pathway); - model = BioPAXLevel.L3.getDefaultFactory().createModel(); + Model model = BioPAXLevel.L3.getDefaultFactory().createModel(); merger.merge(model, m57); merger.merge(model, m40); assertTrue(model.containsID(uri40)); @@ -78,8 +51,11 @@ public final void testClean() throws IOException { getClass().getClassLoader().getResource("").getPath() + File.separator + "testCleanSmpdbMergeOK.owl")); - pw = (Pathway) model.getByID(uri40); - assertEquals(48, pw.getPathwayComponent().size()); + Pathway pw = (Pathway) model.getByID(uri40); + assertEquals(37, pw.getPathwayComponent().size()); + assertTrue(pw.getPathwayOrder().isEmpty()); + assertEquals(2, model.getObjects(Pathway.class).size()); + assertTrue(model.getObjects(PathwayStep.class).isEmpty()); } }