Skip to content

Commit

Permalink
Refs #263, #243 (SMPDB data cleaner removes sub-pathways, steps, base/dummy interactions)
Browse files Browse the repository at this point in the history
  • Loading branch information
IgorRodchenkov committed Apr 22, 2018
1 parent 640ec4a commit 44efed5
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 72 deletions.
86 changes: 46 additions & 40 deletions cpath-cli/src/main/java/cpath/cleaner/SmpdbCleaner.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package cpath.cleaner;

//import cpath.service.CPathUtils;
import cpath.service.Cleaner;
//import org.biopax.paxtools.controller.ModelUtils;
import org.biopax.paxtools.controller.ModelUtils;
import org.biopax.paxtools.io.SimpleIOHandler;
import org.biopax.paxtools.model.BioPAXLevel;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.*;
//import org.biopax.paxtools.util.ClassFilterSet;
import org.biopax.paxtools.model.level3.Process;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -31,33 +30,51 @@ public void clean(InputStream data, OutputStream cleanedData)
// create bp model from dataFile
SimpleIOHandler simpleReader = new SimpleIOHandler(BioPAXLevel.L3);
Model model = simpleReader.convertFromOWL(data);
log.info("Cleaning SMPDB biopax file...");

// As we managed to get only human data archive from SMPDB there is no need for filtering by organism anymore -
// /*
// Fail shortly if there is no TAXONOMY:9606 unif. xref,
// but there are other (non-human) BioSource objects
// (some human data files out there have no human BioSource defined)
// */
// if(!model.containsID(model.getXmlBase() + "Reference/TAXONOMY_9606")
// && !model.getObjects(BioSource.class).isEmpty())
// throw new RuntimeException("Highly likely non-human datafile (skip).");
if(!model.containsID(model.getXmlBase() + "Reference/TAXONOMY_9606")
&& !model.containsID(model.getXmlBase() + "Reference/Taxonomy_9606")
&& !model.getObjects(BioSource.class).isEmpty())
throw new RuntimeException("Highly likely non-human datafile (skip).");

// Normalize Pathway URIs KEGG stable id, where possible
Set<Pathway> pathways = new HashSet<>(model.getObjects(Pathway.class));
// final Map<Pathway, Pathway> replacements = new HashMap<>();
for(Pathway pw : pathways) {
//since 1-Apr-2018 - skip normalized pathways
if(!pw.getUri().startsWith("http://identifiers.org/smpdb/"))
{
throw new RuntimeException("Unexpected (malformed) SMPDB pathway URI: " + pw.getUri());
//since Apr-2018, there are normalized pathway URIs
// if(!pw.getUri().startsWith("http://identifiers.org/smpdb/"))
// throw new RuntimeException("Unexpected (malformed) SMPDB pathway URI: " + pw.getUri());

for (PathwayStep step : new HashSet<>(pw.getPathwayOrder())) {
if(step.getNextStep().isEmpty() && step.getNextStepOf().isEmpty()) {
for (Process process : step.getStepProcess())
if(process instanceof Interaction && !Interaction.class.equals(process.getModelInterface()))
pw.addPathwayComponent(process);
pw.removePathwayOrder(step);
}
}

//remove all Interaction.class (base) objects
for(Interaction it : new HashSet<>(model.getObjects(Interaction.class))) {
if(Interaction.class.equals(it.getModelInterface()))
model.remove(it);
}

//remove sub-pathways
for(Pathway pathway : new HashSet<>(model.getObjects(Pathway.class))) {
if(pathway.getName().contains("SubPathway")) {
model.remove(pathway);
for(Pathway pp : new HashSet<>(pathway.getPathwayComponentOf()))
pp.removePathwayComponent(pathway);
}
}

// Set<UnificationXref> uxrefs = new ClassFilterSet<>(new HashSet<>(pw.getXref()), UnificationXref.class);
// //normally there are two unif. xrefs, e.g., SMP00016 and PW000149, per pathway
// for (UnificationXref x : uxrefs) {
// if (x.getId() == null)
// continue;
// ;
// if (x.getId().startsWith("SMP")) { // SMPDB 07-Jul-2015
// if (x.getId().startsWith("SMP")) { // 15-Apr-2018
// String uri = "http://identifiers.org/smpdb/" + x.getId();
// if (!model.containsID(uri)) {
// CPathUtils.replaceID(model, pw, uri);
Expand All @@ -67,35 +84,24 @@ public void clean(InputStream data, OutputStream cleanedData)
// model.remove(pw);
// }
// break;
// } else if (x.getId().startsWith("http://identifiers.org/smpdb/")) { //SMPDB 05-Jun-2016
// String uri = x.getId();
// if (!model.containsID(uri)) {
// CPathUtils.replaceID(model, pw, uri);
// } else {
// //collect to replace the duplicate with equivalent, normalized URI pathway
// replacements.put(pw, (Pathway) model.getByID(uri));
// model.remove(pw);
// }
// String id = uri.replaceFirst("http://identifiers.org/smpdb/", "");
// x.setId(id);
// break; //there must be only one such xref
// }
// }
}

for(Named o : model.getObjects(Named.class)) {
//move bogus dummy names to comments
for(String name : new HashSet<>(o.getName())) {
if(name.startsWith("SubPathway")) {
o.removeName(name);
o.addComment(name);
}
}
//replace shortened ugly displayName with standardName
pw.removeName("SubPathway");
pw.removeName("SubPathwayOutput");
pw.removeName("SubPathwayInput");
}

// ModelUtils.replace(model, replacements);
// ModelUtils.removeObjectsIfDangling(model, UtilityClass.class);
ModelUtils.removeObjectsIfDangling(model, UtilityClass.class);

// convert model back to OutputStream for return
try {
simpleReader.convertToOWL(model, cleanedData);
} catch (Exception e) {
throw new RuntimeException("clean(), Exception thrown while saving cleaned data", e);
}
simpleReader.convertToOWL(model, cleanedData);
}
}
40 changes: 8 additions & 32 deletions cpath-cli/src/test/java/cpath/cleaner/SmpdbCleanerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.biopax.paxtools.model.BioPAXLevel;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.Pathway;
import org.biopax.paxtools.model.level3.PathwayStep;
import org.junit.Test;

import java.io.File;
Expand Down Expand Up @@ -38,38 +39,10 @@ public final void testClean() throws IOException {
cleaner.clean(new FileInputStream(getClass().getResource("/PW000005.owl").getFile()),
new FileOutputStream(f57));
Model m57 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f57));
assertTrue(m57.containsID(uri57));
assertTrue(m57.containsID(uri40));

//Test whether the simple merging of these two files does not depend on the order of sub-models
Model model = BioPAXLevel.L3.getDefaultFactory().createModel();
model.merge(m40); //contains full definition of SMP00040 pathway
model.merge(m57); //contains a trivial version of SMP00040 as sub-pathway
assertTrue(model.containsID(uri40));
assertTrue(model.containsID(uri57));
new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
getClass().getClassLoader().getResource("").getPath() + File.separator
+ "testCleanSmpdbMerge_40_57.owl"));
Pathway pw = (Pathway) model.getByID(uri40);
assertEquals(47, pw.getPathwayComponent().size());

//Merge again in reverse order
model = BioPAXLevel.L3.getDefaultFactory().createModel();
model.merge(m57); //contains a trivial version of SMP00040 as sub-pathway
model.merge(m40); //contains full definition of SMP00040 pathway
assertTrue(model.containsID(uri40));
assertTrue(model.containsID(uri57));
new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
getClass().getClassLoader().getResource("").getPath()
+ File.separator + "testCleanSmpdbMerge_57_40.owl"));

pw = (Pathway) model.getByID(uri40);
//with SimpleMerger only, pathways with the same URI do not merge properly...
assertEquals(1, pw.getPathwayComponent().size()); // sub-pathway replaced the full pathway!

//It works properly when using SimpleMerger with a Filter argument -
//Using SimpleMerger with Filter makes merging by URI work properly (regardless order of sub-models)-
SimpleMerger merger = new SimpleMerger(SimpleEditorMap.L3, (o)-> o instanceof Pathway);
model = BioPAXLevel.L3.getDefaultFactory().createModel();
Model model = BioPAXLevel.L3.getDefaultFactory().createModel();
merger.merge(model, m57);
merger.merge(model, m40);
assertTrue(model.containsID(uri40));
Expand All @@ -78,8 +51,11 @@ public final void testClean() throws IOException {
getClass().getClassLoader().getResource("").getPath()
+ File.separator + "testCleanSmpdbMergeOK.owl"));

pw = (Pathway) model.getByID(uri40);
assertEquals(48, pw.getPathwayComponent().size());
Pathway pw = (Pathway) model.getByID(uri40);
assertEquals(37, pw.getPathwayComponent().size());
assertTrue(pw.getPathwayOrder().isEmpty());
assertEquals(2, model.getObjects(Pathway.class).size());
assertTrue(model.getObjects(PathwayStep.class).isEmpty());
}

}

0 comments on commit 44efed5

Please sign in to comment.