From 26de2a62133bae0546ee1263ba1cf10755a3ae6d Mon Sep 17 00:00:00 2001 From: IgorRodchenkov Date: Sun, 10 Mar 2024 15:46:41 -0400 Subject: [PATCH] Can now process compressed or not data files. --- README.md | 1 + src/main/java/org/ctdbase/CtdToBiopax.java | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a55b15f..f620fe8 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ to run without any command line options to see the help text: can use special values: 'defined', 'undefined', and 'null') [optional] -x,--interaction structured chemical-gene interaction file (XML) [optional] + Note: the input data files can be compressed, e.g. CTD_genes.csv.gz If you want to test the converter though, you can download small (old) example files from [goal2_ctd_smallSampleInputFiles-20140702.zip](https://bitbucket.org/armish/gsoc14/downloads/goal2_ctd_smallSampleInputFiles-20140702.zip). diff --git a/src/main/java/org/ctdbase/CtdToBiopax.java b/src/main/java/org/ctdbase/CtdToBiopax.java index 639a0d6..32f894e 100644 --- a/src/main/java/org/ctdbase/CtdToBiopax.java +++ b/src/main/java/org/ctdbase/CtdToBiopax.java @@ -1,5 +1,6 @@ package org.ctdbase; +import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.model.level3.UtilityClass; import org.ctdbase.converter.CTDChemicalConverter; import org.ctdbase.converter.CTDGeneConverter; @@ -11,14 +12,15 @@ import org.biopax.paxtools.io.SimpleIOHandler; import org.biopax.paxtools.model.BioPAXElement; import org.biopax.paxtools.model.Model; -import org.biopax.paxtools.model.level3.EntityReference; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.util.Set; +import java.util.zip.GZIPInputStream; public class CtdToBiopax { private static Logger log = LoggerFactory.getLogger(CtdToBiopax.class); @@ -61,7 +63,7 @@ public static void main( String[] args ) { } Converter converter = new CTDInteractionConverter(taxonomy); log.info("Option 'x'. Using " + converter.getClass().getSimpleName() + " to convert: " + fileName); - Model model = converter.convert(new FileInputStream(fileName)); + Model model = converter.convert(inputDataStream(fileName)); merger.merge(finalModel, model); } @@ -69,7 +71,7 @@ public static void main( String[] args ) { String fileName = commandLine.getOptionValue("g"); Converter converter = new CTDGeneConverter(); log.info("Option 'g'. Using " + converter.getClass().getSimpleName() + " to convert: " + fileName); - Model model = converter.convert(new FileInputStream(fileName)); + Model model = converter.convert(inputDataStream(fileName)); merger.merge(finalModel, model); } @@ -77,7 +79,7 @@ public static void main( String[] args ) { String fileName = commandLine.getOptionValue("c"); Converter converter = new CTDChemicalConverter(); log.info("Option 'c'. Using " + converter.getClass().getSimpleName() + " to convert: " + fileName); - Model model = converter.convert(new FileInputStream(fileName)); + Model model = converter.convert(inputDataStream(fileName)); merger.merge(finalModel, model); } @@ -103,4 +105,12 @@ public static void main( String[] args ) { } } + static InputStream inputDataStream(String fileName) throws IOException { + InputStream inputStream = new FileInputStream(fileName); + if (StringUtils.endsWith(fileName, ".gz")) { + inputStream = new GZIPInputStream(inputStream); + } + return inputStream; + } + }