From 6ea8b9195cddd05a28db1e7805c862e2aa17e293 Mon Sep 17 00:00:00 2001
From: sol <sol.shenker@gmail.com>
Date: Sun, 27 Apr 2014 22:34:18 -0400
Subject: [PATCH] migrated referenced code from other projects to this
 repository

---
 IsoSCM/.classpath                             |   2 -
 .../src/executable/{Main.java => IsoSCM.java} |  10 +-
 .../processing/ClusterExpressedSegments.java  | 580 +-----------------
 .../src/processing/FindSpliceJunctions.java   |  30 +-
 IsoSCM/src/processing/SlidingWindow.java      |   2 -
 .../EnumerateTranscriptModels.java            |  93 +--
 6 files changed, 32 insertions(+), 685 deletions(-)
 rename IsoSCM/src/executable/{Main.java => IsoSCM.java} (99%)
diff --git a/IsoSCM/.classpath b/IsoSCM/.classpath
index b1e3ed0..a489509 100644
--- a/IsoSCM/.classpath
+++ b/IsoSCM/.classpath
@@ -2,9 +2,7 @@
 <classpath>
 	<classpathentry kind="src" path="src"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
-	<classpathentry combineaccessrules="false" kind="src" path="/Utils"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/itext"/>
-	<classpathentry kind="src" path="/Lai"/>
 	<classpathentry kind="lib" path="lib/colt.jar" sourcepath="lib-src/colt-src.jar"/>
 	<classpathentry kind="lib" path="lib/commons-math3-3.0.jar" sourcepath="lib-src/commons-math3-3.0-sources.jar"/>
 	<classpathentry kind="lib" path="lib/picard-1.106.jar" sourcepath="lib-src/picard-1.106-src.jar"/>
diff --git a/IsoSCM/src/executable/Main.java b/IsoSCM/src/executable/IsoSCM.java
similarity index 99%
rename from IsoSCM/src/executable/Main.java
rename to IsoSCM/src/executable/IsoSCM.java
index 7f5d07f..58cc6aa 100644
--- a/IsoSCM/src/executable/Main.java
+++ b/IsoSCM/src/executable/IsoSCM.java
@@ -1,7 +1,6 @@
 package executable;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 
 import multisample.JointSegmentation;
@@ -15,9 +14,9 @@
 import processing.SlidingWindow;
 import scm.IdentifyChangePoints;
 import splicegraph.ExonSpliceGraph;
-import tools.Strandedness;
 import tools.BEDTools.BEDWriter;
 import tools.GTFTools.GTFWriter;
+import tools.Strandedness;
 import util.IO;
 import util.Util;
 
@@ -28,12 +27,7 @@
 import com.beust.jcommander.converters.FileConverter;
 import com.beust.jcommander.converters.IntegerConverter;
 
-public class Main {
-	enum FileType {}
-	
-	public static File getFile(FileType type, File dir, String base){
-		return dir;
-	}
+public class IsoSCM {
 
 	public static void main(String[] args) throws IOException {
 		class AssembleCommand{	
diff --git a/IsoSCM/src/processing/ClusterExpressedSegments.java b/IsoSCM/src/processing/ClusterExpressedSegments.java
index 963f896..445748e 100644
--- a/IsoSCM/src/processing/ClusterExpressedSegments.java
+++ b/IsoSCM/src/processing/ClusterExpressedSegments.java
@@ -9,46 +9,38 @@
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.math3.distribution.NormalDistribution;
-
-import cern.colt.list.DoubleArrayList;
-import cern.jet.random.engine.RandomEngine;
-import cern.jet.stat.quantile.DoubleQuantileFinder;
-import cern.jet.stat.quantile.QuantileFinderFactory;
-import filter.ComposableFilter;
-import filter.ConsumingReadFilter;
-import filter.Counter;
-import filter.MultiMappingfilter;
-import filter.SAMRecordStrandednessFilter;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.SAMFileReader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMRecordIterator;
-import splicegraph.ExonSpliceGraph.ConnectedComponentResult;
+
+import org.apache.commons.math3.distribution.NormalDistribution;
+
 import tools.AnnotatedRegion;
 import tools.BAMTools;
 import tools.BEDTools.BEDWriter;
 import tools.GTFTools.GTFWriter;
 import tools.GenomicIntervalSet;
 import tools.GenomicIntervalTree;
-import tools.GenomicPositionTree;
 import tools.IntervalTools;
-import tools.StrandedSegmentClusterer;
 import tools.ParseBed.BEDIterator;
 import tools.ParseGTF.TranscriptIterator;
-import tools.PositionClustering.StrandedClusterer;
 import tools.StrandedGenomicIntervalSet;
 import tools.StrandedGenomicIntervalTree;
-import tools.StrandedGenomicPositionTree;
+import tools.StrandedSegmentClusterer;
 import tools.Strandedness;
-import util.ChartUtils;
-import util.IO;
 import util.Util;
-import util.Stats.MomentTracker;
-import util.Util.ExtremeObjectTracker;
 import util.Util.ExtremeTracker;
-import util.Util.MapList;
+import cern.colt.list.DoubleArrayList;
+import cern.jet.random.engine.RandomEngine;
+import cern.jet.stat.quantile.DoubleQuantileFinder;
+import cern.jet.stat.quantile.QuantileFinderFactory;
+import filter.ComposableFilter;
+import filter.ConsumingReadFilter;
+import filter.Counter;
+import filter.MultiMappingfilter;
+import filter.SAMRecordStrandednessFilter;
 
 public class ClusterExpressedSegments {
 
@@ -103,127 +95,6 @@ public static int nConsumingReads(SAMFileReader sfr, Strandedness strandedness,
 		return n;
 	}
 
-	public static GenomicIntervalTree<Map<String,Object>> intervals(String gtf, GTFWriter gw) throws FileNotFoundException {
-		StrandedGenomicIntervalTree<Map<String,Object>> git = new StrandedGenomicIntervalTree<Map<String,Object>>();
-		GenomicIntervalTree<Map<String,Object>> git2 = new GenomicIntervalTree<Map<String,Object>>();
-
-		TranscriptIterator ti = new TranscriptIterator(gtf);
-
-		for(AnnotatedRegion r : ti){
-			if(!git.contains(r.chr, r.start, r.end,r.strand)){
-				git.add(r.chr, r.start, r.end,r.strand, r.attributes);
-			}
-		}
-
-		// remove all stranded changepoints that share one boundary 
-		// with an unstranded segment
-		// and
-		// remove all stranded changepoints that are shorter than
-		// another segment sharing the same boundary
-		List<AnnotatedRegion> irrelevant = new LinkedList<AnnotatedRegion>();
-		for(AnnotatedRegion r : git){
-			if(r.strand=='.'){
-				for(char c : Util.list('+','-')){
-					int r5p = r.get5Prime(c);
-					for(AnnotatedRegion n : git.overlappingRegions(r.chr, r5p, r5p, c)){
-						if(r5p == n.get5Prime()){
-							irrelevant.add(n);
-						}
-					}
-				}
-			}
-			else{
-				int r5p = r.get5Prime();
-				for(AnnotatedRegion n : git.overlappingRegions(r.chr, r5p, r5p, r.strand)){
-					if(r5p == n.get5Prime() && r.size < n.size){
-						irrelevant.add(r);
-					}
-				}
-			}
-		}
-
-		for(AnnotatedRegion r : irrelevant){
-			git.remove(r.chr, r.start, r.end, r.strand);
-		}
-
-		// find those that are on both strands,
-		// delete them, those that remain are those that are only on one strand.
-		// for each of these, assign them to the closest up/down-stream segment boundary.
-
-		StrandedGenomicPositionTree unmatchedSegmentBoundaries = new StrandedGenomicPositionTree();
-
-		for(AnnotatedRegion r : git){
-			if(r.strand=='.'){
-				unmatchedSegmentBoundaries.add(r.chr, r.start, '+');
-				unmatchedSegmentBoundaries.add(r.chr, r.end, '-');
-			}
-			else{
-				unmatchedSegmentBoundaries.add(r.chr, r.get5Prime(), r.strand);
-			}
-		}
-
-		GenomicIntervalTree<Map<String,Object>> matchedSegmentBoundaries = new GenomicIntervalTree<Map<String,Object>>();
-		for(AnnotatedRegion r : git){
-			if(r.strand!='.'){
-				AnnotatedRegion closest = unmatchedSegmentBoundaries.getClosestUpstream(r.chr, r.get3Prime(), r.isNegativeStrand()?'+':'-');
-				//			System.out.println(r);
-				//			System.out.printf("\t%s\n", closest);
-				if(closest!=null){
-
-					String chr = r.chr;
-					int start = Math.min(r.start,closest.start);
-					int end = Math.max(r.end,closest.end);
-
-					boolean nested = false;
-					boolean matched = false;
-					for(AnnotatedRegion matchedSegment : git.overlappingRegions(chr, start, end, r.isNegativeStrand()?'+':'-')){
-						if(matchedSegment.get5Prime()==closest.start){
-							matched |= true;
-							//							System.out.println(matchedSegment);
-							//							System.out.println(matchedSegment.toAttributeString());
-							closest.addAttribute("expression", matchedSegment.getAttribute("expression"));
-							int inter_5p_overlap = Math.abs(r.get5Prime() - matchedSegment.get5Prime());
-							if(inter_5p_overlap < matchedSegment.size){
-								System.out.printf("nested %s in %s\n", r, matchedSegment);
-								nested = true;
-							}
-
-						}
-					}
-					if(matched && !(nested  || matchedSegmentBoundaries.contains(chr, start, end))){
-						//						System.out.printf("(%s,%s)\t%s:%d-%d\t%s\t%s\n",r,closest,r.chr,Math.min(r.start,closest.start), Math.max(r.end,closest.end),r.toAttributeString(),closest.toAttributeString());
-						Map<String, Object> attributes = new HashMap<String, Object>();
-						attributes.put("consolidated", true);
-						if(r.isNegativeStrand()){
-							attributes.put("start_expression", closest.getAttribute("expression"));
-							attributes.put("end_expression", r.getAttribute("expression"));
-						}
-						else{
-							attributes.put("start_expression", r.getAttribute("expression"));
-							attributes.put("end_expression", closest.getAttribute("expression"));
-						}
-						matchedSegmentBoundaries.add(r.chr, Math.min(r.start,closest.start), Math.max(r.end,closest.end), attributes);
-					}
-				}
-			}
-		}
-		//		System.exit(0);
-
-		for(AnnotatedRegion r : matchedSegmentBoundaries){
-			//			System.out.println(r);
-			gw.write("exon", r.chr, r.start, r.end, '.', r.toGTFAttributeString());
-		}
-
-		//		BEDWriter bw = new BEDWriter("test3.bed");
-		//		for(AnnotatedRegion r : git2){
-		//			bw.write(r.chr, r.start, r.end, '.');
-		//		}
-		//		bw.close();
-
-
-		return git2;
-	}
-
 	static class ConsolidatedIntervalResult{
 		GenomicIntervalTree<Map<String,Object>> segments;
 		GenomicIntervalTree<Map<String,Object>> changePoints;
@@ -238,83 +109,7 @@ public ConsolidatedIntervalResult(GenomicIntervalTree<Map<String, Object>> segme
 		}
 	}
 
-	public static ConsolidatedIntervalResult expressedSegmentIntervals(String gtf) throws FileNotFoundException {
-		StrandedGenomicIntervalTree<Map<String,Object>> git = new StrandedGenomicIntervalTree<Map<String,Object>>();
-		StrandedGenomicIntervalTree<Map<String,Object>> unique_git = new StrandedGenomicIntervalTree<Map<String,Object>>();
-		GenomicPositionTree gpt = new GenomicPositionTree();
-		GenomicIntervalTree<Map<String,Object>> max_git = new GenomicIntervalTree<Map<String,Object>>();
-		GenomicIntervalTree<Map<String,Object>> increase_git = new GenomicIntervalTree<Map<String,Object>>();
-		GenomicIntervalTree<Map<String,Object>> decrease_git = new GenomicIntervalTree<Map<String,Object>>();
-		GenomicIntervalTree<Map<String,Object>> expressedSegmentIntervals = new GenomicIntervalTree<Map<String,Object>>();
-
-		TranscriptIterator ti = new TranscriptIterator(gtf);
-
-		for(AnnotatedRegion r : ti){
-			if(!unique_git.contains(r.chr, r.start, r.end,r.strand)){
-				unique_git.add(r.chr, r.start, r.end,r.strand);
-				gpt.add(r.chr, r.start);
-			}
-			git.add(r.chr, r.start, r.end,r.strand, r.attributes);
-		}
-
-		for(AnnotatedRegion r : unique_git){
-			MapList<String, Double> mles = new MapList<String, Double>();
-
-			//			System.out.println(r);
-			for(AnnotatedRegion g : git.overlappingRegions(r.chr, r.start, r.end, r.strand)){
-				for(String s : Util.list("before_mle","after_mle")){				
-					mles.put(s, Double.parseDouble((String) g.getAttribute(s)));					
-				}
-			}
-
-			Map<String,Object> attributes = new HashMap<String, Object>();
-			for(String s : Util.list("before_mle","after_mle")){
-				attributes.put(s, Collections.max(mles.get(s)));
-			}	
-
-			boolean isIncrease = (Double) attributes.get("after_mle") > (Double) attributes.get("before_mle");
-
-
-			if(!max_git.contains(r.chr, r.start, r.start)){
-				max_git.add(r.chr,r.start,r.end,attributes);
-				if(isIncrease)
-					increase_git.add(r.chr,r.start,r.end,attributes);
-				else
-					decrease_git.add(r.chr,r.start,r.end,attributes);
-				//				gw.write("exon", r.chr, r.start, r.end, '.', AnnotatedRegion.GTFAttributeString(attributes));		
-			}
-		}
-
-		//		System.out.println();
-
-		for(AnnotatedRegion r : max_git){
-			//			System.out.println(r);
-
-			AnnotatedRegion upstream = gpt.getClosestUpstream(r.chr, r.start-1);
-			//			AnnotatedRegion downstream = gpt.getClosestDownstream(r.chr, r.end+1);
-
-			if(upstream!=null){
-				Map<String,Object> attributes = new HashMap<String, Object>();
-				attributes.put("mle", Math.max((Double) r.getAttribute("before_mle"), (Double) max_git.overlappingRegions(upstream.chr, upstream.start, upstream.end, upstream.strand).iterator().next().getAttribute("after_mle")));		
-				//				gw.write("exon", r.chr, upstream.start, r.start, '.', AnnotatedRegion.GTFAttributeString(attributes));		
-				expressedSegmentIntervals.add(r.chr, upstream.start, r.start, attributes);
-			}
-
-			//			if(downstream!=null){
-			//				Map<String,Object> attributes = new HashMap<String, Object>();
-			//				attributes.put("mle", Math.max((Double) r.getAttribute("after_mle"), (Double) max_git.overlappingRegions(downstream.chr, downstream.start, downstream.end, downstream.strand).iterator().next().getAttribute("before_mle")));
-			//				gw.write("exon", r.chr, r.start, downstream.start, '.', AnnotatedRegion.GTFAttributeString(attributes));		
-			//			
-			//			}
-
-
-			//			System.out.printf("%s\t%s\t%s\n", gpt.getClosestUpstream(r.chr, r.start-1),r,gpt.getClosestDownstream(r.chr, r.end+1));
-		}
-
-		ConsolidatedIntervalResult result = new ConsolidatedIntervalResult(expressedSegmentIntervals, max_git, increase_git, decrease_git);
-
-		return result;
-	}
+	
 
 	public static void enumerateExons(String chr, int start, int end, char strand, List<AnnotatedRegion> splices, BEDWriter bw) throws FileNotFoundException{
 
@@ -1145,352 +940,5 @@ public static void scaffoldSpannableRegions(File segment_bed,	File matepair_bed,
 		}
 	}
 
-	public static void main(String[] args) throws FileNotFoundException {
-		if(true){
-			SAMFileReader sfr = new SAMFileReader(new File("/mnt/LaiLab/sol/GSE51572/test/g.bam"));
-			File segment_bed = new File("/mnt/LaiLab/sol/GSE51572/test/isoscm/tmp/gu.seg.bed");
-//			countJunctionSupportingReads(sfr, new File("/mnt/LaiLab/sol/GSE51572/test/isoscm/tmp/g.sj.bed"), new GTFWriter("/mnt/LaiLab/sol/GSE51572/test/g1.counts.gtf"));
-
-			int i = estimateMateInsertSize(sfr, Strandedness.unstranded, segment_bed, .99);
-			System.out.println(i);
-		}
-		if(false)
-		{
-			SAMFileReader sfr = new SAMFileReader(new File("/mnt/LaiLab/sol/GSE41637/mapped/indexed/SRR594413.bam"));
-			BEDWriter matepair_bw = new BEDWriter("/mnt/LaiLab/sol/GSE41637/mapped/test_pairs.bed");
-			identifySpannableRegions(sfr, Strandedness.reverse_forward, matepair_bw, 160);
-			matepair_bw.close();
-		}
-		if(false)
-		{
-			trimInferredExons(new File("/home/sol/lailab/sol/sangercenter/de_dup/isoscm/gtf/tmp/hip.exon.gtf"), new GTFWriter("/home/sol/lailab/sol/sangercenter/de_dup/isoscm/gtf/tmp/hip.exon.trimmed.gtf"));
-		}
-		if(false)
-		{
-			BEDWriter bw = new BEDWriter("/mnt/LaiLab/sol/sangercenter/de_dup/isoscm/gtf/tmp/hip.inferred.bed");
-			inferSegmentStrand(new File("/mnt/LaiLab/sol/sangercenter/de_dup/isoscm/gtf/tmp/hip.sj.bed"), new File("/home/sol/workspace/IsoSCM/unstranded.bed"),bw);
-			bw.close();
-		}
-		if(false)
-		{			
-			File segment_bed = new File("/mnt/LaiLab/sol/GSE41637/nb/gtf/tmp/SRR594393.seg.bed");
-			File repeat_bed = new File("/home/sol/data/repeats/mm10/mm10.RepeatMasker.bed");
-			BEDWriter merged_segments = new BEDWriter("/mnt/LaiLab/sol/GSE41637/nb/test.merge.repeat.bed");
-			mergeSegments(segment_bed, repeat_bed, merged_segments, 100);
-			merged_segments.close();
-		}
-		if(false)
-		{
-			File segment_bed = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.seg.bed");
-			BEDWriter merged_segments = new BEDWriter("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.seg.merged.bed");
-			//			mergeSegments(segment_bed, merged_segments, 100);
-			merged_segments.close();
-		}
-		if(false)
-		{
-			File splice_junction_bed = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.sj.filtered.bed");
-			File spliced_exon_gtf = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.exon.gtf");
-			File intronic_exon_gtf = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/test.intronic.gtf");
-			SAMFileReader bam = new SAMFileReader(new File("/mnt/LaiLab/sol/GSE41637/mapped/indexed/SRR594393.bam"));
-			Strandedness strandedness = Strandedness.reverse_forward;
-			int w = 5;
-			double d = .1;
-
-			GTFWriter intronic_exon_writer = new GTFWriter("/home/sol/workspace/IsoSCM/tests/gtf/tmp/test.intronic.filtered.gtf");
-			filterIntronicExons(bam, strandedness, spliced_exon_gtf, intronic_exon_gtf, splice_junction_bed, w, d, intronic_exon_writer);
-			intronic_exon_writer.close();
-		}
-		if(false)
-		{
-			File segment_bed = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.seg.bed");
-			File splice_junction_bed = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.sj.bed");
-			File spliced_exon_gtf = new File("/home/sol/workspace/IsoSCM/tests/gtf/tmp/SRR594393.25.exon.gtf");
-			SAMFileReader bam = new SAMFileReader(new File("/mnt/LaiLab/sol/GSE41637/mapped/indexed/SRR594393.bam"));
-			Strandedness strandedness = Strandedness.reverse_forward;
-			int minExtensionLength = 5;
-			double minExtensionFraction = .1;
-			GTFWriter intronic_exon_writer = new GTFWriter("/home/sol/workspace/IsoSCM/tests/gtf/tmp/test.intronic.gtf");
-			identifyIntronicExons(segment_bed, splice_junction_bed, spliced_exon_gtf, bam, strandedness, intronic_exon_writer, minExtensionLength, minExtensionFraction);
-			intronic_exon_writer.close();
-		}
-		System.exit(0);
-
-		//				GTFWriter gw = new GTFWriter("test5.consolidated.gtf");
-		//		GTFWriter gw = new GTFWriter("test6.consolidated.gtf");
-		//				intervals("test5.gtf",gw);
-		SAMFileReader sfr = new SAMFileReader(new File("/home/sol/data/sangercenter/hippocampus.bam"));
-
-		ConsolidatedIntervalResult expressedSegmentsResult = expressedSegmentIntervals("test6.gtf");
-		GenomicIntervalTree<Map<String,Object>> expressedSegments = expressedSegmentsResult.segments;
-		GenomicIntervalSet highlyExpressedSegments = new GenomicIntervalSet();
-		double expressed_threshold = 10;
-		for(AnnotatedRegion r : expressedSegments){
-			if((Double)r.getAttribute("mle")>expressed_threshold){
-				highlyExpressedSegments.add(r.chr, r.start, r.end);
-			}
-		}
-
-		System.out.println(expressedSegments.size());
-		System.out.println(highlyExpressedSegments.size());
-
-
-		StrandedGenomicIntervalTree<Map<String,Object>> sj = new StrandedGenomicIntervalTree<Map<String,Object>>();
-		BEDIterator splice_junctions = new BEDIterator("hippocampus.sj.bed");
-		for(AnnotatedRegion splice_junction : splice_junctions){
-			//			System.out.println(splice_junction);
-			if(!sj.contains(splice_junction.chr, splice_junction.start, splice_junction.end, splice_junction.strand))
-				sj.add(splice_junction);
-		}
-
-		int w = 50;
-		int hw = (w+1)/2;
-
-		StrandedGenomicIntervalSet clusteredSegments = new StrandedGenomicIntervalSet();
-		for(AnnotatedRegion sje : sj){
-			boolean overlaps = false;
-			for(AnnotatedRegion r : highlyExpressedSegments.overlappingRegions(sje.chr, sje.start-w-hw, sje.start+w-hw)){
-				overlaps |= true;
-			}
-			for(AnnotatedRegion r : highlyExpressedSegments.overlappingRegions(sje.chr, sje.end-w-hw, sje.end+w-hw)){
-				overlaps |= true;
-			}
-			if(overlaps){
-				clusteredSegments.add(sje.chr, sje.start-w, sje.end+w, sje.strand);
-			}
-		}
-
-		/**
-		 * TODO rather than have unstranded clustered segments, make them stranded above. Then, only add highly expressed segments that
-		 * overlap with these.
-		 */
-
-		for(AnnotatedRegion r : highlyExpressedSegments){
-			boolean overlaps_splice = false;
-			for(char strand : Util.list('+','-')){
-				if(clusteredSegments.overlappingRegions(r.chr, r.start, r.end, strand).iterator().hasNext()){
-					clusteredSegments.add(r.chr, r.start, r.end, strand);
-					overlaps_splice |= true;
-				}
-			}
-			if(!overlaps_splice)
-				clusteredSegments.add(r.chr, r.start, r.end, '.');
-
-		}
-		//		chr1:23367501-23929101
-		BEDWriter bw = new BEDWriter("test6.clusters.bed");
-		//		BEDWriter models = new BEDWriter("test6.models.bed");
-		BEDWriter plus_models = new BEDWriter("test6.plus_models.bed");
-		BEDWriter minus_models = new BEDWriter("test6.minus_models.bed");
-		BEDWriter bw_i = new BEDWriter("increase.bed");
-		BEDWriter bw_d = new BEDWriter("decrease.bed");
-		GTFWriter transcribed_fragments = new GTFWriter("test6.transfrags.gtf");
-		GTFWriter assembly = new GTFWriter("chr1.assembly.gtf");
-
-		int cluster_id=0;
-		for(AnnotatedRegion r : clusteredSegments){
-			cluster_id++;
-			bw.write(r.chr, r.start, r.end, r.strand);
-
-			// save the first and last increase and decrease, these should not be removed even if they are near a splice junction
-			ExtremeObjectTracker<AnnotatedRegion, Integer> terminalIncreaseTracker = new ExtremeObjectTracker<AnnotatedRegion, Integer>(new Util.ComparableComparator<Integer>());
-			ExtremeObjectTracker<AnnotatedRegion, Integer> terminalDecreaseTracker = new ExtremeObjectTracker<AnnotatedRegion, Integer>(new Util.ComparableComparator<Integer>());
-
-			GenomicIntervalTree<Map<String,Object>> increasePoints = new GenomicIntervalTree<Map<String,Object>>();
-			GenomicIntervalTree<Map<String,Object>> decreasePoints = new GenomicIntervalTree<Map<String,Object>>();
-
-			for(AnnotatedRegion changepoint : expressedSegmentsResult.increasePoints.overlappingRegions(r.chr, r.start, r.end)){
-				terminalIncreaseTracker.put(changepoint, changepoint.start);
-				increasePoints.add(changepoint.chr, changepoint.start, changepoint.end, changepoint.attributes);
-			}
-			for(AnnotatedRegion changepoint : expressedSegmentsResult.decreasePoints.overlappingRegions(r.chr, r.start, r.end)){
-				terminalDecreaseTracker.put(changepoint, changepoint.start);
-				decreasePoints.add(changepoint.chr, changepoint.start, changepoint.end, changepoint.attributes);
-			}
-
-			AnnotatedRegion terminalIncrease = terminalIncreaseTracker.getMinObject();
-			AnnotatedRegion terminalDecrease = terminalDecreaseTracker.getMaxObject();
-
-			List<AnnotatedRegion> to_remove = new LinkedList<AnnotatedRegion>();
-			for(char strand : Util.list('+','-')){
-				for(AnnotatedRegion sje : sj.overlappingRegions(r.chr, r.start, r.end, strand)){
-					for(AnnotatedRegion changepoint : expressedSegmentsResult.changePoints.overlappingRegions(sje.chr, sje.start-w-hw, sje.start+w-hw)){
-						to_remove.add(changepoint);
-					}
-					for(AnnotatedRegion changepoint : expressedSegmentsResult.changePoints.overlappingRegions(sje.chr, sje.end-w-hw, sje.end+w-hw)){
-						to_remove.add(changepoint);
-					}
-				}
-			}
-
-			for(AnnotatedRegion changepoint : to_remove){
-				if( (terminalIncrease!=null && changepoint.start != terminalIncrease.start) && (terminalDecrease!=null && changepoint.start != terminalDecrease.start)){
-					increasePoints.remove(changepoint.chr, changepoint.start, changepoint.end);
-					decreasePoints.remove(changepoint.chr, changepoint.start, changepoint.end);
-				}
-			}
-
-			List<AnnotatedRegion> increases = new LinkedList<AnnotatedRegion>();
-			List<AnnotatedRegion> decreases = new LinkedList<AnnotatedRegion>();
-
-			for(AnnotatedRegion changepoint : increasePoints.overlappingRegions(r.chr, r.start, r.end)){
-				if( (Double) changepoint.getAttribute("after_mle") > expressed_threshold ){
-					increases.add(changepoint);
-					bw_i.write(changepoint.chr, changepoint.start, changepoint.end, changepoint.strand);
-				}
-			}
-			for(AnnotatedRegion changepoint : decreasePoints.overlappingRegions(r.chr, r.start, r.end)){
-				if( (Double) changepoint.getAttribute("before_mle") > expressed_threshold ){
-					decreases.add(changepoint);
-					bw_d.write(changepoint.chr, changepoint.start, changepoint.end, changepoint.strand);
-				}
-			}
-
-			//			if(r.strand != '.')
-			//				System.out.println(r);
-			//			for(AnnotatedRegion increase : increases){
-			//				for(AnnotatedRegion decrease : decreases){
-			//					if(increase.start < decrease.end){
-			//
-			//						List<AnnotatedRegion> splices = prepareSpliceJunctions(sfr, sj, r.chr, increase.start, decrease.end, r.strand);
-			//
-			//						enumerateExons(r.chr, increase.start, decrease.end, r.strand, splices, models);
-			//
-			//						//					bw.write(r.chr, increase.start, decrease.end, '.');
-			//					}
-			//				}
-			//			}
-
-			//TODO make GTF output with junctions, exons 5' and 3' labeled
-
-			if(r.strand != '.'){
-				BEDWriter models = r.isNegativeStrand()? minus_models : plus_models;
-
-				System.out.println(r);
-
-				StrandedGenomicIntervalTree<Map<String,Object>> exons = new StrandedGenomicIntervalTree<Map<String,Object>>();
-
-				StrandedGenomicPositionTree splice5ps = new StrandedGenomicPositionTree();
-				StrandedGenomicPositionTree splice3ps = new StrandedGenomicPositionTree();
-
-				// find all splice junctions
-				//				List<AnnotatedRegion> splices = prepareSpliceJunctions(sfr, sj, r.chr, r.start, r.end, r.strand);
-				for(AnnotatedRegion splice : sj.overlappingRegions(r.chr, r.start, r.end, r.strand)){
-					//				for(AnnotatedRegion splice : splices){
-					StrandedGenomicIntervalSet exonIntervals = new StrandedGenomicIntervalSet();
-					exonIntervals.add(splice.chr, splice.start-1, splice.start-1, splice.strand);
-					exonIntervals.add(splice.chr, splice.end+1, splice.end+1, splice.strand);
-					writeTranscript(exonIntervals, models);
-					//					models.write(splice.chr, splice.start, splice.end, splice.strand);
-					splice5ps.add(splice.chr, splice.get5Prime(), splice.strand);
-					splice3ps.add(splice.chr, splice.get3Prime(), splice.strand);
-
-					Map<String,Object> attributes = new HashMap<String, Object>();
-					attributes.put("gene_id", splice.toString());
-					attributes.put("transcript_id", splice.toString());
-					attributes.put("type", "splice_junction");
-
-					assembly.write("exon", splice.chr, splice.start-1, splice.start-1, splice.strand, AnnotatedRegion.GTFAttributeString(attributes));
-					assembly.write("exon", splice.chr, splice.end+1, splice.end+1, splice.strand, AnnotatedRegion.GTFAttributeString(attributes));
-				}
-
-				// find all internal exons
-				for(AnnotatedRegion splice : sj.overlappingRegions(r.chr, r.start, r.end, r.strand)){
-					AnnotatedRegion exon5p = splice3ps.getClosestUpstream(splice.chr, splice.get5Prime(), splice.strand);
-					if(exon5p!=null){
-						int s5p = splice.get5Prime();
-						int e5p = exon5p.start;
-						if(!exons.contains(r.chr, Math.min(s5p,e5p)+1, Math.max(s5p,e5p)-1, r.strand)){
-							{
-								Map attributes = new HashMap();
-								attributes.put("type", "internal");
-								transcribed_fragments.write("exon", r.chr, Math.min(s5p,e5p)+1, Math.max(s5p,e5p)-1, r.strand, AnnotatedRegion.GTFAttributeString(attributes));
-								assembly.write("exon", r.chr, Math.min(s5p,e5p)+1, Math.max(s5p,e5p)-1, r.strand, AnnotatedRegion.GTFAttributeString(attributes));
-							}
-
-							models.write(r.chr, Math.min(s5p,e5p)+1, Math.max(s5p,e5p)-1, r.strand);
-							exons.add(r.chr, Math.min(s5p,e5p)+1, Math.max(s5p,e5p)-1, r.strand);
-						}
-					}
-				}
-
-				// find all terminal exons
-				List<AnnotatedRegion> ends3p = r.isNegativeStrand()?increases:decreases;
-				List<AnnotatedRegion> ends5p = !r.isNegativeStrand()?increases:decreases;
-
-				// the 3' exon
-				// since there will be some error due to granularity of SCM, don't forget to add a half-width
-				// splice junctions also have to be adjusted by one
-				int i3p = r.isNegativeStrand()?-1:1;
-				int i5p = !r.isNegativeStrand()?-1:1;
-
-				for(AnnotatedRegion end3p : ends3p){
-
-					AnnotatedRegion transcriptBoundary = splice3ps.getClosestUpstream(r.chr, end3p.start, r.strand);
-					if(transcriptBoundary!=null){
-						int s5p = end3p.start;
-						int e5p = transcriptBoundary.start;
-						if(!exons.contains(r.chr, Math.min(s5p,e5p), Math.max(s5p,e5p), r.strand)){
-
-							{	
-								Map attributes = new HashMap();
-								attributes.put("type", "exon3p");
-								transcribed_fragments.write("exon", r.chr, Math.min(s5p+hw,e5p)+1, Math.max(s5p+hw,e5p)-1, r.strand, AnnotatedRegion.GTFAttributeString(attributes));
-							}
-							{
-								Map attributes = new HashMap();
-								attributes.put("type", "3'end");
-								assembly.write("exon", r.chr, s5p+hw, s5p+hw, r.strand, AnnotatedRegion.GTFAttributeString(attributes));	
-							}
-
-							models.write(r.chr, Math.min(s5p+hw,e5p+i3p), Math.max(s5p+hw,e5p+i3p), r.strand);
-							exons.add(r.chr, Math.min(s5p,e5p), Math.max(s5p,e5p), r.strand);
-						}
-					}
-				}
-				// the 5' exon
-				for(AnnotatedRegion end5p : ends5p){
-
-					AnnotatedRegion transcriptBoundary = splice5ps.getClosestDownstream(r.chr, end5p.start, r.strand);
-					if(transcriptBoundary!=null){
-						int s5p = end5p.start;
-						int e5p = transcriptBoundary.start;
-						if(!exons.contains(r.chr, Math.min(s5p,e5p), Math.max(s5p,e5p), r.strand)){
-							{
-								Map attributes = new HashMap();
-								attributes.put("type", "exon5p");
-								transcribed_fragments.write("exon", r.chr, Math.min(s5p+hw,e5p)+1, Math.max(s5p+hw,e5p)-1, r.strand, AnnotatedRegion.GTFAttributeString(attributes));
-							}
-							{
-								Map attributes = new HashMap();
-								attributes.put("type", "5'end");
-								assembly.write("exon", r.chr, s5p+hw, s5p+hw, r.strand, AnnotatedRegion.GTFAttributeString(attributes));	
-							}
-							models.write(r.chr, Math.min(s5p+hw,e5p+i5p), Math.max(s5p+hw,e5p+i5p), r.strand);
-							exons.add(r.chr, Math.min(s5p,e5p), Math.max(s5p,e5p), r.strand);
-						}
-					}
-				}
-
-				//				for(AnnotatedRegion increase : increases){
-				//					for(AnnotatedRegion decrease : decreases){
-				//						if(increase.start < decrease.end){
-				//
-				//							List<AnnotatedRegion> splices = prepareSpliceJunctions(sfr, sj, r.chr, increase.start, decrease.end, r.strand);
-				//
-				//							enumerateExons(r.chr, increase.start, decrease.end, r.strand, splices, models);
-				//
-				//							//					bw.write(r.chr, increase.start, decrease.end, '.');
-				//						}
-				//					}
-				//				}
-			}
-		}
-		bw_i.close();
-		bw_d.close();
-		bw.close();
-		transcribed_fragments.close();
-		//					gw.close();
-	}
-
 
 }
diff --git a/IsoSCM/src/processing/FindSpliceJunctions.java b/IsoSCM/src/processing/FindSpliceJunctions.java
index 82a4303..2edbb4f 100644
--- a/IsoSCM/src/processing/FindSpliceJunctions.java
+++ b/IsoSCM/src/processing/FindSpliceJunctions.java
@@ -7,39 +7,33 @@
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.math3.special.Beta;
-
-import filter.ComposableFilter;
-import filter.ConsumingReadFilter;
-import filter.Counter;
-import filter.Echo;
-import filter.MismatchFilter;
-import filter.MultiMappingfilter;
-import filter.SAMRecordStrandednessFilter;
-import filter.Transformer;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileReader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMRecordIterator;
-import net.sf.samtools.SAMRecord.SAMTagAndValue;
+
+import org.apache.commons.math3.special.Beta;
+
 import tools.AnnotatedRegion;
 import tools.BAMTools;
-import tools.GenomicIntervalSet;
-import tools.IntervalTools;
-import tools.StrandedGenomicIntervalSet;
-import tools.StrandedGenomicIntervalTree;
-import tools.Strandedness;
 import tools.BEDTools.BEDWriter;
 import tools.GTFTools.GTFWriter;
+import tools.IntervalTools;
 import tools.ParseBed.BEDIterator;
 import tools.ParseGTF.TranscriptIterator;
+import tools.StrandedGenomicIntervalSet;
+import tools.StrandedGenomicIntervalTree;
+import tools.Strandedness;
 import util.Util;
-import util.Util.Criteria;
 import util.Util.ExtremeObjectTracker;
 import util.Util.MapCounter;
-import util.Util.MapSet;
+import filter.ComposableFilter;
+import filter.ConsumingReadFilter;
+import filter.Counter;
+import filter.MultiMappingfilter;
+import filter.SAMRecordStrandednessFilter;
 
 public class FindSpliceJunctions {
 
diff --git a/IsoSCM/src/processing/SlidingWindow.java b/IsoSCM/src/processing/SlidingWindow.java
index 0c5d0b8..5535039 100644
--- a/IsoSCM/src/processing/SlidingWindow.java
+++ b/IsoSCM/src/processing/SlidingWindow.java
@@ -8,8 +8,6 @@
 import java.util.List;
 import java.util.Map;
 
-import net.sf.samtools.Cigar;
-import net.sf.samtools.CigarElement;
 import net.sf.samtools.SAMFileReader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMRecordIterator;
diff --git a/IsoSCM/src/splicegraph/EnumerateTranscriptModels.java b/IsoSCM/src/splicegraph/EnumerateTranscriptModels.java
index 27ed5c7..db3a92b 100644
--- a/IsoSCM/src/splicegraph/EnumerateTranscriptModels.java
+++ b/IsoSCM/src/splicegraph/EnumerateTranscriptModels.java
@@ -12,110 +12,25 @@
 import java.util.Map;
 import java.util.Set;
 
-import processing.FindSpliceJunctions;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileReader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMRecordIterator;
+import processing.FindSpliceJunctions;
 import tools.AnnotatedRegion;
+import tools.BEDTools.BEDWriter;
 import tools.GenomicIntervalSet;
-import tools.GenomicIntervalTree;
+import tools.ParseBed.BEDIterator;
 import tools.StrandedGenomicIntervalSet;
 import tools.StrandedGenomicIntervalTree;
-import tools.BEDTools.BEDWriter;
-import tools.ParseBed.BEDIterator;
-import tools.ParseGTF.TranscriptIterator;
-import tools.StrandedGenomicPositionTree;
 import util.Util;
 import util.Util.MapCounter;
 
 public class EnumerateTranscriptModels {
 
-	public static GenomicIntervalTree<Map<String,Object>> intervals(String gtf) throws FileNotFoundException {
-		StrandedGenomicIntervalTree<Map<String,Object>> git = new StrandedGenomicIntervalTree<Map<String,Object>>();
-		GenomicIntervalTree<Map<String,Object>> git2 = new GenomicIntervalTree<Map<String,Object>>();
-
-		TranscriptIterator ti = new TranscriptIterator(gtf);
-
-		for(AnnotatedRegion r : ti){
-			boolean isAdded=false;
-			for(AnnotatedRegion added : git.overlappingRegions(r.chr, r.start, r.end,r.strand)){
-
-				isAdded |= r.start== added.start && r.end ==added.end;
-
-			}
-
-			if(!isAdded){
-				git.add(r.chr, r.start, r.end,r.strand);
-				System.out.println(r);
-			}
-		}
-
-		for(AnnotatedRegion r : git){
-			if(r.strand=='+'){
-				boolean containsAntisense = false;
-				for(AnnotatedRegion added : git.overlappingRegions(r.chr, r.start, r.end,r.isNegativeStrand()?'+':'-')){
-					containsAntisense |= r.start== added.start && r.end ==added.end;
-				}
-				if(containsAntisense)
-					git2.add(r.chr, r.start, r.end);
-			}
-		}
-
-		// find those that are on both strands,
-		// delete them, those that remain are those that are only on one strand.
-		// for each of these, assign them to the closest up/down-stream segment boundary.
-
-		StrandedGenomicPositionTree segmentBoundaries = new StrandedGenomicPositionTree();
-
-		for(AnnotatedRegion r : git2){
-			segmentBoundaries.add(r.chr, r.start, '+');
-			segmentBoundaries.add(r.chr, r.end, '-');
-			for(char strand : Util.list('+','-'))
-				git.remove(r.chr, r.start, r.end, strand);
-		}
-
-
-
-		for(AnnotatedRegion r : git){
-			segmentBoundaries.add(r.chr, r.get5Prime(), r.strand);
-
-			//			System.out.println(r);
-		}
-
-		for(AnnotatedRegion r : git){
-			AnnotatedRegion closest = segmentBoundaries.getClosestUpstream(r.chr, r.get3Prime(), r.isNegativeStrand()?'+':'-');
-			//			System.out.println(r);
-			//			System.out.printf("\t%s\n", closest);
-			if(closest!=null){
-
-				boolean isAdded=false;
-				for(AnnotatedRegion added : git2.overlappingRegions(r.chr, Math.min(r.start,closest.start), Math.max(r.end,closest.end))){
-
-					isAdded |= Math.min(r.start,closest.start) == added.start && Math.max(r.end,closest.end) == added.end;
-
-				}
-
-				if(!isAdded){
-					System.out.printf("(%s,%s)\t%s:%d-%d\n",r,closest,r.chr,Math.min(r.start,closest.start), Math.max(r.end,closest.end));
-
-					git2.add(r.chr, Math.min(r.start,closest.start), Math.max(r.end,closest.end));
-				}
-			}
-		}
-
-		BEDWriter bw = new BEDWriter("test3.bed");
-		for(AnnotatedRegion r : git2){
-			bw.write(r.chr, r.start, r.end, '.');
-		}
-		bw.close();
-
-
-		return git2;
-	}
-
+	
 	public static void filterRegion(String chr, int start, int end, String segmentationBed) throws FileNotFoundException{
 		BEDIterator bi = new BEDIterator("test3.bed");
 		BEDWriter bw = new BEDWriter(segmentationBed);