Skip to content

Commit

Permalink
Added HeteroplasmySplitterSRA for SRA .mito data by Peter
Browse files Browse the repository at this point in the history
  • Loading branch information
haansi committed Mar 2, 2018
1 parent 0443c2d commit 44ae967
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 7 deletions.
2 changes: 2 additions & 0 deletions src/main/java/genepi/mitolib/Tools.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import genepi.mitolib.lofreq.LoFreqReader;
import genepi.mitolib.splitter.HeteroplasmySplitter;
import genepi.mitolib.splitter.HeteroplasmySplitterRaw;
import genepi.mitolib.splitter.HeteroplasmySplitterSRA;


public class Tools extends Toolbox {
Expand All @@ -31,6 +32,7 @@ public static void main (String[] args){

tools.addTool("splitter", HeteroplasmySplitter.class);
tools.addTool("splitterRaw", HeteroplasmySplitterRaw.class);
tools.addTool("splitterSRA", HeteroplasmySplitterSRA.class);
tools.addTool("contChecker", ContaminatonChecker.class);
tools.addTool("lofreq", LoFreqReader.class);
tools.addTool("bam2var",BAMReader.class);
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/genepi/mitolib/bam/BAMReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ public <BaqAlt> int build(String genome, String outDirectory, String refer, doub
Integer cigarElementEnd = currentReferencePos + cigarElementLength;

while (cigarElementStart < cigarElementEnd) {

try{
if ((samRecord.getFlags() & 0x10) == 0x10) {
countDf++;
result[(cigarElementEnd - cigarElementLength) % (size - 1)][11]++;
Expand All @@ -332,6 +332,11 @@ public <BaqAlt> int build(String genome, String outDirectory, String refer, doub
}

cigarElementStart += cigarElementLength;

} catch (Exception e) {
System.out.println("issue with deletion");
break;
}
}

} else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,14 @@ public int build(String inHG2, String inVar, String outfile,double threshold, Ha
} catch (Exception e) {
// TODO: handle exception
}



double weight = readTableHaploGrep.getDouble("Overall_Rank"); //Rank
System.out.println("weight " + weight);
//centry.setMajorId(readTableHaploGrep.getString("weight")); //Major

centry.setSampleId(id.split("_maj")[0]);
centry.setMajorId(readTableHaploGrep.getString("Haplogroup")); //Major


String notfound = readTableHaploGrep.getString("Not_Found_Polys");
centry.setMajorRemaining(notfound.length() - notfound.replaceAll(" ", "").length());
Expand Down
27 changes: 27 additions & 0 deletions src/main/java/genepi/mitolib/objects/ContaminationEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ public class ContaminationEntry {
public int minorRemaining;
double majorFoundMean;
double minorFoundMean;
double majorHGscore;

double minorHGscore;
double contScore;
int contType;

Expand Down Expand Up @@ -94,5 +97,29 @@ public void setContType(int contType) {
this.contType = contType;
}

/** Accessor for majorHG (presumably the major component's haplogroup label — field declared outside this hunk). */
public String getMajorHG() {
return majorHG;
}
/** Sets majorHG (major-component haplogroup label — field declared outside this hunk). */
public void setMajorHG(String majorHG) {
this.majorHG = majorHG;
}
/** Accessor for minorHG (presumably the minor component's haplogroup label — field declared outside this hunk). */
public String getMinorHG() {
return minorHG;
}
/** Sets minorHG (minor-component haplogroup label — field declared outside this hunk). */
public void setMinorHG(String minorHG) {
this.minorHG = minorHG;
}
/** Accessor for majorHGscore (score of the major haplogroup assignment). */
public double getMajorHGscore() {
return majorHGscore;
}
/** Sets majorHGscore (score of the major haplogroup assignment). */
public void setMajorHGscore(double majorHGscore) {
this.majorHGscore = majorHGscore;
}
/** Accessor for minorHGscore (score of the minor haplogroup assignment). */
public double getMinorHGscore() {
return minorHGscore;
}
/** Sets minorHGscore (score of the minor haplogroup assignment). */
public void setMinorHGscore(double minorHGscore) {
this.minorHGscore = minorHGscore;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public CNVServer2Haplogrep(String[] args) {
public void init() {

System.out
.println("Split mitochondrial raw file from CNVServer into profiles for HaploGrep 2\n\n");
.println("Split mitochondrial raw file from CNVServer into profiles for HaploGrep 2 for contamination check\n\n");

}

Expand Down Expand Up @@ -181,9 +181,7 @@ public static int generateHSDfile(HashMap<String, ArrayList<CheckEntry>> hm, Str
int hetcounter=0;
ArrayList<CheckEntry> helpArray = hm.get(pair.getKey());
for (int i = 0; i < helpArray.size(); i++) {


if (helpArray.get(i).getREF().contains("-") || helpArray.get(i).getALT().contains("-")
if (helpArray.get(i).getREF().contains("-") || helpArray.get(i).getALT().contains("-")
|| helpArray.get(i).getREF().equals("N") || helpArray.get(i).getALT().length() > 1
|| helpArray.get(i).getREF().length() > 1) {
// skip indel, and 3107 on rCRS;
Expand Down
270 changes: 270 additions & 0 deletions src/main/java/genepi/mitolib/splitter/HeteroplasmySplitterSRA.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
package genepi.mitolib.splitter;

import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import genepi.base.Tool;
import genepi.io.table.TableReaderFactory;
import genepi.io.table.reader.ITableReader;
import genepi.mitolib.objects.CheckEntry;
import genepi.mitolib.objects.HSDEntry;
import genepi.mitolib.objects.HeaderNames;


/**
 * Splits a mitochondrial pileup table (SRA ".mito" format) into a pair of
 * HaploGrep 2 profiles per sample: a "&lt;sample&gt;_maj" profile carrying the
 * majority alleles and a "&lt;sample&gt;_min" profile carrying the minority
 * alleles. Additionally writes a per-position variant table
 * ("&lt;in&gt;.txt") and a small coverage-statistics file ("&lt;in&gt;.stats.txt").
 *
 * Expected input columns: Position, Ref, RefCount, A, C, G, T.
 */
public class HeteroplasmySplitterSRA extends Tool {

	public HeteroplasmySplitterSRA(String[] args) {
		super(args);
	}

	@Override
	public void init() {
		System.out
				.println("Split mitochondrial pileup file from Peter SRA - in profiles for HaploGrep 2\n\n");
	}

	@Override
	public void createParameters() {
		addParameter("in", "input raw file (pileup) from Peter");
		// fixed help-text typo: "frequence" -> "frequency"
		addParameter("vaf", "variant allele frequency ", DOUBLE);
	}

	@Override
	public int run() {
		String in = (String) getValue("in");
		double vaf = (double) getValue("vaf");

		try {
			return build(in, vaf);
		} catch (IOException e) { // also covers MalformedURLException (a subclass)
			e.printStackTrace();
			return -1;
		}
	}

	/**
	 * Reads the pileup table, collects every position whose minor-allele
	 * fraction reaches {@code vaf}, writes the stats file and delegates the
	 * HSD/variant output to {@link #generateHSDfile}.
	 *
	 * @param variantfile path of the pileup table; also used as sample id and
	 *                    as prefix for all output files
	 * @param vaf         minimum variant allele fraction for a call
	 * @return 0 on success, -1 on error (errors are printed, not rethrown)
	 */
	public int build(String variantfile, double vaf) throws MalformedURLException, IOException {
		try {
			ITableReader idReader = TableReaderFactory.getReader(variantfile);

			// sample id -> variant calls (single sample here: the file path)
			HashMap<String, ArrayList<CheckEntry>> hm = new HashMap<String, ArrayList<CheckEntry>>();
			ArrayList<String> missingList = new ArrayList<>(); // positions with zero coverage
			int missings = 0;
			int covAll = 0; // summed coverage over covered positions
			int linecount = 0;
			String id = variantfile; // sample id = input file path

			try {
				while (idReader.next()) {
					linecount++;

					CheckEntry entry = new CheckEntry();
					entry.setID(id);
					entry.setPOS(idReader.getInteger("Position"));
					String ref = idReader.getString("Ref");
					entry.setREF(ref);

					// NOTE(review): the original also read an unused "Coverage"
					// column; that read was dropped — confirm the column is
					// genuinely unused before relying on this for malformed input.
					int refCount = idReader.getInteger("RefCount");
					int a = idReader.getInteger("A");
					int c = idReader.getInteger("C");
					int g = idReader.getInteger("G");
					int t = idReader.getInteger("T");

					int covTotal = a + c + g + t + refCount;

					// fold the reference reads into the matching base count
					switch (ref) {
					case "A": a += refCount; break;
					case "C": c += refCount; break;
					case "G": g += refCount; break;
					case "T": t += refCount; break;
					}

					// rank bases by read count, highest first
					// NOTE(review): keyed by count, so two bases with the SAME
					// count collapse to one entry (the later put wins) — a tie
					// silently drops a base; confirm this is acceptable.
					TreeMap<Integer, String> ranked = new TreeMap<Integer, String>(Collections.reverseOrder());
					ranked.put(a, "A");
					ranked.put(c, "C");
					ranked.put(g, "G");
					ranked.put(t, "T");

					if (covTotal > 0) {
						covAll += covTotal;
						entry.setCOV(covTotal);

						Iterator<Map.Entry<Integer, String>> order = ranked.entrySet().iterator();
						Map.Entry<Integer, String> first = order.next();
						Map.Entry<Integer, String> second = order.next();
						double firstPerc = first.getKey() / (double) covTotal;
						double secondPerc = second.getKey() / (double) covTotal;

						boolean isVariant = false;
						if (first.getValue().equals(entry.getREF())) {
							// reference dominates: runner-up base is the candidate variant
							entry.setALT(second.getValue());
							entry.setVAF(secondPerc);
							if (secondPerc >= vaf) {
								isVariant = true;
							}
						} else {
							// a non-reference base dominates: always a variant
							entry.setALT(first.getValue());
							entry.setVAF(firstPerc);
							isVariant = true;
						}

						if (isVariant) {
							ArrayList<CheckEntry> calls = hm.get(id);
							if (calls == null) {
								calls = new ArrayList<CheckEntry>();
								hm.put(id, calls);
							}
							calls.add(entry);
						}
					} else {
						missingList.add(entry.getPOS() + "");
						missings++;
					}
				}
				idReader.close();
			} catch (Exception e) {
				e.printStackTrace();
			}

			// coverage statistics; try-with-resources closes the writer even on error
			try (FileWriter fw = new FileWriter(variantfile + ".stats.txt")) {
				// fw.write("SampleID\tlines\tQ30_filtered\tmeanCov_Q30\tsitesNotcovered");
				fw.write(System.lineSeparator());
				fw.write(id + "\t" + linecount + "\t" + missings + "\t"
						+ ((covAll > 0) ? (covAll / (linecount - missings)) : 0) + "\t" + missingList.toString());
			}

			int counter = generateHSDfile(hm, variantfile, vaf);

			System.out.println(counter + " samples processed\n" + counter * 2 + " profiles written");

		} catch (Exception e) {
			System.out.println("ERROR");
			e.printStackTrace();
			return -1;
		}
		return 0;
	}

	/**
	 * Writes the HSD profile pairs ("&lt;outfile&gt;.hsd") and the per-variant
	 * table ("&lt;outfile&gt;.txt") for every sample in {@code hm}.
	 *
	 * Note: the originals' locals were swapped (the object named "minor"
	 * carried the "_maj" id and the majority alleles); they are renamed here so
	 * the names match the content — the emitted output is unchanged.
	 *
	 * @return number of samples processed (two profiles written per sample)
	 */
	public static int generateHSDfile(HashMap<String, ArrayList<CheckEntry>> hm, String outfile, double vaf)
			throws Exception {
		System.out.println("outfile " + outfile);
		int counter = 0;

		// try-with-resources: both writers are closed even if a write fails
		try (FileWriter fw = new FileWriter(outfile + ".hsd");
				FileWriter fwVariant = new FileWriter(outfile + ".txt")) {

			// fw.write("SampleID\tRange\tHaplogroup\tPolymorphisms");
			fw.write(System.lineSeparator());

			fwVariant.write("SampleID\tPos\tRef\tVariant\tVariant-Level\tCoverage-Total");
			fwVariant.write(System.lineSeparator());

			Iterator<Map.Entry<String, ArrayList<CheckEntry>>> it = hm.entrySet().iterator();
			while (it.hasNext()) {
				counter++;
				Map.Entry<String, ArrayList<CheckEntry>> pair = it.next();

				HSDEntry major = new HSDEntry(); // majority alleles -> "<sample>_maj"
				HSDEntry minor = new HSDEntry(); // minority alleles -> "<sample>_min"
				major.setID(pair.getKey() + "_maj");
				major.setRANGE("1-16569");
				minor.setID(pair.getKey() + "_min");
				minor.setRANGE("1-16569");

				int hetcounter = 0;
				ArrayList<CheckEntry> entries = pair.getValue();
				for (int i = 0; i < entries.size(); i++) {
					CheckEntry call = entries.get(i);

					if (call.getREF().contains("-") || call.getALT().contains("-") || call.getREF().equals("N")
							|| call.getALT().length() > 1 || call.getREF().length() > 1) {
						// skip indels, N reference and multi-base calls (e.g. 3107 on rCRS)
						continue;
					}

					// identical variant line for every branch — written once
					fwVariant.write(pair.getKey() + "\t" + call.getPOS() + "\t" + call.getREF() + "\t"
							+ call.getALT() + "\t" + call.getVAF() + "\t" + call.getCOV());
					fwVariant.write(System.lineSeparator());

					if (call.getVAF() < 0.5) {
						// ALT below 50 %: reference is the majority allele
						major.appendPROFILES(call.getPOS() + call.getREF());
						minor.appendPROFILES(call.getPOS() + call.getALT());
						hetcounter++;
					} else if (call.getVAF() < 1 - vaf) {
						// heteroplasmic with ALT as the majority allele
						major.appendPROFILES(call.getPOS() + call.getALT());
						minor.appendPROFILES(call.getPOS() + call.getREF());
						hetcounter++;
					} else {
						// (near) homoplasmic: both profiles carry the ALT allele
						major.appendPROFILES(call.getPOS() + call.getALT());
						minor.appendPROFILES(call.getPOS() + call.getALT());
					}
				}

				// only samples with at least one heteroplasmic site get a profile pair;
				// "_maj" line is written first, matching the original output order
				if (hetcounter > 0) {
					fw.write(major.getString());
					fw.write(System.lineSeparator());
					fw.write(minor.getString());
					fw.write(System.lineSeparator());
				}
				it.remove(); // avoids a ConcurrentModificationException
			}
		}
		return counter;
	}

	/** Ad-hoc driver over a fixed set of local test files. */
	public static void main(String[] args) {
		String[] inputs = {
				"data/peter/test.mito",
				"data/peter/ERR436061.mito",
				"data/peter/SRR1286931.mito",
				"data/peter/SRR1695629.mito",
				"data/peter/SRR924015.mito",
				"data/peter/SRR099317.mito" };
		double vaf = 0.01;

		HeteroplasmySplitterSRA splitter = new HeteroplasmySplitterSRA(args);
		try {
			// as in the original, an exception thrown by one build aborts the rest
			for (String in : inputs) {
				splitter.build(in, vaf);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

0 comments on commit 44ae967

Please sign in to comment.