Skip to content
This repository has been archived by the owner on Aug 6, 2021. It is now read-only.

Commit

Permalink
Update adaptor ligase finding algorithm
Browse files Browse the repository at this point in the history
Removed unneeded, confusing parameters -cc and -al
Removed unused code
  • Loading branch information
koen authored and koen committed Jul 3, 2015
1 parent 4278e2d commit ae53a65
Show file tree
Hide file tree
Showing 10 changed files with 617 additions and 561 deletions.
17 changes: 7 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,6 @@ piece) AGATCGGAAGAGCG) currently only used for adaptor ligase see -al and when
* `-s` the possible distance of the start. This is the distance counted from
the start of the read to the first basepair of the barcode or enzyme
(standard 0, maximum 20)
* `-cc` Checks the complete read for the enzyme (if false, stops at the
first possible enzyme cutsite) (use values true or false, standard is true). If
used, the sequence after the enzyme site is compared to the adaptors, if the
first basepairs of the sequence are compaired to the first basepairs of the
adaptor
* `-kc` Keep the enzyme cut-site remains (standard true) (example: enzyme
ApeKI and restriction site G^CWGC: "ApeKI \tab CAGC,CTGC")
* `-ea` Add enzymes from the given file (keeps the standard enzymes, and
Expand All @@ -151,10 +146,7 @@ cutsites are comma separeted)) (only use once, not use -er) (example: enzyme
ApeKI and restriction site G^CWGC: "ApeKI \tab CAGC,CTGC")
* `-er` Replace enzymes from the given file (do not keep the standard
enzymes) (enzyme file: no header, enzyme name tab cutsites (multiple cutsites
are comma separated)) (only use once, do not combine with -ea)
* `-al` check for adaptor ligase: no (for no check) or a positive integer
(starts at 0), for the number of mismatches (only checks 10 basepairs of
the adaptor), standard 1
are comma separated)) (only use once, do not combine with -ea)
* `-scb` Use self correcting barcodes (barcodes created by the
barcodeGenerator) (standard false)
* `-malg` the used algorithm to find mismatches and indels, possible
Expand Down Expand Up @@ -259,4 +251,9 @@ v1.1.2
the read is considered invalid (previously the first sample was used)

v1.1.3
* On request added the enzyme AvaII
* On request added the enzyme AvaII

v1.1.4
* Update adaptor ligase finding algorithm
* Removed unneeded, confusing parameters -cc and -al
* Removed unused code
380 changes: 380 additions & 0 deletions releases/GBSX_v1.1.4/GBSX_digest_v1.0.pl

Large diffs are not rendered by default.

Binary file not shown.
Binary file added releases/latest/GBSX_v1.1.4.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion src/be/uzleuven/gc/logistics/GBSX/GBSX.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
public class GBSX {

public static final boolean DEBUG = false;
public final static String VERSION = "GBSX v1.1.3";
public final static String VERSION = "GBSX v1.1.4";
private final static String LICENCE = "GPLv3";

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
public class GBSdemultiplex {

public static final boolean DEBUG = false;
public final static String VERSION = "GBSX v1.2";
public final static String VERSION = "GBSX v1.3";
public final static String LICENCE = "GPLv3";

/*
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -563,11 +563,11 @@ public String getParametersHelp(){
toHelp += "\t -n \t keep sequences where N occurs as nucleotide (standard true)" + "\n";
toHelp += "\t -ca \t the common adaptor used in the sequencing (standard (only first piece) AGATCGGAAGAGCG) currently only used for adaptor ligase see -al and when -rad is true) (minimum length is 10)" + "\n";
toHelp += "\t -s \t the posible distance of the start. This is the distance count from the start of the read to the first basepair of the barcode or enzyme (standard 0, maximum 20)" + "\n";
toHelp += "\t -cc \t Checks the complete read for the enzyme (if false, stops at the first possible enzyme cutsite) (use values true or false, standard is true) if used, the sequence after the enzyme site is compared to the adaptors, if the first basepairs of the sequence are compaired to the first basepairs of the adaptor" + "\n";
//toHelp += "\t -cc \t Checks the complete read for the enzyme (if false, stops at the first possible enzyme cutsite) (use values true or false, standard is true) if used, the sequence after the enzyme site is compared to the adaptors, if the first basepairs of the sequence are compaired to the first basepairs of the adaptor" + "\n";
toHelp += "\t -kc \t Keep the enzyme cut-site remains (standard true)" + "\n";
toHelp += "\t -ea \t Add enzymes from the given file (keeps the standard enzymes, and add the new) (enzyme file: no header, enzyme name tab cutsites (multiple cutsites are comma separeted)) (only use once, not use -er)" + "\n";
toHelp += "\t -er \t Replace enzymes from the given file (don't keep the standard enzymes) (enzyme file: no header, enzyme name tab cutsites (multiple cutsites are comma separeted)) (only use once, not use -ea)" + "\n";
toHelp += "\t -al \t check for adaptor ligase: no (for no check) or a positive integer (starts at 0), for the number of mismatches (only checks 10 basepairs of the adaptor), standard 1" + "\n";
//toHelp += "\t -al \t check for adaptor ligase: no (for no check) or a positive integer (starts at 0), for the number of mismatches (only checks 10 basepairs of the adaptor), standard 1" + "\n";
toHelp += "\t -scb \t Use self correcting barcodes (barcodes created by the barcodeGenerator) (standard false)" + "\n";
toHelp += "\t -malg \t the used algorithm to find mismatches and indels, possible algorithms (see README): " + "\n";
for (FindingsAlgorithms algorithm : FindingsAlgorithms.values()){
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package be.uzleuven.gc.logistics.GBSX.demultiplexer.model;

import be.uzleuven.gc.logistics.GBSX.utils.fastq.model.FastqRead;
import be.uzleuven.gc.logistics.GBSX.utils.sampleBarcodeEnzyme.model.Sample;

/**
 * All information of a processed fragment:
 * <br> the sample of the read,
 * <br> read1 (as FastqRead)
 * <br> read2 (only pair-end) (as FastqRead, <code>null</code> for a single read)
 * <br> the number of mismatches that occurred in finding the barcode/enzyme
 * <br> a comment on the cut of the sequence (empty when none was given)
 * <p>
 * Instances are immutable: every field is assigned once in the constructor
 * and this class offers no setters.
 *
 * @author koen
 */
public class ProcessedFragment {

    private final Sample sample;
    private final FastqRead read1;
    private final FastqRead read2;
    private final int mismatch;
    private final String sequenceComment;

    /**
     * create a new ProcessedFragment (pair-end) with a comment
     * <br> All other constructors delegate to this one.
     * @param sample Sample | the sample of the fragment
     * @param read1 FastqRead | the first read of the fragment
     * @param read2 FastqRead | the second read of the fragment (only pair-end)
     * @param mismatch int | number of mismatches in the barcode/enzyme
     * @param sequenceComment String | the comment on the cut of the sequence (stored as given)
     */
    public ProcessedFragment(Sample sample, FastqRead read1, FastqRead read2, int mismatch, String sequenceComment){
        this.sample = sample;
        this.read1 = read1;
        this.read2 = read2;
        this.mismatch = mismatch;
        this.sequenceComment = sequenceComment;
    }

    /**
     * create a new ProcessedFragment (pair-end) without a comment
     * <br> The comment is set to the empty String.
     * @param sample Sample | the sample of the fragment
     * @param read1 FastqRead | the first read of the fragment
     * @param read2 FastqRead | the second read of the fragment (only pair-end)
     * @param mismatch int | number of mismatches in the barcode/enzyme
     */
    public ProcessedFragment(Sample sample, FastqRead read1, FastqRead read2, int mismatch){
        this(sample, read1, read2, mismatch, "");
    }

    /**
     * create a new ProcessedFragment (single read) without a comment
     * <br> The second read is set to <code>null</code>, the comment to the empty String.
     * @param sample Sample | the sample of the fragment
     * @param read1 FastqRead | the only read of the fragment
     * @param mismatch int | number of mismatches in the barcode/enzyme
     */
    public ProcessedFragment(Sample sample, FastqRead read1, int mismatch){
        this(sample, read1, null, mismatch, "");
    }

    /**
     *
     * @return Sample | the sample of this fragment
     */
    public Sample getSample(){
        return this.sample;
    }

    /**
     *
     * @return FastqRead | the first read
     */
    public FastqRead getRead1(){
        return this.read1;
    }

    /**
     *
     * @return FastqRead | the second read (only by pair-end, <code>null</code> when created as single read)
     */
    public FastqRead getRead2(){
        return this.read2;
    }

    /**
     *
     * @return int | number of mismatches in barcode/enzyme
     */
    public int getMismatch(){
        return this.mismatch;
    }

    /**
     *
     * @return String | the comment on the cut of the reads (empty String when constructed without a comment)
     */
    public String getComment(){
        return this.sequenceComment;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package be.uzleuven.gc.logistics.GBSX.demultiplexer.model;

import be.uzleuven.gc.logistics.GBSX.utils.sampleBarcodeEnzyme.model.Sample;
/**
 * Immutable value holder that ties a sample to the enzyme cutsite used,
 * the start location of the barcode + enzyme in the sequence, the amount of
 * mismatches (between sequence and barcode/enzyme) and the lengths of the
 * barcode and enzyme that were found.
 *
 * @author koen
 */
public class SampleBarcodeCombination {

    // what was matched
    private final Sample sample;
    private final String enzymeCutSite;

    // where and how well it matched
    private final int location;
    private final int mismatches;

    // how long the matched parts are
    private final int lengthFoundBarcode;
    private final int lengthFoundEnzyme;

    /**
     * Stores all given values unchanged.
     *
     * @param sample Sample | the sample of this combination
     * @param enzymeCutSite String | the used enzyme cutsite
     * @param location int | the start location of the barcodeEnzyme
     * @param mismatches int | the amount of mismatches
     * @param lengthFoundBarcode int | length of the found barcode
     * @param lengthFoundEnzyme int | length of the found enzyme
     */
    public SampleBarcodeCombination(Sample sample, String enzymeCutSite, int location, int mismatches, int lengthFoundBarcode, int lengthFoundEnzyme){
        this.sample = sample;
        this.enzymeCutSite = enzymeCutSite;
        this.location = location;
        this.mismatches = mismatches;
        this.lengthFoundBarcode = lengthFoundBarcode;
        this.lengthFoundEnzyme = lengthFoundEnzyme;
    }

    /**
     * @return Sample | the sample of this combination
     */
    public Sample getSample(){
        return sample;
    }

    /**
     * @return String | the enzyme cutsite
     */
    public String getEnzymeCutsite(){
        return enzymeCutSite;
    }

    /**
     * @return int | the location of the barcode + enzyme in a sequence
     */
    public int getLocation(){
        return location;
    }

    /**
     * @return int | the amount of mismatches between the barcode + enzyme and the sequence
     */
    public int getMismatches(){
        return mismatches;
    }

    /**
     * @return int | the length of the found barcode
     */
    public int getLengthFoundBarcode(){
        return lengthFoundBarcode;
    }

    /**
     * @return int | the length of the found enzyme
     */
    public int getLengthFoundEnzyme(){
        return lengthFoundEnzyme;
    }

}

0 comments on commit ae53a65

Please sign in to comment.