Skip to content
This repository has been archived by the owner on Aug 29, 2023. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
heathsc committed Jun 4, 2019
2 parents 13ab246 + 616beff commit c299b93
Show file tree
Hide file tree
Showing 13 changed files with 59 additions and 40 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN apt-get install -y libpng-dev uuid-dev libmysqlclient-dev
RUN apt-get install -y python3 build-essential git python3-pip wget pigz
RUN apt-get install -y zlib1g-dev libbz2-dev gsl-bin libgsl0-dev
RUN apt-get install -y libncurses5-dev liblzma-dev libssl-dev libcurl4-openssl-dev
RUN pip3 install matplotlib multiprocess
RUN pip3 install 'matplotlib<3.0' multiprocess
RUN mkdir /usr/local/build; cd /usr/local/build
RUN git clone --recursive https://github.com/heathsc/gemBS.git
RUN (cd gemBS; python3 setup.py install)
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ Documentation can be found at
Changelog:
----------

3.3.0 Make new release for IHEC
3.3.0 Switch conversion default in IHEC_standard configuration to 0.01,0.05 rather than auto, which can give odd results if conversion controls not present or not working correctly
3.3.0 Fix bug where conversion parameters could be ignored
3.2.13 Fix formatting bug in mextr with multiple samples (not triggered in normal gemBS use)
3.2.12 Ensure that conversion statistics are correctly calculated for non-stranded or reverse conversion protocols
3.2.11 Introduce reverse_conversion option for mapping where read 1 is G2A converted and read 2 is C2T converted
3.2.10 Correct regex patch for single end reads
3.2.9 Update Singularity and Dockerfile recipes to allow kemp utils to be built correctly
3.2.9 Make setup.py and gemBS/commands.py read the version information from gemBS/version.py, so ensuring consistency
Expand Down
4 changes: 2 additions & 2 deletions Singularity
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ From: ubuntu:xenial
(mkdir /ext && cd /ext && mkdir disk1 disk2 disk3 disk4 disk5 disk6 disk7 disk8 disk9)
apt-get update
apt-get install -y libpng-dev uuid-dev libmysqlclient-dev
apt-get install -y python3 build-essential git python3-pip wget pigz
apt-get install -y python3 build-essential git python3-pip wget pigz
apt-get install -y zlib1g-dev libbz2-dev gsl-bin libgsl0-dev
apt-get install -y libncurses5-dev liblzma-dev libssl-dev libcurl4-openssl-dev
pip3 install matplotlib multiprocess
pip3 install 'matplotlib<3.0' multiprocess
mkdir /usr/local/build; cd /usr/local/build
git clone --recursive https://github.com/heathsc/gemBS.git
(cd gemBS; python3 setup.py install)
Expand Down
22 changes: 14 additions & 8 deletions gemBS/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ def makeChromSizes(index_name=None,output=None):
raise ValueError("Info file {} (normally generated by gem-indexer) does not exist".format(info_file))

def mapping(name=None,index=None,fliInfo=None,inputFiles=None,ftype=None,
read_non_stranded=False,outfile=None,
read_non_stranded=False,reverse_conv=False,outfile=None,
paired=False,tmpDir="/tmp",threads=1,under_conversion=None, over_conversion=None):
""" Start the GEM Bisulfite mapping on the given input.
Expand All @@ -670,6 +670,7 @@ def mapping(name=None,index=None,fliInfo=None,inputFiles=None,ftype=None,
inputFiles -- List of input files
ftype -- input file type
read_non_stranded -- Read non stranded
reverse_conv - Reverse the normal conversion
outputDir -- Directory to store the Bisulfite mapping results
paired -- Paired End flag
tmpDir -- Temporary directory to perform sorting operations
Expand Down Expand Up @@ -699,9 +700,15 @@ def mapping(name=None,index=None,fliInfo=None,inputFiles=None,ftype=None,
#Paired End
if paired:
mapping.append("-p")
#Non Stranded
#Conversion
if read_non_stranded:
mapping.extend(["--bisulfite-read","non-stranded"])
mapping.extend(["--bisulfite-conversion","non-stranded"])
else:
if reverse_conv:
mapping.extend(["--bisulfite-conversion","inferred-G2A-C2T"])
else:
mapping.extend(["--bisulfite-conversion","inferred-C2T-G2A"])

#Number of threads
mapping.extend(["-t",threads])
#Mapping stats
Expand Down Expand Up @@ -831,11 +838,10 @@ def prepare(self, sample, input_bam, chrom_list, output_bcf, report_file, contig
if self.haploid:
parameters_bscall.append('-1')
if self.conversion != None:
if self.conversion.lower() == "auto":
if sample in self.sample_conversion:
parameters_bscall.extend(['--conversion', self.sample_conversion[sample]])
else:
parameters_bscall.extend(['--conversion', self.conversion])
if self.conversion.lower() == "auto" and sample in self.sample_conversion:
parameters_bscall.extend(['--conversion', self.sample_conversion[sample]])
else:
parameters_bscall.extend(['--conversion', self.conversion])
if self.ref_bias != None:
parameters_bscall.extend(['--reference-bias', self.ref_bias])
#Thresholds
Expand Down
2 changes: 1 addition & 1 deletion gemBS/etc/gemBS_configs/IHEC_standard.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ right_trim = 0
keep_improper_pairs = False
keep_duplicates = False
haploid = False
conversion = auto
conversion = 0.01,0.05
remove_individual_bcfs = True

# Contigs smaller than contig_pool_limit will be called together
Expand Down
2 changes: 1 addition & 1 deletion gemBS/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def read(self, infile):
state = 0

known_var = {
'mapping': ('tmp_dir', 'threads', 'non_stranded', 'remove_individual_bams', 'underconversion_sequence', 'overconversion_sequence', 'bam_dir', 'sequence_dir'),
'mapping': ('tmp_dir', 'threads', 'non_stranded', 'reverse_conversion', 'remove_individual_bams', 'underconversion_sequence', 'overconversion_sequence', 'bam_dir', 'sequence_dir'),
'index': ('index', 'index_dir', 'reference', 'extra_references', 'reference_basename', 'nonbs_index', 'contig_sizes', 'threads', 'dbsnp_files', 'dbsnp_index', 'sampling_rate'),
'calling': ('bcf_dir', 'mapq_threshold', 'qual_threshold', 'left_trim', 'right_trim', 'threads', 'jobs', 'species', 'keep_duplicates', 'keep_improper_pairs',
'remove_individual_bcfs', 'haploid', 'reference_bias', 'conversion', 'contig_list', 'contig_pool_limit'),
Expand Down
33 changes: 20 additions & 13 deletions gemBS/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def register(self,parser):
parser.add_argument('-T', '--type', dest="ftype", help='Type of data file (PAIRED, SINGLE, INTERLEAVED, STREAM, BAM)')
parser.add_argument('-p', '--paired-end', dest="paired_end", action="store_true", help="Input data is Paired End")
parser.add_argument('-r', '--remove', dest="remove", action="store_true", help='Remove individual BAM files after merging.', required=False)
parser.add_argument('-R', '--reverse-conversion', dest="reverse_conv", action="store_true", help='Perform G2A conversion on read 1 and C2T on read 2 rather than the reverse.', required=False)
parser.add_argument('-s', '--read-non-stranded', dest="read_non_stranded", action="store_true",
help='Automatically selects the proper C->T and G->A read conversions based on the level of Cs and Gs on the read.')
parser.add_argument('-u', '--underconversion-sequence', dest="underconversion_sequence", metavar="SEQUENCE", help='Name of unmethylated sequencing control.', default=None,required=False)
Expand Down Expand Up @@ -316,7 +317,10 @@ def run(self, args):

self.tmp_dir = self.jsonData.check(section='mapping',key='tmp_dir',arg=args.tmp_dir,dir_type=True)
self.threads = self.jsonData.check(section='mapping',key='threads',arg=args.threads,default='1')
self.reverse_conv = self.jsonData.check(section='mapping',key='reverse_conversion',arg=args.reverse_conv, boolean=True)
self.read_non_stranded = self.jsonData.check(section='mapping',key='non_stranded',arg=args.read_non_stranded, boolean=True)
if self.read_non_stranded:
self.reverse_conv = False
self.remove = self.jsonData.check(section='mapping',key='remove_individual_bams',arg=args.remove, boolean=True)
self.underconversion_sequence = self.jsonData.check(section='mapping',key='underconversion_sequence',arg=args.underconversion_sequence)
self.overconversion_sequence = self.jsonData.check(section='mapping',key='overconversion_sequence',arg=args.overconversion_sequence)
Expand Down Expand Up @@ -530,6 +534,7 @@ def do_mapping(self, fli):
if args.threads: com.extend(['-t',args.threads])
if args.tmp_dir: com.extend(['-d',args.tmp_dir])
if args.read_non_stranded: com.append('-s')
if args.reverse_conv: com.append('-R')
if args.underconversion_sequence: com.extend(['-u',args.underconversion_sequence])
if args.overconversion_sequence: com.extend(['-v',args.overconversion_sequence])
if not bis: com.append('--non-bs')
Expand All @@ -554,7 +559,7 @@ def do_mapping(self, fli):
tmp = os.path.dirname(outfile)

ret = mapping(name=fli,index=self.index,fliInfo=fliInfo,inputFiles=inputFiles,ftype=ftype,
read_non_stranded=self.read_non_stranded,
read_non_stranded=self.read_non_stranded, reverse_conv=self.reverse_conv,
outfile=outfile,paired=self.paired,tmpDir=tmp,threads=self.threads,
under_conversion=self.underconversion_sequence,over_conversion=self.overconversion_sequence)

Expand Down Expand Up @@ -672,16 +677,17 @@ def extra_log(self):
printer = logging.gemBS.gt

printer("------------ Mapping Parameters ------------")
printer("Sample barcode : %s", self.name)
printer("Data set : %s", self.curr_fli)
printer("No. threads : %s", self.threads)
printer("Index : %s", self.index)
printer("Paired : %s", self.paired)
printer("Read non stranded: %s", self.read_non_stranded)
printer("Type : %s", self.curr_ftype)
printer("Sample barcode : %s", self.name)
printer("Data set : %s", self.curr_fli)
printer("No. threads : %s", self.threads)
printer("Index : %s", self.index)
printer("Paired : %s", self.paired)
printer("Read non stranded : %s", self.read_non_stranded)
printer("Reverse conversion: %s", self.reverse_conv)
printer("Type : %s", self.curr_ftype)
if self.inputFiles:
printer("Input Files : %s", ','.join(self.inputFiles))
printer("Output dir : %s", self.curr_output_dir)
printer("Input Files : %s", ','.join(self.inputFiles))
printer("Output dir : %s", self.curr_output_dir)

printer("")

Expand Down Expand Up @@ -963,17 +969,17 @@ def run(self,args):
stats = SampleStats(name=sample,list_lane_stats=list_stats_lanes)
uc = stats.getUnderConversionRate()
oc = stats.getOverConversionRate()
if uc == "NA":
if uc == "NA" or uc < 0.0:
uc = 0.99
elif uc < 0.95:
uc = 0.95
elif uc > 0.999:
uc = 0.999
if oc == "NA":
if oc == "NA" or oc < 0.0:
oc = 0.05
elif oc > 0.15:
oc = 0.15
elif oc < 0.0:
elif oc < 0.001:
oc = 0.01
self.sample_conversion[sample] = "{:.4f},{:.4f}".format(1-uc,oc)

Expand Down Expand Up @@ -1100,6 +1106,7 @@ def run(self,args):
if args.qual_threshold != None: com2.extend(['-Q',str(args.mapq_threshold)])
if args.right_trim != None: com2.extend(['--right-trim',str(args.right_trim)])
if args.left_trim != None: com2.extend(['--left-trim',str(args.left_trim)])
if args.conversion != None: com2.extend(['--conversion',str(args.conversion)])
if args.keep_duplicates != None: com2.append('-u')
if args.ignore_duplicates != None: com2.append('-U')
if args.keep_unmatched != None: com2.append('-k')
Expand Down
4 changes: 2 additions & 2 deletions gemBS/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__VERSION_MAJOR = "3"
__VERSION_MINOR = "2"
__VERSION_SUBMINOR = "10"
__VERSION_MINOR = "3"
__VERSION_SUBMINOR = "0"
__VERSION__ = "%s.%s.%s" % (__VERSION_MAJOR, __VERSION_MINOR,__VERSION_SUBMINOR)
2 changes: 1 addition & 1 deletion tools/gem3-mapper
Submodule gem3-mapper updated 47 files
+1 −1 VERSION
+0 −1 include/align/pattern/pattern.h
+5 −16 include/archive/archive_text.h
+1 −1 include/archive/locator.h
+1 −9 include/archive/search/archive_search_se_parameters.h
+0 −2 include/filtering/region_profile/region_profile.h
+1 −4 include/matches/classify/matches_predictors.h
+0 −11 include/matches/match_trace.h
+0 −25 include/matches/matches.h
+1 −1 include/system/commons.h
+1 −0 include/text/dna_text.h
+0 −22 include/text/restriction_locate.h
+4 −33 include/text/restriction_text.h
+8 −8 include/text/sequence.h
+0 −6 src/align/pattern/pattern.c
+0 −1 src/approximate_search/approximate_search_filtering_adaptive.c
+0 −1 src/approximate_search/approximate_search_stages.c
+1 −1 src/archive/archive.c
+0 −85 src/archive/archive_text.c
+1 −1 src/archive/locator.c
+0 −9 src/archive/score/archive_score_logit.c
+21 −45 src/archive/score/archive_score_se.c
+2 −0 src/archive/search/archive_check.c
+2 −14 src/archive/search/archive_search_se_parameters.c
+4 −5 src/filtering/candidates/filtering_candidates_align.c
+2 −3 src/filtering/candidates/filtering_candidates_align_local.c
+1 −1 src/filtering/candidates/filtering_candidates_process.c
+0 −1 src/filtering/region/filtering_region_verify.c
+0 −1 src/filtering/region_profile/region_profile.c
+6 −23 src/filtering/region_profile/region_profile_adaptive.c
+193 −453 src/io/output_sam.c
+17 −62 src/mapper/mapper.c
+0 −1 src/mapper/mapper_io.c
+16 −49 src/matches/align/match_align_swg_chained.c
+1 −2 src/matches/align/match_alignment.c
+13 −30 src/matches/classify/matches_classify.c
+5 −46 src/matches/classify/matches_predictors.c
+0 −1 src/matches/classify/paired_matches_classify.c
+0 −6 src/matches/match_trace.c
+4 −198 src/matches/matches.c
+0 −1 src/matches/matches_cigar.c
+0 −1 src/matches/paired_matches.c
+16 −9 src/stats/report_stats.c
+0 −1 src/text/dna_text.c
+10 −374 src/text/restriction_text.c
+8 −18 src/tools/gem-retriever.c
+29 −73 src/tools/interface/mapper_arguments.c
1 change: 1 addition & 0 deletions tools/gemBS_plugins/calc_gt_prob.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ void calc_cpg_meth(args_t *args, int ns, cpg_prob *cpg, gt_meth *g1, gt_meth *g2
double wval[3] = {1.0, 1.0, 0.5};
double pval[3] = {1.0, 0.5, 1.0};
for(int ix = 0; ix < ns; ix++) {
if(g1[ix].skip || g2[ix].skip) continue;
int gt1 = g1[ix].max_gt;
int gt2 = g2[ix].max_gt;
cpg[ix].max_gt[0] = gt1;
Expand Down
2 changes: 1 addition & 1 deletion tools/gemBS_plugins/compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ static void init_compress(void) {
path = DEFAULT_PATH;
for (i = 0; i < COMPRESS_NONE; i++) {
compress_data.compress_suffix[i] = strdup(suff[i]);
compress_data.comp_path[i][0] = compress_data.comp_path[i][0] = NULL;
compress_data.comp_path[i][0] = compress_data.comp_path[i][1] = NULL;
}
int ix = 0;
while(pnames[ix][0] != NULL) {
Expand Down
12 changes: 5 additions & 7 deletions tools/gemBS_plugins/mextr.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <locale.h>
#include <unistd.h>
#include <string.h>
#include <inttypes.h>
Expand Down Expand Up @@ -32,7 +31,8 @@ static FILE *open_ofile(char *name, int compress, bool append) {
char *p = strrchr(tname, '.');
if(p == NULL || strcmp(p + 1, suffix)) {
// No, so we will have to add it
asprintf(&tname, "%s.%s", name, suffix);
tname = malloc(strlen(name) + strlen(suffix) + 2);
sprintf(tname, "%s.%s", name, suffix);
}
int i = child_open(WRITE, tname, cdata->comp_path[comp_ix][0]);
fp = fdopen(i, "w");
Expand Down Expand Up @@ -108,8 +108,9 @@ static void print_file_header(FILE *fp, int ns, char **names) {
fputs("Contig\tPos0\tPos1\tRef", fp);
for(int i = 0; i < ns; i++) {
char *name = names[i];
fprintf(fp, "\t%s:Call\t%s:Flags\t%s:Meth\t%s:non_conv\t%s:conv\t%s:support_call\t%s:total\n", name, name, name, name, name, name, name);
fprintf(fp, "\t%s:Call\t%s:Flags\t%s:Meth\t%s:non_conv\t%s:conv\t%s:support_call\t%s:total", name, name, name, name, name, name, name);
}
fputc('\n', fp);
}
}

Expand Down Expand Up @@ -461,11 +462,10 @@ static fmt_field_t tags[] = {
bcf1_t *process(bcf1_t *rec)
{
static int idx;
static int32_t curr_rid = -1, prev_pos, mq[2];
static int32_t curr_rid = -1, prev_pos;
static bool valid[2] = {false, false};
static bcf1_t prev_rec;

(void)setlocale(LC_NUMERIC, "C");
int ns = bcf_hdr_nsamples(args.hdr);
stats_t *st = args.stats;
if(st != NULL) st->n_sites++;
Expand Down Expand Up @@ -561,8 +561,6 @@ bcf1_t *process(bcf1_t *rec)
}
}
}
mq[idx] = -1;
if(mq_p != NULL) mq[idx] = (int32_t)(0.5 + sqrt(ms_mq / (double)tot_n));
valid[idx] = true;
// Here is the logic for deciding what we print
if(rec->rid != curr_rid) curr_rid = rec->rid;
Expand Down
7 changes: 4 additions & 3 deletions tools/gemBS_plugins/output.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ void output_cpg(args_t *args, bcf1_t *rec, fmt_field_t *tags, gt_meth *sample_gt
double z = 0.0;
gt_meth *g = sample_gt[idx ^ pos] + ix;
if(!g->skip) {
double m = get_meth(g, pos);
if(!pos) {
if(g->counts[5] >= args->min_nc && (g->counts[5] + g->counts[7] >= args->min_inform)) {
if(args->sel_mode == SELECT_HOM) z = exp(g->gt_prob[4]);
Expand Down Expand Up @@ -260,8 +259,9 @@ static char *rgb_tab[11] = { "0,255,0", "55,255,0", "105,255,0", "155,255,0", "2

void output_bedmethyl(args_t *args, bcf1_t *rec, fmt_field_t *tags, gt_meth *sample_gt[], int idx) {
static char *cx;
static int32_t cx_n,old_rid = 0xffffffff;
static int32_t cx_n,old_rid = 0xffffffff, old_pos = -1;

if(rec->rid == old_rid && rec->pos <= old_pos) return;
int ns = bcf_hdr_nsamples(args->hdr);
if(ns > 1) return;
gt_meth *g = sample_gt[idx];
Expand Down Expand Up @@ -310,7 +310,6 @@ void output_bedmethyl(args_t *args, bcf1_t *rec, fmt_field_t *tags, gt_meth *sam
FILE *fp = args->wigfile;
if(fp != NULL) {
if(rec->rid != old_rid) {
old_rid = rec->rid;
fprintf(fp, "variableStep chrom=%s\n", args->hdr->id[BCF_DT_CTG][rec->rid].key);
}
fprintf(fp, "%u\t%.4g\n", rec->pos + 1, 100.0 * m);
Expand All @@ -323,5 +322,7 @@ void output_bedmethyl(args_t *args, bcf1_t *rec, fmt_field_t *tags, gt_meth *sam
args->hdr->id[BCF_DT_CTG][rec->rid].key, rec->pos, rec->pos + 1, args->bedmethyl_desc, cov > 1000 ? 1000 : cov, strand,
rec->pos, rec->pos + 1, rgb_tab[(int)(m * 10.0 + 0.5)], cov, (int)(100.0 * m), rtmp, rtmp + 4, gq);
}
old_rid = rec->rid;
old_pos = rec->pos;
}
}

0 comments on commit c299b93

Please sign in to comment.