Skip to content

Commit

Permalink
Merge branch 'bug_fixes'
Browse files Browse the repository at this point in the history
  • Loading branch information
ch4rr0 committed Jan 16, 2022
2 parents 1c51df4 + 2540fd6 commit 2d2c4db
Show file tree
Hide file tree
Showing 26 changed files with 1,049 additions and 858 deletions.
1 change: 1 addition & 0 deletions .github/workflows/bowtie2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ jobs:
run: |
make allall
make simple-test
make random-test
2 changes: 1 addition & 1 deletion BOWTIE2_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.4.4
2.4.5
4 changes: 4 additions & 0 deletions MANUAL
Original file line number Diff line number Diff line change
Expand Up @@ -1924,6 +1924,10 @@ format:
Fields are separated by tabs. Colorspace is always set to 0 for Bowtie
2.

-o/--output <filename>

Save output to user-specified filename (default: stdout)

-v/--verbose

Print verbose output (for debugging).
Expand Down
9 changes: 9 additions & 0 deletions MANUAL.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -2584,6 +2584,14 @@ names and lengths of the input sequences. The summary has this format:

Fields are separated by tabs. Colorspace is always set to 0 for Bowtie 2.

</td></tr><tr><td id="bowtie2-inspect-options-o">

-o/--output <filename>

</td><td>

Save output to user-specified filename (default: stdout)

</td></tr><tr><td>

-v/--verbose
Expand Down Expand Up @@ -2913,6 +2921,7 @@ warnings due to the case insensitive nature of markdown URLs -->
[`-m`]: #bowtie2-options-m
[`-n`/`--names`]: #bowtie2-inspect-options-n
[`-o`/`--offrate`]: #bowtie2-options-o
[`-o`/`--output`]: #bowtie2-inspect-options-o
[`-o`]: #bowtie2-options-o
[`-p`/`--packed`]: #bowtie2-build-options-p
[`-p`/`--threads`]: #bowtie2-options-p
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ ifneq (,$(findstring MINGW,$(shell uname)))
WINDOWS := 1
MINGW := 1
# POSIX memory-mapped files not currently supported on Windows
endif

ifeq (1, $(WINDOWS))
BOWTIE_MM :=
BOWTIE_SHARED_MEM :=
endif
Expand Down Expand Up @@ -252,7 +255,7 @@ GENERAL_LIST := $(wildcard scripts/*.sh) \

ifeq (1,$(WINDOWS))
BOWTIE2_BIN_LIST := $(BOWTIE2_BIN_LIST) bowtie2.bat bowtie2-build.bat bowtie2-inspect.bat
CXXFLAGS += -static-libgcc -static-libstdc++
CXXFLAGS += -static-libgcc -static-libstdc++ -static
endif

# This is helpful on Windows under MinGW/MSYS, where Make might go for
Expand Down
1,106 changes: 586 additions & 520 deletions NEWS

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

<!-- badges: start -->
![Github Actions](https://github.com/BenLangmead/bowtie2/actions/workflows/bowtie2.yml/badge.svg)
[![Generic badge](https://img.shields.io/badge/version-2.4.4-green.svg)](https://shields.io/)
[![Build Status](https://travis-ci.org/BenLangmead/bowtie2.svg?branch=master)](https://travis-ci.org/BenLangmead/bowtie2)
[![Generic badge](https://img.shields.io/badge/version-2.4.5-green.svg)](https://shields.io/)
<!-- [![Build Status](https://travis-ci.org/BenLangmead/bowtie2.svg?branch=master)](https://travis-ci.org/BenLangmead/bowtie2) -->
[![License: GPL v3](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
<!--badges: end -->

Expand Down
6 changes: 6 additions & 0 deletions aln_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2114,6 +2114,12 @@ void AlnSinkSam::appendMate(
samc_.printPreservedOptFlags(o, rd);
samc_.printComment(o, rd.name);
o.append('\n');
if(samc_.passthrough()) {
// Original read string
samc_.printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
o.append('\n');
}

}

#ifdef ALN_SINK_MAIN
Expand Down
41 changes: 18 additions & 23 deletions blockwise_sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <stdint.h>
#include <stdlib.h>
#include <future>
#include <iostream>
#include <sstream>
#include <thread>
Expand All @@ -38,6 +39,7 @@
#include "ds.h"
#include "mem_ids.h"
#include "word_io.h"
#include "threadpool.h"

using namespace std;

Expand Down Expand Up @@ -198,6 +200,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
KarkkainenBlockwiseSA(const TStr& __text,
TIndexOffU __bucketSz,
int __nthreads,
thread_pool& pool,
uint32_t __dcV,
uint32_t __seed = 0,
bool __sanityCheck = false,
Expand All @@ -208,6 +211,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
InorderBlockwiseSA<TStr>(__text, __bucketSz, __sanityCheck, __passMemExc, __verbose, __logger),
_sampleSuffs(EBWTB_CAT),
_nthreads(__nthreads),
_pool(pool),
_itrBucketIdx(0),
_cur(0),
_dcV(__dcV),
Expand All @@ -218,17 +222,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
_done(NULL)
{ _randomSrc.init(__seed); reset(); }

~KarkkainenBlockwiseSA() throw()
{
if(_threads.size() > 0) {
for (size_t tid = 0; tid < _threads.size(); tid++) {
_threads[tid]->join();
delete _threads[tid];
}
}
if (_done != NULL)
delete[] _done;
}
~KarkkainenBlockwiseSA() throw() {}

/**
* Allocate an amount of memory that simulates the peak memory
Expand All @@ -253,7 +247,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
{
// Launch threads if not
if(this->_nthreads > 1) {
if(_threads.size() == 0) {
if(_tparams.size() == 0) {
_done = new volatile bool[_sampleSuffs.size() + 1];
for (size_t i = 0; i < _sampleSuffs.size() + 1; i++) {
_done[i] = false;
Expand All @@ -263,9 +257,11 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
for(int tid = 0; tid < this->_nthreads; tid++) {
_tparams[tid].first = this;
_tparams[tid].second = tid;
_threads.push_back(new thread(nextBlock_Worker((void*)&_tparams[tid])));
if (tid == _nthreads - 1)
nextBlock_Worker((void *)&_tparams[tid]);
else
_pool.submit(nextBlock_Worker((void *)&_tparams[tid]));
}
assert_eq(_threads.size(), (size_t)this->_nthreads);
}
}
if(this->_itrPushedBackSuffix != OFF_MASK) {
Expand Down Expand Up @@ -396,7 +392,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
assert(_dc.get() == NULL);
if(_dcV != 0) {
_dc.init(new TDC(this->text(), _dcV, this->verbose(), this->sanityCheck()));
_dc.get()->build(this->_nthreads);
_dc.get()->build(_pool, this->_nthreads);
}
// Calculate sample suffixes
if(this->bucketSz() <= this->text().length()) {
Expand Down Expand Up @@ -436,6 +432,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {

EList<TIndexOffU> _sampleSuffs; /// sample suffixes
int _nthreads; /// # of threads
thread_pool& _pool;
TIndexOffU _itrBucketIdx;
TIndexOffU _cur; /// offset to 1st elt of next block
const uint32_t _dcV; /// difference-cover periodicity
Expand Down Expand Up @@ -522,7 +519,7 @@ struct BinarySortingParam {
};

template<typename TStr>
class BinarySorting_worker {
class BinarySorting_worker {
void *vp;

public:
Expand Down Expand Up @@ -638,7 +635,7 @@ void KarkkainenBlockwiseSA<TStr>::buildSamples() {
// Iterate until all buckets are less than
while(--limit >= 0) {
TIndexOffU numBuckets = (TIndexOffU)_sampleSuffs.size()+1;
AutoArray<std::thread*> threads(this->_nthreads);
std::vector<std::future<void> > threads(_pool.size());
EList<BinarySortingParam<TStr> > tparams;
tparams.resize(this->_nthreads);
for(int tid = 0; tid < this->_nthreads; tid++) {
Expand All @@ -665,19 +662,17 @@ void KarkkainenBlockwiseSA<TStr>::buildSamples() {
tparams[tid].sampleSuffs = &_sampleSuffs;
tparams[tid].begin = (tid == 0 ? 0 : len / this->_nthreads * tid);
tparams[tid].end = (tid + 1 == this->_nthreads ? len : len / this->_nthreads * (tid + 1));
if(this->_nthreads == 1) {
if(this->_nthreads == 1 || tid == _nthreads - 1) {
BinarySorting_worker<TStr>((void*)&tparams[tid])();
} else {
threads[tid] = new std::thread(BinarySorting_worker<TStr>(((void*)&tparams[tid])));
threads[tid] = _pool.submit(BinarySorting_worker<TStr>(((void*)&tparams[tid])));
}
}

if(this->_nthreads > 1) {
for (int tid = 0; tid < this->_nthreads; tid++) {
threads[tid]->join();
for (int tid = 0; tid < _pool.size(); tid++) {
threads[tid].get();
}
for (int tid = 0; tid < this->_nthreads; tid++)
delete threads[tid];
}
EList<TIndexOffU>& bucketSzs = tparams[0].bucketSzs;
EList<TIndexOffU>& bucketReps = tparams[0].bucketReps;
Expand Down
55 changes: 39 additions & 16 deletions bowtie2-build
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,24 @@
along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
"""


import os
import sys
import gzip
import struct
import inspect
import logging
import argparse
import subprocess
from collections import deque

def get_gunzip_size(fn):
    """Return the uncompressed size, in bytes, of the gzip file `fn`.

    The file is decompressed as a stream in fixed-size chunks and the
    chunk lengths are summed, so the whole payload is never held in
    memory at once.
    """
    chunk_len = 8192
    with gzip.open(fn) as gz:
        # read() returns b'' at end of stream, which ends the iteration.
        return sum(len(chunk) for chunk in iter(lambda: gz.read(chunk_len), b''))

def main():
parser = argparse.ArgumentParser(add_help = False)
Expand All @@ -51,11 +61,6 @@ def main():
build_bin_spec = os.path.join(ex_path,build_bin_s)

script_options, argv = parser.parse_known_args()
print_help = False
argv = deque(argv)

if '-h' in argv or '--help' in argv:
print_help = True

if script_options.verbose:
logging.getLogger().setLevel(logging.INFO)
Expand All @@ -68,23 +73,41 @@ def main():
build_bin_spec += '-sanitized'
build_bin_l += '-sanitized'

fastas = []
if '-c' not in argv and len(argv) >= 2:
for index in reversed(range(len(argv) - 1)):
arg = argv[index]
if arg.startswith('-') or arg.isdigit():
break
fastas.insert(0, arg)
argv.remove(arg)
if fastas:
fastas = ','.join(fastas)
argv.insert(len(argv) - 1, fastas)

if script_options.large_index:
build_bin_spec = os.path.join(ex_path,build_bin_l)
elif len(argv) >= 2:
ref_fnames = argv[-2]
elif fastas:
tot_size = 0
for fn in ref_fnames.split(','):
for fn in fastas.split(','):
if os.path.exists(fn):
statinfo = os.stat(fn)
tot_size += statinfo.st_size
if fn.endswith('.gz') or fn.endswith(".Z"):
tot_size += get_gunzip_size(fn)
else:
statinfo = os.stat(fn)
tot_size += statinfo.st_size
if tot_size > small_index_max_size:
build_bin_spec = os.path.join(ex_path,build_bin_l)

argv.appendleft('basic-0')
argv.appendleft('--wrapper')
argv.appendleft(build_bin_spec)
if not os.path.exists(build_bin_spec):
sys.stderr.write('{0} does not exist, try running `[g]make {0}\'\n'.format(os.path.basename(build_bin_spec)))
sys.exit(1)

argv.insert(0, 'basic-0')
argv.insert(0, '--wrapper')
argv.insert(0, build_bin_spec)
logging.info('Command: %s' % ' '.join(argv))
sys.exit(subprocess.call(list(argv)))
sys.exit(subprocess.call(argv))

if __name__ == '__main__':
main()
30 changes: 19 additions & 11 deletions bt2_build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,17 @@ static void printUsage(ostream& out) {
out << "Usage: " << tool_name << " [options]* <reference_in> <bt2_index_base>" << endl
<< " reference_in comma-separated list of files with ref sequences" << endl
<< " bt2_index_base write " + gEbwt_ext + " data to files with this dir/basename" << endl
<< "*** Bowtie 2 indexes work only with v2 (not v1). Likewise for v1 indexes. ***" << endl
<< "*** Bowtie 2 indexes will work with Bowtie v1.2.3 and later. ***" << endl
<< "Options:" << endl
<< " -f reference files are Fasta (default)" << endl
<< " -c reference sequences given on cmd line (as" << endl
<< " <reference_in>)" << endl;
if(wrapper == "basic-0") {
out << " --large-index force generated index to be 'large', even if ref" << endl
<< " has fewer than 4 billion nucleotides" << endl
<< " --debug use the debug binary; slower, assertions enabled" << endl
<< " --sanitized use sanitized binary; slower, uses ASan and/or UBSan" << endl
<< " --verbose log the issued command" << endl;
<< " has fewer than 4 billion nucleotides" << endl
<< " --debug use the debug binary; slower, assertions enabled" << endl
<< " --sanitized use sanitized binary; slower, uses ASan and/or UBSan" << endl
<< " --verbose log the issued command" << endl;
}
out << " -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting" << endl
<< " -p/--packed use packed strings internally; slower, less memory" << endl
Expand All @@ -166,16 +166,15 @@ static void printUsage(ostream& out) {
//<< (currentlyBigEndian()? "big":"little") << ")" << endl
<< " --seed <int> seed for random number generator" << endl
<< " -q/--quiet verbose output (for debugging)" << endl
<< " -h/--help print detailed description of tool and its options" << endl
<< " --usage print this usage message" << endl
<< " --h/--help print this message and quit" << endl
<< " --version print version information and quit" << endl
;
if(wrapper.empty()) {
cerr << endl
<< "*** Warning ***" << endl
<< "'" << tool_name << "' was run directly. It is recommended "
<< "that you run the wrapper script 'bowtie2-build' instead."
<< endl << endl;
<< "'" << tool_name << "' was run directly. It is recommended "
<< "that you run the wrapper script 'bowtie2-build' instead."
<< endl << endl;
}
}

Expand Down Expand Up @@ -360,6 +359,14 @@ static void deleteIdxFiles(
}
}

/**
 * Move each index file from its temporary "<name>.tmp" path to its final
 * name, for every entry of filesWritten.
 *
 * Each rename is logged to stderr. A rename that fails is reported together
 * with the system error text instead of being silently ignored; the
 * remaining files are still processed.
 */
static void renameIdxFiles() {
	for (size_t i = 0; i < filesWritten.size(); i++) {
		const auto& finalName = filesWritten[i];
		const std::string tmpName = finalName + ".tmp";
		std::cerr << "Renaming " << tmpName << " to " << finalName << std::endl;
		// std::rename returns non-zero on failure; surface that so a
		// missing or unmovable .tmp file does not go unnoticed.
		if (std::rename(tmpName.c_str(), finalName.c_str()) != 0) {
			std::perror(("Error: could not rename " + tmpName).c_str());
		}
	}
}

/**
* Drive the index construction process and optionally sanity-check the
* result.
Expand Down Expand Up @@ -686,7 +693,6 @@ int bowtie_build(int argc, const char **argv) {
if(packed) {
driver<S2bDnaString>(infile, infiles, outfile + ".rev", true, reverseType);
}
return 0;
} catch(std::exception& e) {
cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
cerr << "Command: ";
Expand All @@ -704,5 +710,7 @@ int bowtie_build(int argc, const char **argv) {
deleteIdxFiles(outfile, writeRef || justRef, justRef);
return e;
}
renameIdxFiles();
return 0;
}
}
Loading

0 comments on commit 2d2c4db

Please sign in to comment.