diff --git a/MANUAL b/MANUAL index 5328c74f6..236044795 100644 --- a/MANUAL +++ b/MANUAL @@ -1516,7 +1516,7 @@ by tabs; from left to right, the fields are: will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with - --sam-noqname-trunc at the expense of generating non-standard SAM. + --sam-no-qname-trunc at the expense of generating non-standard SAM. 2. Sum of all applicable flags. Flags relevant to Bowtie are: diff --git a/MANUAL.markdown b/MANUAL.markdown index 039118cd9..809243128 100644 --- a/MANUAL.markdown +++ b/MANUAL.markdown @@ -1927,8 +1927,8 @@ left to right, the fields are: If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first - whitespace can be suppressed with `--sam-noqname-trunc` at the expense of - generating non-standard SAM. + whitespace can be suppressed with `--sam-no-qname-trunc` at the expense of + generating non-standard SAM. 2. Sum of all applicable flags. 
Flags relevant to Bowtie are: diff --git a/Makefile b/Makefile index 37c97e997..9b4453e7f 100644 --- a/Makefile +++ b/Makefile @@ -24,8 +24,8 @@ prefix = /usr/local bindir = $(prefix)/bin -INC = -LIBS = -lz +INC = $(if $(RELEASE_BUILD),-I$(CURDIR)/.include) +LIBS = $(LDFLAGS) $(if $(RELEASE_BUILD),-L$(CURDIR)/.lib) -lz GCC_PREFIX = $(shell dirname `which gcc`) GCC_SUFFIX = CC ?= $(GCC_PREFIX)/gcc$(GCC_SUFFIX) @@ -33,11 +33,11 @@ CPP ?= $(GCC_PREFIX)/g++$(GCC_SUFFIX) CXX ?= $(CPP) HEADERS = $(wildcard *.h) BOWTIE_MM = 1 -BOWTIE_SHARED_MEM = 0 +BOWTIE_SHARED_MEM = # Detect Cygwin or MinGW -WINDOWS = 0 -MINGW = 0 +WINDOWS = +MINGW = ifneq (,$(findstring MINGW,$(shell uname))) WINDOWS = 1 MINGW = 1 @@ -47,7 +47,7 @@ ifneq (,$(findstring MINGW,$(shell uname))) override EXTRA_FLAGS += -ansi endif -MACOS = 0 +MACOS = ifneq (,$(findstring Darwin,$(shell uname))) MACOS = 1 ifneq (,$(findstring 13,$(shell uname -r))) @@ -55,6 +55,9 @@ ifneq (,$(findstring Darwin,$(shell uname))) CC = clang override EXTRA_FLAGS += -stdlib=libstdc++ endif + ifeq (1, $(RELEASE_BUILD)) + EXTRA_FLAGS += -mmacosx-version-min=10.9 + endif endif POPCNT_CAPABILITY ?= 1 @@ -95,7 +98,7 @@ endif #default is to use Intel TBB ifneq (1,$(NO_TBB)) - LIBS += $(PTHREAD_LIB) -ltbb -ltbbmalloc_proxy + LIBS += $(PTHREAD_LIB) -ltbb -ltbbmalloc$(if $(RELEASE_BUILD),,_proxy) override EXTRA_FLAGS += -DWITH_TBB else LIBS += $(PTHREAD_LIB) @@ -433,9 +436,8 @@ bowtie2-src: $(SRC_PKG_LIST) rm -rf .src.tmp .PHONY: bowtie2-pkg -bowtie2-pkg: $(BIN_PKG_LIST) $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_AUX) - $(eval HAS_TBB=$(shell strings bowtie2-align-l* | grep tbb)) - $(eval PKG_DIR=bowtie2-$(VERSION)$(if $(HAS_TBB),,-legacy)) +bowtie2-pkg: static-libs $(BIN_PKG_LIST) $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_AUX) + $(eval PKG_DIR=bowtie2-$(VERSION)-$(if $(MACOS),macos,$(if $(MINGW),mingw,linux))-x86_64) chmod a+x scripts/*.sh scripts/*.pl rm -rf .bin.tmp mkdir -p .bin.tmp/$(PKG_DIR) @@ -494,13 +496,30 @@ random-test: all 
perl-deps .PHONY: perl-deps perl-deps: if [ ! -e .perllib.tmp ]; then \ - DL=$$([ `which wget` ] && echo wget -O- || echo curl -L) ; \ + DL=$$([ `which wget` ] && echo "wget --no-check-certificate -O-" || echo "curl -L") ; \ mkdir .perllib.tmp ; \ $$DL http://cpanmin.us | perl - -l $(CURDIR)/.perllib.tmp App::cpanminus local::lib ; \ eval `perl -I $(CURDIR)/.perllib.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.perllib.tmp` ; \ cpanm --force Math::Random Clone Test::Deep Sys::Info ; \ fi +static-libs: + if [[ ! -d $(CURDIR)/.lib || ! -d $(CURDIR)/.inc ]]; then \ + mkdir $(CURDIR)/.lib $(CURDIR)/.include ; \ + fi ; \ + if [[ `uname` = "Darwin" ]]; then \ + export CFLAGS=-mmacosx-version-min=10.9 ; \ + export CXXFLAGS=-mmacosx-version-min=10.9 ; \ + fi ; \ + DL=$$([ `which wget` ] && echo "wget --no-check-certificate" || echo "curl -LO") ; \ + cd /tmp ; \ + $$DL https://zlib.net/zlib-1.2.11.tar.gz && tar xzf zlib-1.2.11.tar.gz && cd zlib-1.2.11 ; \ + $(if $(MINGW), mingw32-make -f win32/Makefile.gcc, ./configure --static && make) && cp libz.a $(CURDIR)/.lib && cp zconf.h zlib.h $(CURDIR)/.include ; \ + cd .. 
; \ + $$DL https://github.com/01org/tbb/archive/2017_U8.tar.gz && tar xzf 2017_U8.tar.gz && cd tbb-2017_U8; \ + $(if $(MINGW), mingw32-make compiler=gcc arch=ia64 runtime=mingw, make) extra_inc=big_iron.inc -j4 \ + && cp -r include/tbb $(CURDIR)/.include && cp build/*_release/*.a $(CURDIR)/.lib + .PHONY: test test: simple-test random-test @@ -512,3 +531,4 @@ clean: rm -f core.* .tmp.head rm -rf *.dSYM rm -rf .perllib.tmp + rm -rf .include .lib diff --git a/NEWS b/NEWS index 4b0cd6276..cff70a19a 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,12 @@ Please report any issues to the Bowtie 2 Github page or using the Sourceforge bu Version Release History ======================= +Version 2.3.3.1 - Oct 05, 2017 + * Fixed an issue causing input files to be skipped when running + multi-threaded alignment + * Fixed an issue causing the first character of a read name to be + dropped while parsing reads split across multiple input files + Version 2.3.3 - Sep 09, 2017 From this release forward prepackaged bowtie2 binaries are now statically linked to the zlib compression library and, the recommended diff --git a/VERSION b/VERSION index 0bee604df..9d714864f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.3.3 +2.3.3.1 diff --git a/bt2_search.cpp b/bt2_search.cpp index 7214f5776..1daa99631 100644 --- a/bt2_search.cpp +++ b/bt2_search.cpp @@ -29,7 +29,11 @@ #include #include #include + +#ifndef _WIN32 #include +#endif + #include "alphabet.h" #include "assert_helpers.h" #include "endian_swap.h" @@ -819,7 +823,7 @@ static void printUsage(ostream& out) { << " --rg add (\"lab:value\") to @RG line of SAM header." << endl << " Note: @RG line only printed when --rg-id is set." << endl << " --omit-sec-seq put '*' in SEQ and QUAL fields for secondary alignments." 
<< endl - << " --sam-noqname-trunc Suppress standard behavior of truncating readname at first whitespace " << endl + << " --sam-no-qname-trunc Suppress standard behavior of truncating readname at first whitespace " << endl << " at the expense of generating non-standard SAM." << endl << " --xeq Use '='/'X', instead of 'M,' to specify matches/mismatches in SAM record." << endl << " --soft-clipped-unmapped-tlen Exclude soft-clipped bases when reporting TLEN" << endl @@ -4353,7 +4357,7 @@ static void multiseedSearchWorker_2p5(void *vp) { return; } - +#ifndef _WIN32 /** * Print friendly-ish message pertaining to failed system call. */ @@ -4524,7 +4528,7 @@ static void thread_monitor(int pid, int orig_threads, EList& tids, EListloaded()) throw 1; multiseed_refs = refs.get(); - sigset_t set; - sigemptyset(&set); - sigaddset(&set, SIGPIPE); - pthread_sigmask(SIG_BLOCK, &set, NULL); +#ifndef _WIN32 + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGPIPE); + pthread_sigmask(SIG_BLOCK, &set, NULL); +#endif EList tids; #ifdef WITH_TBB //tbb::task_group tbb_grp; @@ -4614,12 +4620,14 @@ static void multiseedSearch( { Timer _t(cerr, "Multiseed full-index search: ", timing); +#ifndef _WIN32 int pid = 0; if(thread_stealing) { pid = getpid(); write_pid(thread_stealing_dir.c_str(), pid); thread_counter = 0; } +#endif for(int i = 0; i < nthreads; i++) { #ifdef WITH_TBB @@ -4644,10 +4652,12 @@ static void multiseedSearch( #endif } +#ifndef _WIN32 if(thread_stealing) { int orig_threads = nthreads; thread_monitor(pid, orig_threads, tids, threads); } +#endif #ifdef WITH_TBB while(all_threads_done < nthreads) { @@ -4659,9 +4669,11 @@ static void multiseedSearch( } #endif +#ifndef _WIN32 if(thread_stealing) { del_pid(thread_stealing_dir.c_str(), pid); } +#endif } if(!metricsPerRead && (metricsOfb != NULL || metricsStderr)) { metrics.reportInterval(metricsOfb, metricsStderr, true, NULL); @@ -5012,7 +5024,9 @@ int bowtie(int argc, const char **argv) { return 1; } +#ifndef _WIN32 
thread_stealing = thread_ceiling > nthreads; +#endif if(thread_stealing && thread_stealing_dir.empty()) { cerr << "When --thread-ceiling is specified, must also specify --thread-piddir" << endl; printUsage(cerr); diff --git a/doc/manual.html b/doc/manual.html index 77d2adb84..7c9d65ba7 100644 --- a/doc/manual.html +++ b/doc/manual.html @@ -1058,7 +1058,7 @@

SAM output

Each subsequent line describes an alignment or, if the read failed to align, a read. Each line is a collection of at least 12 fields separated by tabs; from left to right, the fields are:

  1. Name of read that aligned.

    -

    Note that the SAM specification disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with --sam-noqname-trunc at the expense of generating non-standard SAM.

  2. +

    Note that the SAM specification disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with --sam-no-qname-trunc at the expense of generating non-standard SAM.

  3. Sum of all applicable flags. Flags relevant to Bowtie are:

    diff --git a/doc/website/manual.ssi b/doc/website/manual.ssi index d9c222df5..37c745d0c 100644 --- a/doc/website/manual.ssi +++ b/doc/website/manual.ssi @@ -1058,7 +1058,7 @@ Seed 4 rc: TTATGCATGA

    Each subsequent line describes an alignment or, if the read failed to align, a read. Each line is a collection of at least 12 fields separated by tabs; from left to right, the fields are:

    1. Name of read that aligned.

      -

      Note that the SAM specification disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with --sam-noqname-trunc at the expense of generating non-standard SAM.

    2. +

      Note that the SAM specification disallows whitespace in the read name. If the read name contains any whitespace characters, Bowtie 2 will truncate the name at the first whitespace character. This is similar to the behavior of other tools. The standard behavior of truncating at the first whitespace can be suppressed with --sam-no-qname-trunc at the expense of generating non-standard SAM.

    3. Sum of all applicable flags. Flags relevant to Bowtie are:

    diff --git a/doc/website/old_news.ssi b/doc/website/old_news.ssi index 7df0a310c..791e80425 100644 --- a/doc/website/old_news.ssi +++ b/doc/website/old_news.ssi @@ -1,3 +1,52 @@ +

    Bowtie2 developers note

    +

    As of Nov 2015 we had to fix the bowtie2 github repo and relabel the entire history. Developers and contributors should re-clone the bowtie2 github repo from this current state.

    +

    Version 2.2.9 - Apr 22, 2016

    +
      +
    • Fixed the multiple threads issue for the bowtie2-build.
    • +
    • Fixed a TBB related build issue impacting TBB v4.4.
    • +
    +

    Version 2.2.8 - Mar 10, 2016

    +
      +
    • Various website updates.
    • +
    • Fixed the bowtie2-build issue that made TBB compilation fail.
    • +
    • Fixed the static build for Win32 platform.
    • +
    +

    Version 2.2.7 - Feb 10, 2016

    +
      +
    • Added a parallel index build option: bowtie2-build --threads <# threads>.
    • +
    • Fixed an issue whereby IUPAC codes (other than A/C/G/T/N) in reads were converted to As. Now all non-A/C/G/T characters in reads become Ns.
    • +
    • Fixed some compilation issues, including for the Intel C++ Compiler.
    • +
    • Removed debugging code that could impede performance for many alignment threads.
    • +
    • Fixed a few typos in documentation.
    • +
    +

    Version 2.2.6 - Jul 22, 2015

    +
      +
    • Switched to a stable sort to avoid some potential reproducibility confusions.
    • +
    • Added 'install' target for *nix platforms.
    • +
    • Added the Intel TBB option which provides in most situations a better performance output. TBB is not present by default in the current build but can be added by compiling the source code with WITH_TBB=1 option.
    • +
    • Fixed a bug that caused seed length to be dependent on the order of the -L and -N parameters.
    • +
    • Fixed a bug that caused --local followed by -N to reset seed length to 22, which is actually the default value for global.
    • +
    • Enable compilation on FreeBSD and clang, although the gmake port is still required.
    • +
    • Fixed an issue that caused the bowtie2 compilation process to fail on Snow Leopard.
    • +
    + +

    Version 2.2.5 - Mar 9, 2015

    +
      +
    • Fixed some situations where we could incorrectly detect a Mavericks platform.
    • +
    • Fixed some manual issues including some bad HTML formatting.
    • +
    • Make sure the wrapper correctly identifies the platform under OSX.
    • +
    • Fixed --rg/--rg-id options where included spaces were incorrectly treated.
    • +
    • Various documentation fixes added by contributors.
    • +
    • Fixed the incorrect behavior where parameter file names may contain spaces.
    • +
    • Fixed bugs related with the presence of spaces in the path where bowtie binaries are stored.
    • +
    • Improved exception handling for misformatted quality values.
    • +
    • Improved redundancy checks by correctly accounting for soft clipping.
    • +
    + +

    Lighter released

    +
      +
    • Lighter is an extremely fast and memory-efficient program for correcting sequencing errors in DNA sequencing data. For details on how error correction can help improve the speed and accuracy of downstream analysis tools, see the paper in Genome Biology. Source and software available at GitHub
    • . +

    Version 2.2.4 - Oct 22, 2014

    • Fixed a Mavericks OSX specific bug caused by some linkage ambiguities.
    • diff --git a/doc/website/recent_news.ssi b/doc/website/recent_news.ssi index ed82ee726..9a41e30e5 100644 --- a/doc/website/recent_news.ssi +++ b/doc/website/recent_news.ssi @@ -1,3 +1,9 @@ +

      Version 2.3.3.1 - October 05, 2017

      +
        +
      • Fixed an issue causing input files to be skipped when running multi-threaded alignment
      • +
      • Fixed an issue causing the first character of a read name to be dropped while parsing reads split across multiple input files
      • +
      +

      Version 2.3.3 - September 06, 2017

      From this release forward prepackaged bowtie2 binaries are now statically linked to the zlib compression library and, the recommended threading library, TBB. Users who rely on prepackaged builds are no longer required to have these packages pre-installed. As a result of the aforementioned changes legacy packages have been discontinued.

        @@ -21,6 +27,7 @@
      • Fixed compilation issues caused by gzbuffer function when compiling with zlib v1.2.3.5 and earlier. Users compiling against these libraries will use the zlib default buffer size of 8Kb when decompressing read files.
      • Fixed issue that would cause Bowtie 2 hang when aligning FASTA inputs with more than one thread
      +

      Version 2.3.1 - Mar 03, 2017

      Please note that as of this release Bowtie 2 now has dependencies on zlib and readline libraries. Make sure that all dependencies are met before attempting to build from source.

        @@ -31,6 +38,7 @@
      • Fixed a bug whereby combining --un-conc with -k or -a would cause bowtie2 to print duplicate reads in one or both of the --un-conc* output files, causing the ends to be misaligned.
      • The default --score-min for --local mode is now 'G,20,8'. That was the stated default in the documentation for a while, but the actual default was 'G,0,10' for many versions. Now the default matches the documentation and, we find, yields more accurate alignments than 'G,0,10'
      +

      Version 2.3.0 - Dec 13, 2016

      This is a major release with some larger and many smaller changes. These notes emphasize the large changes. See commit history for details.

        @@ -42,52 +50,3 @@
      • Now detects and reports inconsistencies between --score-min and --ma
      • Changed default for --bmaxdivn to yield better memory footprint and running time when building an index with many threads
      -

      Bowtie2 developers note

      -

      As of Nov 2015 we had to fix the bowtie2 github repo and relabel the entire history. Developers and contributors should re-clone the bowtie2 github repo from this current state.

      -

      Version 2.2.9 - Apr 22, 2016

      -
        -
      • Fixed the multiple threads issue for the bowtie2-build.
      • -
      • Fixed a TBB related build issue impacting TBB v4.4.
      • -
      -

      Version 2.2.8 - Mar 10, 2016

      -
        -
      • Various website updates.
      • -
      • Fixed the bowtie2-build issue that made TBB compilation fail.
      • -
      • Fixed the static build for Win32 platform.
      • -
      -

      Version 2.2.7 - Feb 10, 2016

      -
        -
      • Added a parallel index build option: bowtie2-build --threads <# threads>.
      • -
      • Fixed an issue whereby IUPAC codes (other than A/C/G/T/N) in reads were converted to As. Now all non-A/C/G/T characters in reads become Ns.
      • -
      • Fixed some compilation issues, including for the Intel C++ Compiler.
      • -
      • Removed debugging code that could impede performance for many alignment threads.
      • -
      • Fixed a few typos in documentation.
      • -
      -

      Version 2.2.6 - Jul 22, 2015

      -
        -
      • Switched to a stable sort to avoid some potential reproducibility confusions.
      • -
      • Added 'install' target for *nix platforms.
      • -
      • Added the Intel TBB option which provides in most situations a better performance output. TBB is not present by default in the current build but can be added by compiling the source code with WITH_TBB=1 option.
      • -
      • Fixed a bug that caused seed lenght to be dependent of the -L and -N parameters order.
      • -
      • Fixed a bug that caused --local followed by -N to reset seed lenght to 22 which is actually the default value for global.
      • -
      • Enable compilation on FreeBDS and clang, although gmake port is still required.
      • -
      • Fixed an issue that made bowtie2 compilation process to fail on Snow Leopard.
      • -
      - -

      Version 2.2.5 - Mar 9, 2015

      -
        -
      • Fixed some situations where incorrectly we could detect a Mavericks platform.
      • -
      • Fixed some manual issues including some HTML bad formating.
      • -
      • Make sure the wrapper correctly identifies the platform under OSX.
      • -
      • Fixed --rg/--rg-id options where included spaces were incorrectly treated.
      • -
      • Various documentation fixes added by contributors.
      • -
      • Fixed the incorrect behavior where parameter file names may contain spaces.
      • -
      • Fixed bugs related with the presence of spaces in the path where bowtie binaries are stored.
      • -
      • Improved exception handling for missformated quality values.
      • -
      • Improved redundancy checks by correctly account for soft clipping.
      • -
      - -

      Lighter released

      -
        -
      • Lighter is an extremely fast and memory-efficient program for correcting sequencing errors in DNA sequencing data. For details on how error correction can help improve the speed and accuracy of downstream analysis tools, see the paper in Genome Biology. Source and software available at GitHub
      • . -
      diff --git a/doc/website/rhsidebar.ssi b/doc/website/rhsidebar.ssi index a490f3e94..3a74fdcb8 100644 --- a/doc/website/rhsidebar.ssi +++ b/doc/website/rhsidebar.ssi @@ -18,10 +18,10 @@
    diff --git a/pat.cpp b/pat.cpp index 16d675414..b86a6be8b 100644 --- a/pat.cpp +++ b/pat.cpp @@ -401,11 +401,11 @@ pair CFilePatternSource::nextBatchImpl( bool batch_a) { bool done = false; - int nread = 0; + unsigned nread = 0; pt.setReadId(readCnt_); while(true) { // loop that moves on to next file when needed do { - pair ret = nextBatchFromFile(pt, batch_a); + pair ret = nextBatchFromFile(pt, batch_a, nread); done = ret.first; nread = ret.second; } while(!done && nread == 0); // not sure why this would happen @@ -413,9 +413,10 @@ pair CFilePatternSource::nextBatchImpl( open(); resetForNextFile(); // reset state to handle a fresh file filecur_++; - if(nread == 0) { + if(nread == 0 || (nread < pt.max_buf_)) { continue; } + done = false; } break; } @@ -678,7 +679,7 @@ bool VectorPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const { */ pair FastaPatternSource::nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) + bool batch_a, unsigned readi) { int c; EList& readbuf = batch_a ? pt.bufa_ : pt.bufb_; @@ -697,7 +698,6 @@ pair FastaPatternSource::nextBatchFromFile( first_ = false; } bool done = false; - size_t readi = 0; // Read until we run out of input or until we've filled the buffer for(; readi < pt.max_buf_ && !done; readi++) { Read::TBuf& buf = readbuf[readi].readOrigBuf; @@ -803,11 +803,10 @@ bool FastaPatternSource::parse(Read& r, Read& rb, TReadId rdid) const { */ pair FastaContinuousPatternSource::nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) + bool batch_a, unsigned readi) { int c = -1; EList& readbuf = batch_a ? 
pt.bufa_ : pt.bufb_; - size_t readi = 0; while(readi < pt.max_buf_) { c = getc_wrapper(); if(c < 0) { @@ -847,18 +846,18 @@ pair FastaContinuousPatternSource::nextBatchFromFile( } if(eat_ > 0) { eat_--; - // Try to keep readCnt_ aligned with the offset + // Try to keep cur_ aligned with the offset // into the reference; that lets us see where // the sampling gaps are by looking at the read // name if(!beginning_) { - readCnt_++; + cur_++; } continue; } // install name readbuf[readi].readOrigBuf = name_prefix_buf_; - itoa10(readCnt_ - subReadCnt_, name_int_buf_); + itoa10(cur_ - last_, name_int_buf_); readbuf[readi].readOrigBuf.append(name_int_buf_); readbuf[readi].readOrigBuf.append('\t'); // install sequence @@ -872,7 +871,7 @@ pair FastaContinuousPatternSource::nextBatchFromFile( readbuf[readi].readOrigBuf.append(c); } eat_ = freq_-1; - readCnt_++; + cur_++; beginning_ = false; readi++; } @@ -947,7 +946,7 @@ bool FastaContinuousPatternSource::parse( */ pair FastqPatternSource::nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) + bool batch_a, unsigned readi) { int c = -1; EList* readbuf = batch_a ? 
&pt.bufa_ : &pt.bufb_; @@ -964,15 +963,13 @@ pair FastqPatternSource::nextBatchFromFile( throw 1; } first_ = false; - (*readbuf)[0].readOrigBuf.append('@'); + (*readbuf)[readi].readOrigBuf.append('@'); } bool done = false, aborted = false; - size_t readi = 0; // Read until we run out of input or until we've filled the buffer while (readi < pt.max_buf_ && !done) { Read::TBuf& buf = (*readbuf)[readi].readOrigBuf; - assert(readi == 0 || buf.empty()); int newlines = 4; while(newlines) { c = getc_wrapper(); @@ -1118,7 +1115,7 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const { // Set up a default name if one hasn't been set if(r.name.empty()) { char cbuf[20]; - itoa10(static_cast(readCnt_), cbuf); + itoa10(static_cast(rdid), cbuf); r.name.install(cbuf); } r.parsed = true; @@ -1133,14 +1130,13 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const { */ pair TabbedPatternSource::nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) + bool batch_a, unsigned readi) { int c = getc_wrapper(); while(c >= 0 && (c == '\n' || c == '\r')) { c = getc_wrapper(); } EList& readbuf = batch_a ? pt.bufa_ : pt.bufb_; - size_t readi = 0; // Read until we run out of input or until we've filled the buffer for(; readi < pt.max_buf_ && c >= 0; readi++) { readbuf[readi].readOrigBuf.clear(); @@ -1267,14 +1263,14 @@ bool TabbedPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const { */ pair RawPatternSource::nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) + bool batch_a, + unsigned readi) { int c = getc_wrapper(); while(c >= 0 && (c == '\n' || c == '\r')) { c = getc_wrapper(); } EList& readbuf = batch_a ? 
pt.bufa_ : pt.bufb_; - size_t readi = 0; // Read until we run out of input or until we've filled the buffer for(; readi < pt.max_buf_ && c >= 0; readi++) { readbuf[readi].readOrigBuf.clear(); diff --git a/pat.h b/pat.h index 57726a396..ce312785a 100644 --- a/pat.h +++ b/pat.h @@ -382,7 +382,8 @@ class CFilePatternSource : public PatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) = 0; + bool batch_a, + unsigned read_idx) = 0; /** * Reset state to handle a fresh file @@ -471,7 +472,8 @@ class FastaPatternSource : public CFilePatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a); + bool batch_a, + unsigned read_idx); /** * Scan to the next FASTA record (starting with >) and return the first @@ -523,7 +525,8 @@ class TabbedPatternSource : public CFilePatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a); + bool batch_a, + unsigned read_idx); bool secondName_; // true if --tab6, false if --tab5 }; @@ -568,7 +571,8 @@ class QseqPatternSource : public CFilePatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a); + bool batch_a, + unsigned read_idx); EList qualToks_; }; @@ -588,7 +592,8 @@ class FastaContinuousPatternSource : public CFilePatternSource { eat_(length_-1), beginning_(true), bufCur_(0), - subReadCnt_(0llu) + cur_(0llu), + last_(0llu) { assert_gt(freq_, 0); resetForNextFile(); @@ -612,7 +617,8 @@ class FastaContinuousPatternSource : public CFilePatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a); + bool batch_a, + unsigned read_idx); /** * Reset state to be read for the next file. 
@@ -622,7 +628,7 @@ class FastaContinuousPatternSource : public CFilePatternSource { name_prefix_buf_.clear(); beginning_ = true; bufCur_ = 0; - subReadCnt_ = readCnt_; + last_ = cur_; } private: @@ -638,7 +644,8 @@ class FastaContinuousPatternSource : public CFilePatternSource { char name_int_buf_[20]; /// for composing offsets for names size_t bufCur_; /// buffer cursor; points to where we should /// insert the next character - uint64_t subReadCnt_;/// number to subtract from readCnt_ to get + uint64_t cur_; + uint64_t last_; /// number to subtract from readCnt_ to get /// the pat id to output (so it resets to 0 for /// each new sequence) }; @@ -675,7 +682,8 @@ class FastqPatternSource : public CFilePatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a); + bool batch_a, + unsigned read_idx); /** * Reset state to be ready for the next file. @@ -719,7 +727,8 @@ class RawPatternSource : public CFilePatternSource { */ virtual std::pair nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a); + bool batch_a, + unsigned read_idx); /** * Reset state to be ready for the next file. diff --git a/pthreadGC2.dll b/pthreadGC2.dll deleted file mode 100644 index 8b9116c78..000000000 Binary files a/pthreadGC2.dll and /dev/null differ diff --git a/read_qseq.cpp b/read_qseq.cpp index f428d65be..627bfe4ed 100644 --- a/read_qseq.cpp +++ b/read_qseq.cpp @@ -53,14 +53,13 @@ static int parseName( */ pair QseqPatternSource::nextBatchFromFile( PerThreadReadBuf& pt, - bool batch_a) + bool batch_a, unsigned readi) { int c = getc_wrapper(); while(c >= 0 && (c == '\n' || c == '\r')) { c = getc_wrapper(); } EList& readbuf = batch_a ? 
pt.bufa_ : pt.bufb_; - size_t readi = 0; // Read until we run out of input or until we've filled the buffer for(; readi < pt.max_buf_ && c >= 0; readi++) { readbuf[readi].readOrigBuf.clear(); @@ -72,6 +71,9 @@ pair QseqPatternSource::nextBatchFromFile( c = getc_wrapper(); } } + if (c != EOF) { + ungetc_wrapper(c); + } return make_pair(c < 0, readi); } diff --git a/scripts/test/simple_tests.pl b/scripts/test/simple_tests.pl index f1fdde74f..51ef2be5e 100644 --- a/scripts/test/simple_tests.pl +++ b/scripts/test/simple_tests.pl @@ -341,7 +341,7 @@ fastq1 => "\@r0\nAGCATCGATC\r\n+\nIIIIIIIIII\n". "\@r1\nTCAGTTTTTGA\r\n+\nIIIIIIIIIII\n", fastq2 => "\@r0\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n". - "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII", + "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII\n", pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, # Paired-end reads that should align @@ -354,7 +354,7 @@ fastq1 => "\@r0\nAGCATCGATC\r\n+\nIIIIIIIIII\n". "\@r1\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n", fastq2 => "\@r0\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n". - "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII", + "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII\n", pairhits => [ { }, { "0,8" => 1 } ] }, # Paired-end reads that should align @@ -367,7 +367,7 @@ fastq1 => "\@r0\nAGCATCGATC\r\n+\nIIIIIIIIII\n". "\@r1\nTCAGTTTTTGA\r\n+\nIIIIIIIIIII\n", fastq2 => "\@r0\nTCAGTTTTTGA\n+\nIIIIIIIIIII\n". - "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII", + "\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII\n", pairhits => [ { "0,8" => 1 }, { } ] }, # Paired-end reads with left end entirely trimmed away @@ -4327,16 +4327,18 @@ ($$$$$$$$$) $fq1, $fq2) = @_; - open(FQ1, defined($compressed) ? "| gzip -c >$fq1.gz" : ">$fq1") || die "Could not open '$fq1' for writing"; - open(FQ2, defined($compressed) ? "| gzip -c >$fq2.gz" : ">$fq2") || die "Could not open '$fq2' for writing"; my $pe = (defined($mate1s) && $mate1s ne ""); if($pe) { for (0..scalar(@$mate1s)-1) { + open(FQ1, defined($compressed) ? 
"| gzip -c >$fq1->[$_]" : ">$fq1->[$_]") || die "Could not open '$fq1->[$_]' for writing"; + open(FQ2, defined($compressed) ? "| gzip -c >$fq2->[$_]" : ">$fq2->[$_]") || die "Could not open '$fq2->[$_]' for writing"; + my $m1 = $mate1s->[$_]; my $m2 = $mate2s->[$_]; my $q1 = $qual1s->[$_]; my $q2 = $qual2s->[$_]; my $nm = $names->[$_]; + defined($m1) || die; defined($m2) || die; $q1 = $q1 || ("I" x length($m1)); @@ -4344,20 +4346,24 @@ ($$$$$$$$$) $nm = $nm || "r$_"; print FQ1 "\@$nm/1\n$m1\n+\n$q1\n"; print FQ2 "\@$nm/2\n$m2\n+\n$q2\n"; + close(FQ1); + close(FQ2); } } else { for (0..scalar(@$reads)-1) { + open(FQ1, defined($compressed) ? "| gzip -c >$fq1->[$_]" : ">$fq1->[$_]") || die "Could not open '$fq1->[$_]' for writing"; + my $read = $reads->[$_]; defined($read) || die; my $qual = $quals->[$_]; my $nm = $names->[$_]; + $qual = $qual || ("I" x length($read)); $nm = $nm || "r$_"; print FQ1 "\@$nm\n$read\n+\n$qual\n"; + close(FQ1); } } - close(FQ1); - close(FQ2); } ## @@ -4476,6 +4482,21 @@ ($$$$$$$$$$$$$$$$$$$$$$) } } } else { + $mate1arg = []; + $mate2arg = []; + my $ext = $compressed ? ".fq.gz" : ".fq"; + my $base_filename = ".simple_tests"; + + for (0 .. scalar($pe ? @$mate1s : @$reads) - 1) { + my $f = $base_filename . ".1" . ('a' .. 'z')[$_] . $ext; + push @$mate1arg, $f; + + if ($pe) { + $f = $base_filename . ".2" . ('a' .. 'z')[$_] . $ext; + push @$mate2arg, $f; + } + } + writeReads( $reads, $quals, @@ -4484,10 +4505,9 @@ ($$$$$$$$$$$$$$$$$$$$$$) $mate2s, $qual2s, $names, - ".simple_tests.1.fq", - ".simple_tests.2.fq"); - $mate1arg = defined($compressed) ? ".simple_tests.1.fq.gz" : ".simple_tests.1.fq"; - $mate2arg = defined($compressed) ? 
".simple_tests.2.fq.gz" : ".simple_tests.2.fq"; + $mate1arg, + $mate2arg); + $formatarg = "-q"; $readarg = $mate1arg; } @@ -4495,12 +4515,23 @@ ($$$$$$$$$$$$$$$$$$$$$$) my $debug_arg = ""; $debug_arg = "--debug" if $debug_mode; my $cmd; + my $batch_size = int(rand(16) + 1); if($pe) { # Paired-end case - $cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args -x .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg"; + if (ref $mate1arg eq "ARRAY") { + $cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg -1 " . join(",", @$mate1arg) . " -2 " . join(",", @$mate2arg); + } + else { + $cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg"; + } } else { # Unpaired case - $cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args -x .simple_tests.tmp $formatarg $readarg"; + if (ref $readarg eq "ARRAY") { + $cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg " . join(",", @$readarg); + } + else { + $cmd = "$bowtie2 $debug_arg @ARGV $idx_type $args --reads-per-batch $batch_size -x .simple_tests.tmp $formatarg $readarg"; + } } print "$cmd\n"; open(BT, "$cmd |") || die "Could not open pipe '$cmd |'";
    - Bowtie2 2.3.3 + Bowtie2 2.3.3.1 - 09/06/17  + 10/05/17