diff --git a/.github/workflows/bowtie2.yml b/.github/workflows/bowtie2.yml
index da7de103..a64a8a38 100644
--- a/.github/workflows/bowtie2.yml
+++ b/.github/workflows/bowtie2.yml
@@ -52,3 +52,4 @@ jobs:
         run: |
           make allall
           make simple-test
+          make random-test
diff --git a/BOWTIE2_VERSION b/BOWTIE2_VERSION
index ab6d2789..26f8b8bc 100644
--- a/BOWTIE2_VERSION
+++ b/BOWTIE2_VERSION
@@ -1 +1 @@
-2.4.4
\ No newline at end of file
+2.4.5
\ No newline at end of file
diff --git a/MANUAL b/MANUAL
index b39e9475..17320660 100644
--- a/MANUAL
+++ b/MANUAL
@@ -1924,6 +1924,10 @@ format:
 Fields are separated by tabs. Colorspace is always set to 0 for Bowtie
 2.
 
+    -o/--output <filename>
+
+Save output to user-specified filename (default: stdout)
+
     -v/--verbose
 
 Print verbose output (for debugging).
diff --git a/MANUAL.markdown b/MANUAL.markdown
index ab3ae7c6..fcb5cf69 100644
--- a/MANUAL.markdown
+++ b/MANUAL.markdown
@@ -2584,6 +2584,14 @@ names and lengths of the input sequences.  The summary has this format:
 
 Fields are separated by tabs.  Colorspace is always set to 0 for Bowtie 2.
 
+</td></tr><tr><td id="bowtie2-inspect-options-o">
+
+    -o/--output <filename>
+
+</td><td>
+
+Save output to user-specified filename (default: stdout)
+
 </td></tr><tr><td>
 
     -v/--verbose
@@ -2913,6 +2921,7 @@ warnings due to the case insensitive nature of markdown URLs -->
 [`-m`]:                                               #bowtie2-options-m
 [`-n`/`--names`]:                                     #bowtie2-inspect-options-n
 [`-o`/`--offrate`]:                                   #bowtie2-options-o
+[`-o`/`--output`]:                                    #bowtie2-inspect-options-o
 [`-o`]:                                               #bowtie2-options-o
 [`-p`/`--packed`]:                                    #bowtie2-build-options-p
 [`-p`/`--threads`]:                                   #bowtie2-options-p
diff --git a/Makefile b/Makefile
index 6c8fccc1..9c01b729 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,9 @@ ifneq (,$(findstring MINGW,$(shell uname)))
   WINDOWS := 1
   MINGW := 1
   # POSIX memory-mapped files not currently supported on Windows
+endif
+
+ifeq (1, $(WINDOWS))
   BOWTIE_MM :=
   BOWTIE_SHARED_MEM :=
 endif
@@ -252,7 +255,7 @@ GENERAL_LIST := $(wildcard scripts/*.sh) \
 
 ifeq (1,$(WINDOWS))
   BOWTIE2_BIN_LIST := $(BOWTIE2_BIN_LIST) bowtie2.bat bowtie2-build.bat bowtie2-inspect.bat
-  CXXFLAGS += -static-libgcc -static-libstdc++
+  CXXFLAGS += -static-libgcc -static-libstdc++ -static
 endif
 
 # This is helpful on Windows under MinGW/MSYS, where Make might go for
diff --git a/NEWS b/NEWS
index 6c4b123c..6bc676ba 100644
--- a/NEWS
+++ b/NEWS
@@ -4,7 +4,7 @@ Bowtie 2 NEWS
 Bowtie 2 is available for download from the project website,
 http://bowtie-bio.sf.net/bowtie2 and on Github,
 https://github.com/BenLangmead/bowtie2/releases.  2.0.0-beta1 is
-the first version released to the public and 2.4.4 is the latest
+the first version released to the public and 2.4.5 is the latest
 version.  Bowtie 2 is licensed under the GPLv3 license.  See `LICENSE'
 file for details.
 
@@ -19,546 +19,612 @@ Please report any issues to the Bowtie 2 Github page or using the Sourceforge bu
 Version Release History
 =======================
 
-Version 2.4.4 - May 23, 2021
+## Version 2.4.5 - Jan 15, 2022 ##
 
-    * Fixed an issue that would sometimes cause deadlocks in `bowtie2`
+### bowtie2 ###
+
+  * Fixed issues with `bowtie2` BAM parser that would cause `bowtie2` to
+    crash when processing input that was encoded with tools other than
+    samtools e.g. Picard.
+  * Fixed an issue causing `bowtie2` to drop certain optional fields
+    when aligning BAM reads with the `--preserve-tags` option.
+  * Fixed an issue causing `bowtie2` to produce mangled SAM output when
+    specifying `--sam-append-comment` together with the `--passthrough`
+    option.
+  * Appended `GO:query` to SAM `@HD` entry to indicate that reads are
+    grouped by query name, and bumped the SAM version to 1.5 to indicate
+    support for this change.
+
+### bowtie2-build ###
+
+  * Implemented thread pool to address performance regressions
+    introduced during the switch to C++11 threads.
+  * Fixed an issue causing masked-sequence metadata to be omitted
+    from index. This issue would subsequently result in sequence
+    data, `@SQ`, being left out from alignment SAM header.
+  * Included `.tmp` extension to index files currently being built.
+    The extension is removed only upon successful build. This change
+    seeks to address the assumption that `bowtie2-build` ran successfully
+    without building the reverse indexes.
+  * Fixed an issue causing `bowtie2-build` to sometimes incorrectly
+    calculate input size. This issue would result in the wrong index type
+    being chosen and only happened with GZip compressed files.
+
+### bowtie2-inspect ###
+
+  * Added a new `-o/--output` option to save the output of
+    `bowtie2-inspect` to a file instead of being dumped to standard
+    output.
+
+
+## Version 2.4.4 - May 23, 2021 ##
+
+  * Fixed an issue that would sometimes cause deadlocks in `bowtie2`
       when running multithreaded
 
-Version 2.4.3 - May 13, 2021
-
-    * Replaced TBB concurrency with C++ threads
-    * Added native support for processing Zstd-compressed read files
-      to bowtie2
-    * Added native support for processing Zstd-compressed
-      reference-genome files to bowtie2-build
-    * Fixed an issue causing `bowtie2` to report incorrect alignments
-      on big-endian machines
-    * Fixed an issue causing `bowtie2` to incorrectly process BAM
-      files on big-endian machines
-    * Fixed an issue causing Bowtie2 to set an incorrect MAPQ when AS and
-      XS are the maximum for read length
-    * Add support for building on Apple M1 processors
-
-Version 2.4.2 - Oct 5, 2020
-
-    * Fixed an issue that would cause the `bowtie2` wrapper script to throw
-      an error when using wrapper-specific arguments.
-    * Added new `--sam-append-comment` flag that appends comment from FASTA/Q
-      read to corresponding SAM record.
-    * Fixed an issue that would cause qupto, `-u`, to overflow when there
-      are >= 2^32 query sequences (PR #312).
-    * Fixed an issue that would cause bowtie2-build script to incorrectly
-      process reference files.
-
-Version 2.4.1 - Feb 28, 2020
-
-    * Fixed an issue that would cause the `bowtie2` wrapper script to incorrectly
-      process certain arguments.
-
-Version 2.4.0 - Feb 25, 2020
-
-    * Fixed an issue in `-b <bam>` input mode where one might prematurely close the
-      read file pointer causing "Bad file descriptor" in other threads
-    * Fixed an issue that could cause `bowtie2` to crash in `--no-1mm-upfront` mode
-    * Modified `bowtie2-build` to better handle of flags and positional parameters
-    * Migrated all `python` scripts to `python3`
-    * Added support for wildcards in input files to `bowtie2`, e.g. `bowtie2 -x
-      index -q *.fq` as opposed to `bowtie2 -x index -q 1.fq,2.fq,3.fq...`
-    * Fixed an issue causing <code>bowtie2</code> to incorrectly process read names
-      with slash mates plus extra characters (see
-      [#265](https://github.com/BenLangmead/bowtie2/issues/265))
-    * Clarified support for overriding presets with more specific options e.g
-      `bowtie2 -x index --local --very-fast-local --L22 -q reads.fq` will set the
-      seed length to 22, overriding the 25 set by `--very-fast-local`
-    * Modified SAM output for `-k`/`-a` so that supplementary alignments get
-      assigned a MAPQ of 255
-    * Fixed an issue that would sometimes cause `bowtie2-build` to not generate
-      reverse index files
-    * Added preliminary support for ppc64le architectures with the help of SIMDE
-      project (see [#271](https://github.com/BenLangmead/bowtie2/issues/271))
-    * Fixed an issue causing `bowtie2` to incorrectly calculate the MAPQ when `--mp`
-      was specified in combination with `--ignore-quals`
-
-Version 2.3.5.1 - Apr 16, 2019
-
-    * Added official support for BAM input files
-    * Added official support for CMake build system
-    * Added changes to Makefile for creating Reproducible builds (via [210](https://github.com/BenLangmead/bowtie2/pull/210))
-    * Fix an issue whereby building on aarch64 would require patching sed commands (via [#243](https://github.com/BenLangmead/bowtie2/pull/243))
-    * Fix an issue whereby `bowtie2` would incorrectly throw an error while processing `--interleaved` input
-
-Version 2.3.5 - Mar 16, 2019
-
-    * Added support for obtaining input reads directly from the Sequence Read Archive, via NCBI's [NGS language bindings](https://github.com/ncbi/ngs).  This is activated via the [`--sra-acc`](manual.shtml#bowtie2-options-sra-acc) option.  This implementation is based on Daehwan Kim's in [HISAT2](https://ccb.jhu.edu/software/hisat2).  Supports both unpaired and paired-end inputs.
-    * Bowtie 2 now compiles on ARM architectures (via [#216](https://github.com/BenLangmead/bowtie2/pull/216))
-    * `--interleaved` can now be combined with FASTA inputs (worked only with FASTQ before)
-    * Fixed issue whereby large indexes were not successfully found in the `$BOWTIE2_INDEXES` directory
-    * Fixed input from FIFOs (e.g. via process substitution) to distinguish gzip-compressed versus uncompressed input
-    * Fixed issue whereby arguments containing `bz2` `lz4` were misinterpretted as files
-    * Fixed several compiler warnings
-    * Fixed issue whereby both ends of a paired-end read could have negative TLEN if they exactly coincided
-    * Fixed issue whereby `bowtie2-build` would hang on end-of-file (via [#228](https://github.com/BenLangmead/bowtie2/pull/228))
-    * Fixed issue whereby wrapper script would sometimes create zombie processes (via [#51](https://github.com/BenLangmead/bowtie2/pull/51))
-    * Fixed issue whereby `bowtie2-build` and `bowtie2-inspect` wrappers would fail on some versions of Python/PyPy
-    * Replaced old, unhelpful `README.md` in the project with a version that includes badges, links and some highlights from the manual
-    * Note: BAM input support and CMake build support both remain experimental, but we expect to finalize them in the next release
-
-Version 2.3.4.3 - Sep 17, 2018
-
-    * Fixed an issue causing `bowtie2-build` and `bowtie2-inspect`
-      to output incomplete help text.
-    * Fixed an issue causing `bowtie2-inspect` to crash.
-    * Fixed an issue preventing `bowtie2` from processing paired and/or
-      unpaired FASTQ reads together with interleaved FASTQ reads.
-
-Version 2.3.4.2 - Aug 7, 2018
-
-    * Fixed issue causing `bowtie2` to fail in `--fast-local` mode.
-    * Fixed issue causing `--soft-clipped-unmapped-tlen` to be a positional argument.
-    * New option `--trim-to N` causes `bowtie2` to trim reads longer
-      than `N` bases to exactly `N` bases.  Can trim from either 3'
-      or 5' end, e.g. `--trim-to 5:30` trims reads to 30 bases,
-      truncating at the 5' end.
-    * Updated "Building from source" manual section with additional
-      instructions on installing TBB.
-    * Several other updates to manual, including new mentions of
-      [Bioconda](http://bioconda.github.io) and
-      [Biocontainers](https://biocontainers.pro).
-    * Fixed an issue preventing `bowtie2` from processing more than
-      one pattern source when running single threaded.
-    * Fixed an issue causing `bowtie2` and `bowtie2-inspect` to crash if
-      the index contains a gap-only segment.
-    * Added experimental BAM input mode `-b`. Works only with unpaired
-      input reads and BAM files that are sorted by read name (`samtools
-      sort -n`). BAM input mode also supports the following options:
-        o `--preserve-sam-tags`: Preserve any optional fields present in BAM record
-        o `--align-paired-reads`: Paired-end mode for BAM files
-    * Added experimental cmake support
+## Version 2.4.3 - May 13, 2021 ##
+
+  * Replaced TBB concurrency with C++ threads
+  * Added native support for processing Zstd-compressed read files
+    to bowtie2
+  * Added native support for processing Zstd-compressed
+    reference-genome files to bowtie2-build
+  * Fixed an issue causing `bowtie2` to report incorrect alignments
+    on big-endian machines
+  * Fixed an issue causing `bowtie2` to incorrectly process BAM
+    files on big-endian machines
+  * Fixed an issue causing Bowtie2 to set an incorrect MAPQ when AS and
+    XS are the maximum for read length
+  * Add support for building on Apple M1 processors
+
+## Version 2.4.2 - Oct 5, 2020 ##
+
+  * Fixed an issue that would cause the `bowtie2` wrapper script to throw
+    an error when using wrapper-specific arguments.
+  * Added new `--sam-append-comment` flag that appends comment from FASTA/Q
+    read to corresponding SAM record.
+  * Fixed an issue that would cause qupto, `-u`, to overflow when there
+    are >= 2^32 query sequences (PR #312).
+  * Fixed an issue that would cause bowtie2-build script to incorrectly
+    process reference files.
+
+## Version 2.4.1 - Feb 28, 2020 ##
+
+  * Fixed an issue that would cause the `bowtie2` wrapper script to incorrectly
+    process certain arguments.
+
+## Version 2.4.0 - Feb 25, 2020 ##
+
+  * Fixed an issue in `-b <bam>` input mode where one might prematurely close the
+    read file pointer causing "Bad file descriptor" in other threads
+  * Fixed an issue that could cause `bowtie2` to crash in `--no-1mm-upfront` mode
+  * Modified `bowtie2-build` to better handle flags and positional parameters
+  * Migrated all `python` scripts to `python3`
+  * Added support for wildcards in input files to `bowtie2`, e.g. `bowtie2 -x
+    index -q *.fq` as opposed to `bowtie2 -x index -q 1.fq,2.fq,3.fq...`
+  * Fixed an issue causing <code>bowtie2</code> to incorrectly process read names
+    with slash mates plus extra characters (see
+    [#265](https://github.com/BenLangmead/bowtie2/issues/265))
+  * Clarified support for overriding presets with more specific options e.g.
+    `bowtie2 -x index --local --very-fast-local --L22 -q reads.fq` will set the
+    seed length to 22, overriding the 25 set by `--very-fast-local`
+  * Modified SAM output for `-k`/`-a` so that supplementary alignments get
+    assigned a MAPQ of 255
+  * Fixed an issue that would sometimes cause `bowtie2-build` to not generate
+    reverse index files
+  * Added preliminary support for ppc64le architectures with the help of SIMDE
+    project (see [#271](https://github.com/BenLangmead/bowtie2/issues/271))
+  * Fixed an issue causing `bowtie2` to incorrectly calculate the MAPQ when `--mp`
+    was specified in combination with `--ignore-quals`
+
+## Version 2.3.5.1 - Apr 16, 2019 ##
+
+  * Added official support for BAM input files
+  * Added official support for CMake build system
+  * Added changes to Makefile for creating Reproducible builds (via [210](https://github.com/BenLangmead/bowtie2/pull/210))
+  * Fix an issue whereby building on aarch64 would require patching sed commands (via [#243](https://github.com/BenLangmead/bowtie2/pull/243))
+  * Fix an issue whereby `bowtie2` would incorrectly throw an error while processing `--interleaved` input
+
+## Version 2.3.5 - Mar 16, 2019 ##
+
+  * Added support for obtaining input reads directly from the Sequence Read Archive, via NCBI's [NGS language bindings](https://github.com/ncbi/ngs).  This is activated via the [`--sra-acc`](manual.shtml#bowtie2-options-sra-acc) option.  This implementation is based on Daehwan Kim's in [HISAT2](https://ccb.jhu.edu/software/hisat2).  Supports both unpaired and paired-end inputs.
+  * Bowtie 2 now compiles on ARM architectures (via [#216](https://github.com/BenLangmead/bowtie2/pull/216))
+  * `--interleaved` can now be combined with FASTA inputs (worked only with FASTQ before)
+  * Fixed issue whereby large indexes were not successfully found in the `$BOWTIE2_INDEXES` directory
+  * Fixed input from FIFOs (e.g. via process substitution) to distinguish gzip-compressed versus uncompressed input
+  * Fixed issue whereby arguments containing `bz2` `lz4` were misinterpreted as files
+  * Fixed several compiler warnings
+  * Fixed issue whereby both ends of a paired-end read could have negative TLEN if they exactly coincided
+  * Fixed issue whereby `bowtie2-build` would hang on end-of-file (via [#228](https://github.com/BenLangmead/bowtie2/pull/228))
+  * Fixed issue whereby wrapper script would sometimes create zombie processes (via [#51](https://github.com/BenLangmead/bowtie2/pull/51))
+  * Fixed issue whereby `bowtie2-build` and `bowtie2-inspect` wrappers would fail on some versions of Python/PyPy
+  * Replaced old, unhelpful `README.md` in the project with a version that includes badges, links and some highlights from the manual
+  * Note: BAM input support and CMake build support both remain experimental, but we expect to finalize them in the next release
+
+## Version 2.3.4.3 - Sep 17, 2018 ##
+
+  * Fixed an issue causing `bowtie2-build` and `bowtie2-inspect`
+    to output incomplete help text.
+  * Fixed an issue causing `bowtie2-inspect` to crash.
+  * Fixed an issue preventing `bowtie2` from processing paired and/or
+    unpaired FASTQ reads together with interleaved FASTQ reads.
+
+## Version 2.3.4.2 - Aug 7, 2018 ##
+
+  * Fixed issue causing `bowtie2` to fail in `--fast-local` mode.
+  * Fixed issue causing `--soft-clipped-unmapped-tlen` to be a positional argument.
+  * New option `--trim-to N` causes `bowtie2` to trim reads longer
+    than `N` bases to exactly `N` bases.  Can trim from either 3'
+    or 5' end, e.g. `--trim-to 5:30` trims reads to 30 bases,
+    truncating at the 5' end.
+  * Updated "Building from source" manual section with additional
+    instructions on installing TBB.
+  * Several other updates to manual, including new mentions of
+    [Bioconda](http://bioconda.github.io) and
+    [Biocontainers](https://biocontainers.pro).
+  * Fixed an issue preventing `bowtie2` from processing more than
+    one pattern source when running single threaded.
+  * Fixed an issue causing `bowtie2` and `bowtie2-inspect` to crash if
+    the index contains a gap-only segment.
+  * Added experimental BAM input mode `-b`. Works only with unpaired
+    input reads and BAM files that are sorted by read name (`samtools
+    sort -n`). BAM input mode also supports the following options:
+      o `--preserve-sam-tags`: Preserve any optional fields present in BAM record
+      o `--align-paired-reads`: Paired-end mode for BAM files
+  * Added experimental cmake support
 
 Thread-scaling paper appears - July 19, 2018
 
-    * Our latest work on Bowtie's core thread scaling capabilities
-      [just appeared Open Access in the journal Bioinformatics](href="https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty648/5055585)
+  * Our latest work on Bowtie's core thread scaling capabilities
+    [just appeared Open Access in the journal Bioinformatics](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty648/5055585)
 
-Version 2.3.4.1 - Feb 3, 2018
+## Version 2.3.4.1 - Feb 3, 2018 ##
 
-    * Fixed an issue with `--reorder` that caused bowtie2 to crash
-      while reordering SAM output
+  * Fixed an issue with `--reorder` that caused bowtie2 to crash
+    while reordering SAM output
 
-Version 2.3.4 - Dec 29, 2017
+## Version 2.3.4 - Dec 29, 2017 ##
 
-    * Fixed major issue causing corrupt SAM output when using many
-      threads (-p/--threads) on certain systems.
-    * Fixed an issue whereby bowtie2 processes could overwrite each
-      others' named pipes on HPC systems.
-    * Fixed an issue causing bowtie2-build and bowtie2-inspect to
-      return prematurely on Windows.
-    * Fixed issues raised by compiler "sanitizers" that could
-      potentially have caused memory corruption or undefined behavior.
-    * Added the "continuous FASTA" input format (`-F`) for aligning
-      all the k-mers in the sequences of a FASTA file.  Useful for
-      determining mapability of regions of the genome, and similar
-      tasks.
+  * Fixed major issue causing corrupt SAM output when using many
+    threads (-p/--threads) on certain systems.
+  * Fixed an issue whereby bowtie2 processes could overwrite each
+    others' named pipes on HPC systems.
+  * Fixed an issue causing bowtie2-build and bowtie2-inspect to
+    return prematurely on Windows.
+  * Fixed issues raised by compiler "sanitizers" that could
+    potentially have caused memory corruption or undefined behavior.
+  * Added the "continuous FASTA" input format (`-F`) for aligning
+    all the k-mers in the sequences of a FASTA file.  Useful for
+    determining mappability of regions of the genome, and similar
+    tasks.
 
-Version 2.3.3.1 - Oct 05, 2017
+## Version 2.3.3.1 - Oct 05, 2017 ##
 
-    * Fixed an issue causing input files to be skipped when running
-    multi-threaded alignment
-    * Fixed an issue causing the first character of a read name to be
-    dropped while parsing reads split across multiple input files
+  * Fixed an issue causing input files to be skipped when running
+    multi-threaded alignment
+  * Fixed an issue causing the first character of a read name to be
+    dropped while parsing reads split across multiple input files
+
+## Version 2.3.3 - Sep 09, 2017 ##
 
-Version 2.3.3 - Sep 09, 2017
 From this release forward prepackaged bowtie2 binaries are now
 statically linked to the zlib compression library and, the recommended
 threading library, TBB. Users who rely on prepackaged builds are
 no longer required to have these packages pre-installed. As a result
 of the aforementioned changes legacy packages have been discontinued.
 
-    * bowtie2-build now supports gzip-compressed FASTA inputs
-    * New --xeq parameter for bowtie2 disambiguates the 'M' CIGAR
-      flag. When specified, matches are indicated with the '=' operation and
-      mismatches with 'X'
-    * Fixed a possible infinite loop during parallel index building due
-      to the compiler optimizing away a loop condition
-    * Added --soft-clipped-unmapped-tlen parameter for bowtie2 that
-      ignores soft-clipped bases when calculating template length (TLEN)
-    * Added support for multi-line sequences in FASTA read inputs
-    * Expanded explanation of MD:Z field in manual
-    * Fixed a crashing bug when output is redirected to a pipe
-    * Fixed ambiguity in the SEED alignment policy that sometimes caused -N parameter to be ignored
-
-Version 2.3.2 - May 05, 2017
-    * Added support for interleaved paired-end FASTQ inputs
-      (--interleaved)
-    * Now reports MREVERSE SAM flag for unaligned end when only one
-      end of a pair aligns
-    * Fixed issue where first character of some read names was
-      omitted from SAM output when using tabbed input formats
-    * Added --sam-no-qname-trunc option, which causes entire read
-      name, including spaces, to be written to SAM output.  This
-      violates SAM specification, but can be useful in applications
-      that immediately postprocess the SAM.
-    * Fixed compilation error caused by pointer comparison issue
-      in aligner_result.cpp
-    * Removed termcap and readline dependencies introduced in v2.3.1
-    * Fixed compilation issues caused by gzbuffer function when
-      compiling with zlib v1.2.3.5 and earlier. Users compiling against
-      these libraries will use the zlib default buffer size of 8Kb
-      when decompressing read files.
-    * Fixed issue that would cause Bowtie 2 hang when aligning FASTA
-      inputs with more than one thread
-
-Version 2.3.1 - Mar 04, 2017
+  * bowtie2-build now supports gzip-compressed FASTA inputs
+  * New --xeq parameter for bowtie2 disambiguates the 'M' CIGAR
+    flag. When specified, matches are indicated with the '=' operation and
+    mismatches with 'X'
+  * Fixed a possible infinite loop during parallel index building due
+    to the compiler optimizing away a loop condition
+  * Added --soft-clipped-unmapped-tlen parameter for bowtie2 that
+    ignores soft-clipped bases when calculating template length (TLEN)
+  * Added support for multi-line sequences in FASTA read inputs
+  * Expanded explanation of MD:Z field in manual
+  * Fixed a crashing bug when output is redirected to a pipe
+  * Fixed ambiguity in the SEED alignment policy that sometimes caused -N parameter to be ignored
+
+## Version 2.3.2 - May 05, 2017 ##
+
+  * Added support for interleaved paired-end FASTQ inputs
+    (--interleaved)
+  * Now reports MREVERSE SAM flag for unaligned end when only one
+    end of a pair aligns
+  * Fixed issue where first character of some read names was
+    omitted from SAM output when using tabbed input formats
+  * Added --sam-no-qname-trunc option, which causes entire read
+    name, including spaces, to be written to SAM output.  This
+    violates SAM specification, but can be useful in applications
+    that immediately postprocess the SAM.
+  * Fixed compilation error caused by pointer comparison issue
+    in aligner_result.cpp
+  * Removed termcap and readline dependencies introduced in v2.3.1
+  * Fixed compilation issues caused by gzbuffer function when
+    compiling with zlib v1.2.3.5 and earlier. Users compiling against
+    these libraries will use the zlib default buffer size of 8Kb
+    when decompressing read files.
+  * Fixed issue that would cause Bowtie 2 hang when aligning FASTA
+    inputs with more than one thread
+
+## Version 2.3.1 - Mar 04, 2017 ##
+
 Please note that as of this release Bowtie 2 now has dependencies on zlib and readline libraries.
 Make sure that all dependencies are met before attempting to build from source.
 
-    * Added native support for gzipped read files.  The wrapper
-      script is no longer responsible for decompression.  This
-      simplifies the wrapper and improves speed and thread scalability
-      for gzipped inputs.
-    * Fixed a bug that caused `bowtie2-build` to crash when the
-      first FASTA sequence contains all Ns.
-    * Add support for interleaved FASTQ format (`--interleaved`)
-    * Empty FASTQ inputs would yield an error in Bowtie 2 2.3.0,
-      whereas previous versions would simply align 0 reads and report
-      the SAM header as usual.  This version returns to the pre-2.3.0
-      behavior, resolving a compatibility issue between TopHat2 and
-      Bowtie 2 2.3.0.
-    * Fixed a bug whereby combining `--un-conc*` with `-k` or `-a`
-      would cause `bowtie2` to print duplicate reads in one or both
-      of the `--un-conc*` output files, causing the ends to be
-      misaligned.
-    * The default `--score-min` for `--local` mode is now `G,20,8`.
-      That was the stated default in the documentation for a while,
-      but the actual default was `G,0,10` for many versions.  Now the
-      default matches the documentation and, we find, yields more
-      accurate alignments than `G,0,10`
-
-Version 2.3.0 - Dec 13, 2016
-   * Code related to read parsing was completely rewritten to improve
-     scalability to many threads.  In short, the critical section is
-     simpler and parses input reads in batches rather than one at a time.
-     The improvement applies to all read formats.
-   * TBB is now the default threading library.  We consistently found TBB to give
-     superior thread scaling.  It is widely available and widely installed.
-     That said, we are also preserving a "legacy" version of Bowtie that,
-     like previous releases, does not use TBB.  To compile Bowtie source
-     in legacy mode use `NO_TBB=1`.  To
-     use legacy binaries, download the appropriate binary archive with
-     "legacy" in the name.
-   * Bowtie now uses a queue-based lock rather
-     than a spin or heavyweight lock.  We find this gives superior thread
-     scaling; we saw an order-of-magnitude throughput improvements at
-     120 threads in one experiment, for example.
-   * Unnecessary thread synchronization removed
-   * Fixed issue with parsing FASTA records with greater-than symbol in the name
-   * Now detects and reports inconsistencies between `--score-min` and `--ma`
-   * Changed default for `--bmaxdivn` to yield better memory footprint and running time when building an index with many threads
-
-Version 2.2.9 - Apr 22, 2016
-   * Fixed the multiple threads issue for the bowtie2-build.
-   * Fixed a TBB related build issue impacting TBB v4.4.
-
-Version 2.2.8 - Mar 10, 2016
-   * Various website updates.
-   * Fixed the bowtie2-build issue that made TBB compilation fail.
-   * Fixed the static build for Win32 platform.
-   
-
-Version 2.2.7 - Feb 10, 2016
-   * Added a parallel index build option: bowtie2-build --threads <# threads>.
-   * Fixed an issue whereby IUPAC codes (other than A/C/G/T/N) in reads were
-     converted to As. Now all non-A/C/G/T characters in reads become Ns.
-   * Fixed some compilation issues, including for the Intel C++ Compiler.
-   * Removed debugging code that could impede performance for many alignment
-     threads.
-   * Fixed a few typos in documentation.
-
-Version 2.2.6 - Jul 22, 2015
-   * Switched to a stable sort to avoid some potential reproducibility confusions.
-   * Added 'install' target for *nix platforms.
-   * Added the Intel TBB option which provides in most situations a better performance
-     output. TBB is not present by default in the current build but can be added 
-     by compiling the source code with WITH_TBB=1 option.
-   * Fixed a bug that caused seed lenght to be dependent of the -L and -N parameters order.
-   * Fixed a bug that caused --local followed by -N to reset seed lenght to 22 which is
-     actually the default value for global.
-   * Enable compilation on FreeBSD and clang, although gmake port is still required.
-   * Fixed an issue that made bowtie2 compilation process to fail on Snow Leopard.
-
-
-Version 2.2.5 - Mar 9, 2015
-   * Fixed some situations where incorrectly we could detect a Mavericks platform.
-   * Fixed some manual issues including some HTML bad formating.
-   * Make sure the wrapper correctly identifies the platform under OSX.
-   * Fixed --rg/--rg-id options where included spaces were incorrectly treated.
-   * Various documentation fixes added by contributors.
-   * Fixed the incorrect behavior where parameter file names may contain spaces.
-   * Fixed bugs related with the presence of spaces in the path where bowtie binaries are stored.
-   * Improved exception handling for missformated quality values. 
-   * Improved redundancy checks by correctly account for soft clipping. 
-
-Version 2.2.4 - Oct 22, 2014
-   * Fixed a Mavericks OSX specific bug caused by some linkage ambiguities.
-   * Added lz4 compression option for the wrapper.
-   * Fixed the vanishing --no-unal help line.
-   * Added the static linkage for MinGW builds.
-   * Added extra seed-hit output.
-   * Fixed missing 0-length read in fastq --passthrough output.
-   * Fixed an issue that would cause different output in -a mode depending on random seed.
-
-Version 2.2.3 - May 30, 2014
-   * Fixed a bug that made loading an index into memory crash sometimes.
-   * Fixed a silent failure to warn the user in case the -x option is missing.
-   * Updated al, un, al-conc and un-conc options to avoid confusion in cases
-     where the user does not provide a base file name.
-   * Fixed a spurious assert that made bowtie2-inspect debug fail. 
-
-Version 2.2.2 - April 10, 2014
-   * Improved performance for cases where the reference contains ambiguous 
-     or masked nucleobases represented by Ns.  
-
-Version 2.2.1 - February 28, 2014
-   * Improved way in which index files are loaded for alignment.  Should fix
-     efficiency problems on some filesystems.
-   * Fixed a bug that made older systems unable to correctly deal with bowtie 
-     relative symbolic links.
-   * Fixed a bug that, for very big indexes, could determine to determine file
-     offsets correctly.
-   * Fixed a bug where using --no-unal option incorrectly suppressed
-     --un/--un-conc output.
-   * Dropped a perl dependency that could cause problems on old systems.
-   * Added --no-1mm-upfront option and clarified documentation for parameters
-     governing the multiseed heuristic.
-
-Version 2.2.0 - February 10, 2014
-   * Improved index querying efficiency using "population count" instructions
-     available since SSE4.2.
-   * Added support for large and small indexes, removing 4-billion-nucleotide
-     barrier.  Bowtie 2 can now be used with reference genomes of any size.
-   * Fixed bug that could cause bowtie2-build to crash when reference length
-     is close to 4 billion.
-   * Fixed issue in bowtie2-inspect that caused -e mode not to output
-     nucleotides properly.
-   * Added a CL: string to the @PG SAM header to preserve information about
-     the aligner binary and paramteres.
-   * No longer releasing 32-bit binaries.  Simplified manual and Makefile
-     accordingly.
-   * Credits to the Intel(r) enabling team for performance optimizations
-     included in this release.  Thank you!
-   * Phased out CygWin support.
-   * Added the .bat generation for Windows.
-   * Fixed some issues with some uncommon chars in fasta files.
-   * Fixed wrappers so bowtie can now be used with symlinks.
+  * Added native support for gzipped read files.  The wrapper
+    script is no longer responsible for decompression.  This
+    simplifies the wrapper and improves speed and thread scalability
+    for gzipped inputs.
+  * Fixed a bug that caused `bowtie2-build` to crash when the
+    first FASTA sequence contains all Ns.
+  * Add support for interleaved FASTQ format (`--interleaved`)
+  * Empty FASTQ inputs would yield an error in Bowtie 2 2.3.0,
+    whereas previous versions would simply align 0 reads and report
+    the SAM header as usual.  This version returns to the pre-2.3.0
+    behavior, resolving a compatibility issue between TopHat2 and
+    Bowtie 2 2.3.0.
+  * Fixed a bug whereby combining `--un-conc*` with `-k` or `-a`
+    would cause `bowtie2` to print duplicate reads in one or both
+    of the `--un-conc*` output files, causing the ends to be
+    misaligned.
+  * The default `--score-min` for `--local` mode is now `G,20,8`.
+    That was the stated default in the documentation for a while,
+    but the actual default was `G,0,10` for many versions.  Now the
+    default matches the documentation and, we find, yields more
+    accurate alignments than `G,0,10`
+
+## Version 2.3.0 - Dec 13, 2016 ##
+
+  * Code related to read parsing was completely rewritten to improve
+    scalability to many threads.  In short, the critical section is
+    simpler and parses input reads in batches rather than one at a time.
+    The improvement applies to all read formats.
+  * TBB is now the default threading library.  We consistently found TBB to give
+    superior thread scaling.  It is widely available and widely installed.
+    That said, we are also preserving a "legacy" version of Bowtie that,
+    like previous releases, does not use TBB.  To compile Bowtie source
+    in legacy mode use `NO_TBB=1`.  To
+    use legacy binaries, download the appropriate binary archive with
+    "legacy" in the name.
+  * Bowtie now uses a queue-based lock rather
+    than a spin or heavyweight lock.  We find this gives superior thread
+    scaling; we saw an order-of-magnitude throughput improvement at
+    120 threads in one experiment, for example.
+  * Unnecessary thread synchronization removed
+  * Fixed issue with parsing FASTA records with greater-than symbol in the name
+  * Now detects and reports inconsistencies between `--score-min` and `--ma`
+  * Changed default for `--bmaxdivn` to yield better memory footprint and running time when building an index with many threads
+
+## Version 2.2.9 - Apr 22, 2016 ##
+
+  * Fixed the multiple threads issue for the bowtie2-build.
+  * Fixed a TBB related build issue impacting TBB v4.4.
+
+## Version 2.2.8 - Mar 10, 2016 ##
+
+  * Various website updates.
+  * Fixed the bowtie2-build issue that made TBB compilation fail.
+  * Fixed the static build for Win32 platform.
+
+
+## Version 2.2.7 - Feb 10, 2016 ##
+
+  * Added a parallel index build option: bowtie2-build --threads <# threads>.
+  * Fixed an issue whereby IUPAC codes (other than A/C/G/T/N) in reads were
+    converted to As. Now all non-A/C/G/T characters in reads become Ns.
+  * Fixed some compilation issues, including for the Intel C++ Compiler.
+  * Removed debugging code that could impede performance for many alignment
+    threads.
+  * Fixed a few typos in documentation.
+
+## Version 2.2.6 - Jul 22, 2015 ##
+
+  * Switched to a stable sort to avoid some potential reproducibility confusions.
+  * Added 'install' target for *nix platforms.
+  * Added the Intel TBB option which provides in most situations a better performance
+    output. TBB is not present by default in the current build but can be added
+    by compiling the source code with WITH_TBB=1 option.
+  * Fixed a bug that caused seed length to depend on the order of the -L and -N parameters.
+  * Fixed a bug that caused --local followed by -N to reset seed length to 22 which is
+    actually the default value for global.
+  * Enable compilation on FreeBSD and clang, although gmake port is still required.
+  * Fixed an issue that caused the bowtie2 compilation process to fail on Snow Leopard.
+
+
+## Version 2.2.5 - Mar 9, 2015 ##
+
+  * Fixed some situations where we could incorrectly detect a Mavericks platform.
+  * Fixed some manual issues including some bad HTML formatting.
+  * Make sure the wrapper correctly identifies the platform under OSX.
+  * Fixed --rg/--rg-id options where included spaces were incorrectly treated.
+  * Various documentation fixes added by contributors.
+  * Fixed the incorrect behavior where parameter file names may contain spaces.
+  * Fixed bugs related with the presence of spaces in the path where bowtie binaries are stored.
+  * Improved exception handling for misformatted quality values.
+  * Improved redundancy checks by correctly accounting for soft clipping.
+
+## Version 2.2.4 - Oct 22, 2014 ##
+
+  * Fixed a Mavericks OSX specific bug caused by some linkage ambiguities.
+  * Added lz4 compression option for the wrapper.
+  * Fixed the vanishing --no-unal help line.
+  * Added the static linkage for MinGW builds.
+  * Added extra seed-hit output.
+  * Fixed missing 0-length read in fastq --passthrough output.
+  * Fixed an issue that would cause different output in -a mode depending on random seed.
+
+## Version 2.2.3 - May 30, 2014 ##
+
+  * Fixed a bug that made loading an index into memory crash sometimes.
+  * Fixed a silent failure to warn the user in case the -x option is missing.
+  * Updated al, un, al-conc and un-conc options to avoid confusion in cases
+    where the user does not provide a base file name.
+  * Fixed a spurious assert that made bowtie2-inspect debug fail.
+
+## Version 2.2.2 - April 10, 2014 ##
+
+  * Improved performance for cases where the reference contains ambiguous
+    or masked nucleobases represented by Ns.
+
+## Version 2.2.1 - February 28, 2014 ##
+
+  * Improved way in which index files are loaded for alignment.  Should fix
+    efficiency problems on some filesystems.
+  * Fixed a bug that made older systems unable to correctly deal with bowtie
+    relative symbolic links.
+  * Fixed a bug that, for very big indexes, could fail to determine file
+    offsets correctly.
+  * Fixed a bug where using --no-unal option incorrectly suppressed
+    --un/--un-conc output.
+  * Dropped a perl dependency that could cause problems on old systems.
+  * Added --no-1mm-upfront option and clarified documentation for parameters
+    governing the multiseed heuristic.
+
+## Version 2.2.0 - February 10, 2014 ##
+
+  * Improved index querying efficiency using "population count" instructions
+    available since SSE4.2.
+  * Added support for large and small indexes, removing 4-billion-nucleotide
+    barrier.  Bowtie 2 can now be used with reference genomes of any size.
+  * Fixed bug that could cause bowtie2-build to crash when reference length
+    is close to 4 billion.
+  * Fixed issue in bowtie2-inspect that caused -e mode not to output
+    nucleotides properly.
+  * Added a CL: string to the @PG SAM header to preserve information about
+    the aligner binary and parameters.
+  * No longer releasing 32-bit binaries.  Simplified manual and Makefile
+    accordingly.
+  * Credits to the Intel(r) enabling team for performance optimizations
+    included in this release.  Thank you!
+  * Phased out CygWin support.
+  * Added the .bat generation for Windows.
+  * Fixed some issues with some uncommon chars in fasta files.
+  * Fixed wrappers so bowtie can now be used with symlinks.
 
 Bowtie 2 on GitHub - February 4, 2014
-   * Bowtie 2 source now lives in a public GitHub repository:
-     https://github.com/BenLangmead/bowtie2.
-
-Version 2.1.0 - February 21, 2013
-   * Improved multithreading support so that Bowtie 2 now uses native Windows
-     threads when compiled on Windows and uses a faster mutex.  Threading
-     performance should improve on all platforms.
-   * Improved support for building 64-bit binaries for Windows x64 platforms.
-   * Bowtie is using a spinlocking mechanism by default. 
-   * Test option --nospin is no longer available. However bowtie2 can always
-     be recompiled with EXTRA_FLAGS="-DNO_SPINLOCK" in order to drop the 
-     default spinlock usage.
-
-Version 2.0.6 - January 27, 2013
-   * Fixed issue whereby spurious output would be written in --no-unal mode.
-   * Fixed issue whereby multiple input files combined with --reorder would
-     cause truncated output and a memory spike.
-   * Fixed spinlock datatype for Win64 API (LLP64 data model) which made it
-     crash when compiled under Windows 7 x64.
-   * Fixed bowtie2 wrapper to handle filename/paths operations in a more 
-     platform independent manner.
-   * Added pthread as a default library option under cygwin, and pthreadGC
-     for MinGW.
-   * Fixed some minor issues that made MinGW compilation fail.
-
-Version 2.0.5 - January 4, 2013
-   * Fixed an issue that would cause excessive memory allocation when aligning
-     to very repetitive genomes.
-   * Fixed an issue that would cause a pseudo-randomness-related assert to be
-     thrown in debug mode under rare circumstances.
-   * When bowtie2-build fails, it will now delete index files created so far so
-     that invalid index files don't linger.
-   * Tokenizer no longer has limit of 10,000 tokens, which was a problem for
-     users trying to index a very large number of FASTA files.
-   * Updated manual's discussion of the -I and -X options to mention that
-     setting them farther apart makes Bowtie 2 slower.
-   * Renamed COPYING to LICENSE and created a README to be GitHub-friendly.
-
-Version 2.0.4 - December 17, 2012
-   * Fixed issue whereby --un, --al, --un-conc and --al-conc options would
-     incorrectly suppress SAM output.
-   * Fixed minor command-line parsing issue in wrapper script.
-   * Fixed issue on Windows where wrapper script would fail to find
-     bowtie2-align.exe binary.
-   * Updated some of the index-building scripts and documentation.
-   * Updated author's contact info in usage message.
-
-Version 2.0.3 - December 14, 2012
-   * Fixed thread safely issues that could cause crashes with a large number of
-     threads.  Thanks to John O'Neill for identifying these issues.
-   * Fixed some problems with pseudo-random number generation that could cause
-     unequal distribution of alignments across equally good candidate loci.
-   * The --un, --al, --un-conc, and --al-conc options (and their compressed
-     analogs) are all much faster now, making it less likely that they become
-     the bottleneck when Bowtie 2 is run with large -p.
-   * Fixed issue with innaccurate mapping qualities, XS:i, and YS:i flags when
-     --no-mixed and --no-discordant are specified at the same time.
-   * Fixed some compiler warnings and errors when using clang++ to compile.
-   * Fixed race condition in bowtie2 script when named pipes are used.
-   * Added more discussion of whitespace in read names to manual.
-
-Version 2.0.2 - October 31, 2012
-   * Fixes a couple small issues pointed out to me immediately after 2.0.1
-     release
-   * Mac binaries now built on 10.6 in order to be forward-compatible with more
-     Mac OS versions
-   * Small changes to source to make it compile with gcc versions up to 4.7
-     without warnings
-
-Version 2.0.1 - October 31, 2012
-   * First non-beta release.
-   * Fixed an issue that would cause Bowtie 2 to use excessive amounts of
-     memory for closely-matching and highly repetitive reads under some
-     circumstances.
-   * Fixed a bug in --mm mode that would fail to report when an index file
-     could not be memory-mapped.
-   * Added --non-deterministic option, which better matches how some users
-     expect the pseudo-random generator inside Bowtie 2 to work.  Normally, if
-     you give the same read (same name, sequence and qualities) and --seed, you
-     get the same answer.  --non-deterministic breaks that guarantee.  This can
-     be more appropriate for datasets where the input contains many identical
-     reads (same name, same sequence, same qualities).
-   * Fixed a bug in bowtie2-build would yield corrupt index files when memory
-     settings were adjusted in the middle of indexing.
-   * Clarified in manual that --un (or --un-*) options print reads exactly as
-     they appeared in the input, and that they are not necessarily written in
-     the same order as they appeared in the input.
-   * Fixed issue whereby wrapper would incorrectly interpret arguments with
-     --al as a prefix (e.g. --all) as --al.
-   * Added option --omit-sec-seq, which causes Bowtie 2 to set SEQ and QUAL
-     fields to "*" for secondary alignments.
-
-Version 2.0.0-beta7 - July 9, 2012
-   * Fixed an issue in how Bowtie 2 aligns longer reads in --local mode.  Some
-     alignments were incorrectly curtailed on the left-hand side.
-   * Fixed issue --un (or --un-*) would fail to output unaligned reads when
-     --no-unal was also specified.
-   * Fixed issue whereby --un-* were significantly slowing down Bowtie 2 when
-     -p was set greater than 1.
-   * Fixed issue that would could cause hangs in -a mode or when -k is set
-     high.
-   * Fixed issue whereby the SAM FLAGS field could be set incorrectly for
-     secondary paired-end alignments with -a or -k > 1.
-   * When input reads are unpaired, Bowtie 2 no longer removes the trailing /1
-     or /2 from the read name.
-   * -M option is now deprecated.  It will be removed in subsequent versions.
-     What used to be called -M mode is still the default mode, and -k and -a
-     are still there alternatives to the default mode, but adjusting the -M
-     setting is deprecated.  Use the -D and -R options to adjust the effort
-     expended to find valid alignments.
-   * Gaps are now left-aligned in a manner similar to BWA and other tools.
-   * Fixed issue whereby wrapper script would not pass on exitlevel correctly,
-     sometimes spuriously hiding non-0 exitlevel.
-   * Added documentation for YT:Z to manual.
-   * Fixed documentation describing how Bowtie 2 searches for an index given an
-     index basename.
-   * Fixed inconsistent documentation for the default value of the -i parameter
-
-Version 2.0.0-beta6 - May 7, 2012
-   * Bowtie 2 now handles longer reads in a more memory-economical fashion,
-     which should prevent many out-of-memory issues for longer reads.
-   * Error message now produced when -L is set greater than 32.
-   * Added a warning message to warn when bowtie2-align binary is being run
-     directly, rather than via the wrapper.  Some functionality is provided by
-     the wrapper, so Bowtie 2 should always be run via the bowtie2 executable
-     rather than bowtie2-align.
-   * Fixed an occasional crashing bug that was usually caused by setting the
-     seed length relatively short.
-   * Fixed an issue whereby the FLAG, RNEXT and PNEXT fields were incorrect for
-     some paired-end alignments.  Specifically, this affected paired-end
-     alignments where both mates aligned and one or both mates aligned
-     non-uniquely.
-   * Fixed issue whereby compressed input would sometimes be mishandled.
-   * Renamed --sam-* options to omit the "sam-" prefix for brevity.  The old
-     option names will also work.
-   * Added --no-unal option to suppress SAM records corresponding to unaligned
-     reads, i.e., records where FLAG field has 0x4 set.
-   * Added --rg-id option and enhanced the documentation for both --rg-id and
-     --rg.  Users were confused by the need to specify --rg "ID:(something)" in
-     order for the @RG line to be printed; hopefully this is clearer now.
-   * Index updates: indexes linked to in the right-hand sidebar have been
-     updated to include the unplaced contigs appearing in the UCSC "random"
-     FASTA files.  This makes the indexes more complete.  Also, an index for
-     the latest mouse assembly, mm10 (AKA "GRCm38") has been added.
-
-Version 2.0.0-beta5 - December 14, 2011
-   * Added --un, --al, --un-conc, and --al-conc options that write unpaired
-     and/or paired-end reads to files depending on whether they align at least
-     once or fail to align.
-   * Added --reorder option.  When enabled, the order of the SAM records output
-     by Bowtie 2 will match the order of the input reads even when -p is set
-     greater than 1.  This is disabled by default; enabling it makes Bowtie 2
-     somewhat slower and use somewhat more memory when -p is set greater than
-     1.
-   * Changed the default --score-min in --local mode to G,20,8.  This ought to
-     improve sensitivity and accuracy in many cases.
-   * Improved error reporting.
-   * Fixed some minor documentation issues.
-   * Note: I am aware of an issue whereby longer reads (>10,000 bp) drive the
-     memory footprint way up and often cause an out-of-memory exception.  This
-     will be fixed in a future version.
-
-Version 2.0.0-beta4 - December 5, 2011
-   * Accuracy improvements.
-   * Speed improvements in some situations.
-   * Fixed a handful of crashing bugs.
-   * Fixed some documentation bugs.
-   * Fixed bug whereby --version worked incorrectly.
-   * Fixed formatting bug with MD:Z optional field that would sometimes fail to
-     follow a mismatch with a number.
-   * Added -D option for controlling the maximum number of seed extensions that
-     can fail in a row before we move on.  This option or something like it
-	 will eventually replace the argument to -M.
-   * Added -R option to control maximum number of times re-seeding is attempted
-     for a read with repetitive seeds.
-   * Changed default to --no-dovetail.  Specifying --dovetail turns it back on.
-   * Added second argument for --mp option so that user can set maximum and
-     minimum mismatch penalties at once.  Also tweaked the formula for
-     calculating the quality-aware mismatch penalty.
-
-Version 2.0.0-beta3 - November 1, 2011
-   * Accuracy improvements.
-   * Speed improvements in some situations.
-   * Fixed a handful of crashing bugs.
-   * Fixed a bug whereby number of repetitively aligned reads could be
-     misreported in the summary output.
-
-Version 2.0.0-beta2 - October 16, 2011
-   * Added manual, both included in the download package and on the website.
-     The website will always have the manual for the latest version.
-   * Added Linux 32-bit and 64-bit binary packages.  Mac OS X packages to come.
-     Still working on a Windows package.
-   * Fixed a bug that led to crashes when seed-alignment result memory was
-     exhausted.
-   * Changed the --end-to-end mode --score-min default to be less permissive.
-     The previous threshold seemed to be having an adverse effect on accuracy,
-     though the fix implemented in this version comes at the expense of some
-     sensitivity.
-   * Changed the --end-to-end mode -M default to be lower by 2 notches.  This
-     offsets any detrimental effect that the previous change would have had on
-     speed, without a large adverse impact on accuracy.  As always, setting -M
-     higher will yield still greater accuracy at the expense of speed.
-
-Version 2.0.0-beta1 - September 22, 2011
-   * First public release.
-   * Caveats: as of now, the manual is incomplete, there's no tutorial, and no
-     example genome or example reads.  All these will be fixed in upcoming
-     releases.
-   * Only a source package is currently available.  Platform-specific binaries
-     will be included in future releases.
+  * Bowtie 2 source now lives in a public GitHub repository:
+    https://github.com/BenLangmead/bowtie2.
+
+## Version 2.1.0 - February 21, 2013 ##
+
+  * Improved multithreading support so that Bowtie 2 now uses native Windows
+    threads when compiled on Windows and uses a faster mutex.  Threading
+    performance should improve on all platforms.
+  * Improved support for building 64-bit binaries for Windows x64 platforms.
+  * Bowtie is using a spinlocking mechanism by default.
+  * Test option --nospin is no longer available. However bowtie2 can always
+    be recompiled with EXTRA_FLAGS="-DNO_SPINLOCK" in order to drop the
+    default spinlock usage.
+
+## Version 2.0.6 - January 27, 2013 ##
+
+  * Fixed issue whereby spurious output would be written in --no-unal mode.
+  * Fixed issue whereby multiple input files combined with --reorder would
+    cause truncated output and a memory spike.
+  * Fixed spinlock datatype for Win64 API (LLP64 data model) which made it
+    crash when compiled under Windows 7 x64.
+  * Fixed bowtie2 wrapper to handle filename/paths operations in a more
+    platform independent manner.
+  * Added pthread as a default library option under cygwin, and pthreadGC
+    for MinGW.
+  * Fixed some minor issues that made MinGW compilation fail.
+
+## Version 2.0.5 - January 4, 2013 ##
+
+  * Fixed an issue that would cause excessive memory allocation when aligning
+    to very repetitive genomes.
+  * Fixed an issue that would cause a pseudo-randomness-related assert to be
+    thrown in debug mode under rare circumstances.
+  * When bowtie2-build fails, it will now delete index files created so far so
+    that invalid index files don't linger.
+  * Tokenizer no longer has limit of 10,000 tokens, which was a problem for
+    users trying to index a very large number of FASTA files.
+  * Updated manual's discussion of the -I and -X options to mention that
+    setting them farther apart makes Bowtie 2 slower.
+  * Renamed COPYING to LICENSE and created a README to be GitHub-friendly.
+
+## Version 2.0.4 - December 17, 2012 ##
+
+  * Fixed issue whereby --un, --al, --un-conc and --al-conc options would
+    incorrectly suppress SAM output.
+  * Fixed minor command-line parsing issue in wrapper script.
+  * Fixed issue on Windows where wrapper script would fail to find
+    bowtie2-align.exe binary.
+  * Updated some of the index-building scripts and documentation.
+  * Updated author's contact info in usage message.
+
+## Version 2.0.3 - December 14, 2012 ##
+
+  * Fixed thread safety issues that could cause crashes with a large number of
+    threads.  Thanks to John O'Neill for identifying these issues.
+  * Fixed some problems with pseudo-random number generation that could cause
+    unequal distribution of alignments across equally good candidate loci.
+  * The --un, --al, --un-conc, and --al-conc options (and their compressed
+    analogs) are all much faster now, making it less likely that they become
+    the bottleneck when Bowtie 2 is run with large -p.
+  * Fixed issue with inaccurate mapping qualities, XS:i, and YS:i flags when
+    --no-mixed and --no-discordant are specified at the same time.
+  * Fixed some compiler warnings and errors when using clang++ to compile.
+  * Fixed race condition in bowtie2 script when named pipes are used.
+  * Added more discussion of whitespace in read names to manual.
+
+## Version 2.0.2 - October 31, 2012 ##
+
+  * Fixes a couple small issues pointed out to me immediately after 2.0.1
+    release
+  * Mac binaries now built on 10.6 in order to be forward-compatible with more
+    Mac OS versions
+  * Small changes to source to make it compile with gcc versions up to 4.7
+    without warnings
+
+## Version 2.0.1 - October 31, 2012 ##
+
+  * First non-beta release.
+  * Fixed an issue that would cause Bowtie 2 to use excessive amounts of
+    memory for closely-matching and highly repetitive reads under some
+    circumstances.
+  * Fixed a bug in --mm mode that would fail to report when an index file
+    could not be memory-mapped.
+  * Added --non-deterministic option, which better matches how some users
+    expect the pseudo-random generator inside Bowtie 2 to work.  Normally, if
+    you give the same read (same name, sequence and qualities) and --seed, you
+    get the same answer.  --non-deterministic breaks that guarantee.  This can
+    be more appropriate for datasets where the input contains many identical
+    reads (same name, same sequence, same qualities).
+  * Fixed a bug in bowtie2-build that would yield corrupt index files when memory
+    settings were adjusted in the middle of indexing.
+  * Clarified in manual that --un (or --un-*) options print reads exactly as
+    they appeared in the input, and that they are not necessarily written in
+    the same order as they appeared in the input.
+  * Fixed issue whereby wrapper would incorrectly interpret arguments with
+    --al as a prefix (e.g. --all) as --al.
+  * Added option --omit-sec-seq, which causes Bowtie 2 to set SEQ and QUAL
+    fields to "*" for secondary alignments.
+
+## Version 2.0.0-beta7 - July 9, 2012 ##
+
+  * Fixed an issue in how Bowtie 2 aligns longer reads in --local mode.  Some
+    alignments were incorrectly curtailed on the left-hand side.
+  * Fixed issue whereby --un (or --un-*) would fail to output unaligned reads
+    when --no-unal was also specified.
+  * Fixed issue whereby --un-* were significantly slowing down Bowtie 2 when
+    -p was set greater than 1.
+  * Fixed issue that could cause hangs in -a mode or when -k is set
+    high.
+  * Fixed issue whereby the SAM FLAGS field could be set incorrectly for
+    secondary paired-end alignments with -a or -k > 1.
+  * When input reads are unpaired, Bowtie 2 no longer removes the trailing /1
+    or /2 from the read name.
+  * -M option is now deprecated.  It will be removed in subsequent versions.
+    What used to be called -M mode is still the default mode, and -k and -a
+    are still there as alternatives to the default mode, but adjusting the -M
+    setting is deprecated.  Use the -D and -R options to adjust the effort
+    expended to find valid alignments.
+  * Gaps are now left-aligned in a manner similar to BWA and other tools.
+  * Fixed issue whereby wrapper script would not pass on exitlevel correctly,
+    sometimes spuriously hiding non-0 exitlevel.
+  * Added documentation for YT:Z to manual.
+  * Fixed documentation describing how Bowtie 2 searches for an index given an
+    index basename.
+  * Fixed inconsistent documentation for the default value of the -i parameter
+
+## Version 2.0.0-beta6 - May 7, 2012 ##
+
+  * Bowtie 2 now handles longer reads in a more memory-economical fashion,
+    which should prevent many out-of-memory issues for longer reads.
+  * Error message now produced when -L is set greater than 32.
+  * Added a warning message to warn when bowtie2-align binary is being run
+    directly, rather than via the wrapper.  Some functionality is provided by
+    the wrapper, so Bowtie 2 should always be run via the bowtie2 executable
+    rather than bowtie2-align.
+  * Fixed an occasional crashing bug that was usually caused by setting the
+    seed length relatively short.
+  * Fixed an issue whereby the FLAG, RNEXT and PNEXT fields were incorrect for
+    some paired-end alignments.  Specifically, this affected paired-end
+    alignments where both mates aligned and one or both mates aligned
+    non-uniquely.
+  * Fixed issue whereby compressed input would sometimes be mishandled.
+  * Renamed --sam-* options to omit the "sam-" prefix for brevity.  The old
+    option names will also work.
+  * Added --no-unal option to suppress SAM records corresponding to unaligned
+    reads, i.e., records where FLAG field has 0x4 set.
+  * Added --rg-id option and enhanced the documentation for both --rg-id and
+    --rg.  Users were confused by the need to specify --rg "ID:(something)" in
+    order for the @RG line to be printed; hopefully this is clearer now.
+  * Index updates: indexes linked to in the right-hand sidebar have been
+    updated to include the unplaced contigs appearing in the UCSC "random"
+    FASTA files.  This makes the indexes more complete.  Also, an index for
+    the latest mouse assembly, mm10 (AKA "GRCm38") has been added.
+
+## Version 2.0.0-beta5 - December 14, 2011 ##
+
+  * Added --un, --al, --un-conc, and --al-conc options that write unpaired
+    and/or paired-end reads to files depending on whether they align at least
+    once or fail to align.
+  * Added --reorder option.  When enabled, the order of the SAM records output
+    by Bowtie 2 will match the order of the input reads even when -p is set
+    greater than 1.  This is disabled by default; enabling it makes Bowtie 2
+    somewhat slower and use somewhat more memory when -p is set greater than
+    1.
+  * Changed the default --score-min in --local mode to G,20,8.  This ought to
+    improve sensitivity and accuracy in many cases.
+  * Improved error reporting.
+  * Fixed some minor documentation issues.
+  * Note: I am aware of an issue whereby longer reads (>10,000 bp) drive the
+    memory footprint way up and often cause an out-of-memory exception.  This
+    will be fixed in a future version.
+
+## Version 2.0.0-beta4 - December 5, 2011 ##
+
+  * Accuracy improvements.
+  * Speed improvements in some situations.
+  * Fixed a handful of crashing bugs.
+  * Fixed some documentation bugs.
+  * Fixed bug whereby --version worked incorrectly.
+  * Fixed formatting bug with MD:Z optional field that would sometimes fail to
+    follow a mismatch with a number.
+  * Added -D option for controlling the maximum number of seed extensions that
+    can fail in a row before we move on.  This option or something like it
+    will eventually replace the argument to -M.
+  * Added -R option to control maximum number of times re-seeding is attempted
+    for a read with repetitive seeds.
+  * Changed default to --no-dovetail.  Specifying --dovetail turns it back on.
+  * Added second argument for --mp option so that user can set maximum and
+    minimum mismatch penalties at once.  Also tweaked the formula for
+    calculating the quality-aware mismatch penalty.
+
+## Version 2.0.0-beta3 - November 1, 2011 ##
+
+  * Accuracy improvements.
+  * Speed improvements in some situations.
+  * Fixed a handful of crashing bugs.
+  * Fixed a bug whereby number of repetitively aligned reads could be
+    misreported in the summary output.
+
+## Version 2.0.0-beta2 - October 16, 2011 ##
+
+  * Added manual, both included in the download package and on the website.
+    The website will always have the manual for the latest version.
+  * Added Linux 32-bit and 64-bit binary packages.  Mac OS X packages to come.
+    Still working on a Windows package.
+  * Fixed a bug that led to crashes when seed-alignment result memory was
+    exhausted.
+  * Changed the --end-to-end mode --score-min default to be less permissive.
+    The previous threshold seemed to be having an adverse effect on accuracy,
+    though the fix implemented in this version comes at the expense of some
+    sensitivity.
+  * Changed the --end-to-end mode -M default to be lower by 2 notches.  This
+    offsets any detrimental effect that the previous change would have had on
+    speed, without a large adverse impact on accuracy.  As always, setting -M
+    higher will yield still greater accuracy at the expense of speed.
+
+## Version 2.0.0-beta1 - September 22, 2011 ##
+
+  * First public release.
+  * Caveats: as of now, the manual is incomplete, there's no tutorial, and no
+    example genome or example reads.  All these will be fixed in upcoming
+    releases.
+  * Only a source package is currently available.  Platform-specific binaries
+    will be included in future releases.
diff --git a/README.md b/README.md
index ea651e85..900ea66b 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 
 <!-- badges: start -->
 ![Github Actions](https://github.com/BenLangmead/bowtie2/actions/workflows/bowtie2.yml/badge.svg)
-[![Generic badge](https://img.shields.io/badge/version-2.4.4-green.svg)](https://shields.io/)
-[![Build Status](https://travis-ci.org/BenLangmead/bowtie2.svg?branch=master)](https://travis-ci.org/BenLangmead/bowtie2)
+[![Generic badge](https://img.shields.io/badge/version-2.4.5-green.svg)](https://shields.io/)
+<!-- [![Build Status](https://travis-ci.org/BenLangmead/bowtie2.svg?branch=master)](https://travis-ci.org/BenLangmead/bowtie2) -->
 [![License: GPL v3](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
 <!--badges: end -->
 
diff --git a/aln_sink.cpp b/aln_sink.cpp
index ed80580d..793d245d 100644
--- a/aln_sink.cpp
+++ b/aln_sink.cpp
@@ -2114,6 +2114,12 @@ void AlnSinkSam::appendMate(
 	samc_.printPreservedOptFlags(o, rd);
 	samc_.printComment(o, rd.name);
 	o.append('\n');
+	if(samc_.passthrough()) {
+		// Original read string
+		samc_.printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
+		o.append('\n');
+	}
+
 }
 
 #ifdef ALN_SINK_MAIN
diff --git a/blockwise_sa.h b/blockwise_sa.h
index 6be553a0..a10d889c 100644
--- a/blockwise_sa.h
+++ b/blockwise_sa.h
@@ -22,6 +22,7 @@
 
 #include <stdint.h>
 #include <stdlib.h>
+#include <future>
 #include <iostream>
 #include <sstream>
 #include <thread>
@@ -38,6 +39,7 @@
 #include "ds.h"
 #include "mem_ids.h"
 #include "word_io.h"
+#include "threadpool.h"
 
 using namespace std;
 
@@ -198,6 +200,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 	KarkkainenBlockwiseSA(const TStr& __text,
 			      TIndexOffU __bucketSz,
 			      int __nthreads,
+			      thread_pool& pool,
 			      uint32_t __dcV,
 			      uint32_t __seed = 0,
 			      bool __sanityCheck = false,
@@ -208,6 +211,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 		InorderBlockwiseSA<TStr>(__text, __bucketSz, __sanityCheck, __passMemExc, __verbose, __logger),
 		_sampleSuffs(EBWTB_CAT),
 		_nthreads(__nthreads),
+		_pool(pool),
 		_itrBucketIdx(0),
 		_cur(0),
 		_dcV(__dcV),
@@ -218,17 +222,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 		_done(NULL)
 		{ _randomSrc.init(__seed); reset(); }
 
-	~KarkkainenBlockwiseSA() throw()
-		{
-			if(_threads.size() > 0) {
-				for (size_t tid = 0; tid < _threads.size(); tid++) {
-					_threads[tid]->join();
-					delete _threads[tid];
-				}
-			}
-			if (_done != NULL)
-				delete[] _done;
-		}
+	~KarkkainenBlockwiseSA() throw() {}
 
 	/**
 	 * Allocate an amount of memory that simulates the peak memory
@@ -253,7 +247,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 		{
 			// Launch threads if not
 			if(this->_nthreads > 1) {
-				if(_threads.size() == 0) {
+				if(_tparams.size() == 0) {
 					_done = new volatile bool[_sampleSuffs.size() + 1];
 					for (size_t i = 0; i < _sampleSuffs.size() + 1; i++) {
 						_done[i] = false;
@@ -263,9 +257,11 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 					for(int tid = 0; tid < this->_nthreads; tid++) {
 						_tparams[tid].first = this;
 						_tparams[tid].second = tid;
-						_threads.push_back(new thread(nextBlock_Worker((void*)&_tparams[tid])));
+						if (tid == _nthreads - 1)
+							nextBlock_Worker((void *)&_tparams[tid]);
+						else
+							_pool.submit(nextBlock_Worker((void *)&_tparams[tid]));
 					}
-					assert_eq(_threads.size(), (size_t)this->_nthreads);
 				}
 			}
 			if(this->_itrPushedBackSuffix != OFF_MASK) {
@@ -396,7 +392,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 		assert(_dc.get() == NULL);
 		if(_dcV != 0) {
 			_dc.init(new TDC(this->text(), _dcV, this->verbose(), this->sanityCheck()));
-			_dc.get()->build(this->_nthreads);
+			_dc.get()->build(_pool, this->_nthreads);
 		}
 		// Calculate sample suffixes
 		if(this->bucketSz() <= this->text().length()) {
@@ -436,6 +432,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 
 	EList<TIndexOffU>  _sampleSuffs; /// sample suffixes
 	int                _nthreads;    /// # of threads
+	thread_pool& _pool;
 	TIndexOffU         _itrBucketIdx;
 	TIndexOffU         _cur;         /// offset to 1st elt of next block
 	const uint32_t   _dcV;         /// difference-cover periodicity
@@ -522,7 +519,7 @@ struct BinarySortingParam {
 };
 
 template<typename TStr>
-class BinarySorting_worker {
+class  BinarySorting_worker {
         void *vp;
 
 public:
@@ -638,7 +635,7 @@ void KarkkainenBlockwiseSA<TStr>::buildSamples() {
 	// Iterate until all buckets are less than
 	while(--limit >= 0) {
 		TIndexOffU numBuckets = (TIndexOffU)_sampleSuffs.size()+1;
-		AutoArray<std::thread*> threads(this->_nthreads);
+		std::vector<std::future<void> > threads(_pool.size());
 		EList<BinarySortingParam<TStr> > tparams;
 		tparams.resize(this->_nthreads);
 		for(int tid = 0; tid < this->_nthreads; tid++) {
@@ -665,19 +662,17 @@ void KarkkainenBlockwiseSA<TStr>::buildSamples() {
 			tparams[tid].sampleSuffs = &_sampleSuffs;
 			tparams[tid].begin = (tid == 0 ? 0 : len / this->_nthreads * tid);
 			tparams[tid].end = (tid + 1 == this->_nthreads ? len : len / this->_nthreads * (tid + 1));
-			if(this->_nthreads == 1) {
+			if(this->_nthreads == 1 || tid == _nthreads - 1) {
 				BinarySorting_worker<TStr>((void*)&tparams[tid])();
 			} else {
-				threads[tid] = new std::thread(BinarySorting_worker<TStr>(((void*)&tparams[tid])));
+				threads[tid] = _pool.submit(BinarySorting_worker<TStr>(((void*)&tparams[tid])));
 			}
 		}
 
 		if(this->_nthreads > 1) {
-			for (int tid = 0; tid < this->_nthreads; tid++) {
-				threads[tid]->join();
+			for (int tid = 0; tid < _pool.size(); tid++) {
+				threads[tid].get();
 			}
-			for (int tid = 0; tid < this->_nthreads; tid++)
-				delete threads[tid];
 		}
 		EList<TIndexOffU>& bucketSzs = tparams[0].bucketSzs;
 		EList<TIndexOffU>& bucketReps = tparams[0].bucketReps;
diff --git a/bowtie2-build b/bowtie2-build
index 116c0bda..0effb80f 100755
--- a/bowtie2-build
+++ b/bowtie2-build
@@ -19,14 +19,24 @@
  along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 """
 
-
 import os
 import sys
+import gzip
+import struct
 import inspect
 import logging
 import argparse
 import subprocess
-from collections import deque
+
+def get_gunzip_size(fn):
+    size = 0
+    with gzip.open(fn) as f:
+        while True:
+            data = f.read(8192)
+            size += len(data)
+            if not data:
+                break
+    return size
 
 def main():
     parser = argparse.ArgumentParser(add_help = False)
@@ -51,11 +61,6 @@ def main():
     build_bin_spec      = os.path.join(ex_path,build_bin_s)
 
     script_options, argv = parser.parse_known_args()
-    print_help = False
-    argv = deque(argv)
-
-    if '-h' in argv or '--help' in argv:
-        print_help = True
 
     if script_options.verbose:
         logging.getLogger().setLevel(logging.INFO)
@@ -68,23 +73,41 @@ def main():
         build_bin_spec += '-sanitized'
         build_bin_l += '-sanitized'
 
+    fastas = []
+    if '-c' not in argv and len(argv) >= 2:
+        for index in reversed(range(len(argv) - 1)):
+            arg = argv[index]
+            if arg.startswith('-') or arg.isdigit():
+                break
+            fastas.insert(0, arg)
+            argv.remove(arg)
+        if fastas:
+            fastas = ','.join(fastas)
+            argv.insert(len(argv) - 1, fastas)
+
     if script_options.large_index:
         build_bin_spec = os.path.join(ex_path,build_bin_l)
-    elif len(argv) >= 2:
-        ref_fnames = argv[-2]
+    elif fastas:
         tot_size = 0
-        for fn in ref_fnames.split(','):
+        for fn in fastas.split(','):
             if os.path.exists(fn):
-                statinfo = os.stat(fn)
-                tot_size += statinfo.st_size
+                if fn.endswith('.gz') or fn.endswith(".Z"):
+                    tot_size += get_gunzip_size(fn)
+                else:
+                    statinfo = os.stat(fn)
+                    tot_size += statinfo.st_size
         if tot_size > small_index_max_size:
             build_bin_spec = os.path.join(ex_path,build_bin_l)
 
-    argv.appendleft('basic-0')
-    argv.appendleft('--wrapper')
-    argv.appendleft(build_bin_spec)
+    if not os.path.exists(build_bin_spec):
+        sys.stderr.write('{0} does not exist, try running `[g]make {0}\'\n'.format(os.path.basename(build_bin_spec)))
+        sys.exit(1)
+
+    argv.insert(0, 'basic-0')
+    argv.insert(0, '--wrapper')
+    argv.insert(0, build_bin_spec)
     logging.info('Command: %s' % ' '.join(argv))
-    sys.exit(subprocess.call(list(argv)))
+    sys.exit(subprocess.call(argv))
 
 if __name__ == '__main__':
     main()
diff --git a/bt2_build.cpp b/bt2_build.cpp
index 2f9ce275..9615c381 100644
--- a/bt2_build.cpp
+++ b/bt2_build.cpp
@@ -138,17 +138,17 @@ static void printUsage(ostream& out) {
 	out << "Usage: " << tool_name << " [options]* <reference_in> <bt2_index_base>" << endl
 	    << "    reference_in            comma-separated list of files with ref sequences" << endl
 	    << "    bt2_index_base          write " + gEbwt_ext + " data to files with this dir/basename" << endl
-	    << "*** Bowtie 2 indexes work only with v2 (not v1).  Likewise for v1 indexes. ***" << endl
+	    << "*** Bowtie 2 indexes will work with Bowtie v1.2.3 and later. ***" << endl
 	    << "Options:" << endl
 	    << "    -f                      reference files are Fasta (default)" << endl
 	    << "    -c                      reference sequences given on cmd line (as" << endl
 	    << "                            <reference_in>)" << endl;
 	if(wrapper == "basic-0") {
 	out << "    --large-index           force generated index to be 'large', even if ref" << endl
-		<< "                            has fewer than 4 billion nucleotides" << endl
-		<< "    --debug                 use the debug binary; slower, assertions enabled" << endl
-		<< "    --sanitized             use sanitized binary; slower, uses ASan and/or UBSan" << endl
-		<< "    --verbose               log the issued command" << endl;
+	    << "                            has fewer than 4 billion nucleotides" << endl
+	    << "    --debug                 use the debug binary; slower, assertions enabled" << endl
+	    << "    --sanitized             use sanitized binary; slower, uses ASan and/or UBSan" << endl
+	    << "    --verbose               log the issued command" << endl;
 	}
 	out << "    -a/--noauto             disable automatic -p/--bmax/--dcv memory-fitting" << endl
 	    << "    -p/--packed             use packed strings internally; slower, less memory" << endl
@@ -166,16 +166,15 @@ static void printUsage(ostream& out) {
 	    //<< (currentlyBigEndian()? "big":"little") << ")" << endl
 	    << "    --seed <int>            seed for random number generator" << endl
 	    << "    -q/--quiet              verbose output (for debugging)" << endl
-	    << "    -h/--help               print detailed description of tool and its options" << endl
-	    << "    --usage                 print this usage message" << endl
+	    << "    --h/--help              print this message and quit" << endl
 	    << "    --version               print version information and quit" << endl
 	    ;
 	if(wrapper.empty()) {
 		cerr << endl
 		     << "*** Warning ***" << endl
-			 << "'" << tool_name << "' was run directly.  It is recommended "
-			 << "that you run the wrapper script 'bowtie2-build' instead."
-			 << endl << endl;
+		     << "'" << tool_name << "' was run directly.  It is recommended "
+		     << "that you run the wrapper script 'bowtie2-build' instead."
+		     << endl << endl;
 	}
 }
 
@@ -360,6 +359,14 @@ static void deleteIdxFiles(
 	}
 }
 
+static void renameIdxFiles() {
+	for (size_t i = 0; i < filesWritten.size(); i++) {
+		std::string oldName = filesWritten[i] + ".tmp";
+		std::cerr << "Renaming " << oldName << " to " << filesWritten[i] << std::endl;
+		std::rename(oldName.c_str(), filesWritten[i].c_str());
+	}
+}
+
 /**
  * Drive the index construction process and optionally sanity-check the
  * result.
@@ -686,7 +693,6 @@ int bowtie_build(int argc, const char **argv) {
 		if(packed) {
 			driver<S2bDnaString>(infile, infiles, outfile + ".rev", true, reverseType);
 		}
-		return 0;
 	} catch(std::exception& e) {
 		cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
 		cerr << "Command: ";
@@ -704,5 +710,7 @@ int bowtie_build(int argc, const char **argv) {
 		deleteIdxFiles(outfile, writeRef || justRef, justRef);
 		return e;
 	}
+	renameIdxFiles();
+	return 0;
 }
 }
diff --git a/bt2_idx.h b/bt2_idx.h
index e3856cce..f39be309 100644
--- a/bt2_idx.h
+++ b/bt2_idx.h
@@ -54,6 +54,7 @@
 #include "random_source.h"
 #include "mem_ids.h"
 #include "btypes.h"
+#include "threadpool.h"
 
 #ifdef POPCNT_CAPABILITY
 #include "processor_support.h"
@@ -664,8 +665,8 @@ class Ebwt {
 		ProcessorSupport ps;
 		_usePOPCNTinstruction = ps.POPCNTenabled();
 #endif
-		_in1Str = file + ".1." + gEbwt_ext;
-		_in2Str = file + ".2." + gEbwt_ext;
+		_in1Str = file + ".1." + gEbwt_ext + ".tmp";
+		_in2Str = file + ".2." + gEbwt_ext + ".tmp";
 		packed_ = packed;
 		// Open output files
 		ofstream fout1(_in1Str.c_str(), ios::binary);
@@ -1068,6 +1069,7 @@ class Ebwt {
 			streampos out1pos = out1.tellp();
 			streampos out2pos = out2.tellp();
 			// Look for bmax/dcv parameters that work.
+			thread_pool pool(nthreads - 1);
 			while(true) {
 				if(!first && bmax < 40 && _passMemExc) {
 					cerr << "Could not find approrpiate bmax/dcv settings for building this index." << endl;
@@ -1131,7 +1133,7 @@ class Ebwt {
 						VMSG_NL("");
 					}
 					VMSG_NL("Constructing suffix-array element generator");
-					KarkkainenBlockwiseSA<TStr> bsa(s, bmax, nthreads, dcv, seed, _sanity, _passMemExc, _verbose, outfile);
+					KarkkainenBlockwiseSA<TStr> bsa(s, bmax, nthreads, pool, dcv, seed, _sanity, _passMemExc, _verbose, outfile);
 					assert(bsa.suffixItrIsReset());
 					assert_eq(bsa.size(), s.length()+1);
 					VMSG_NL("Converting suffix-array elements to index image");
@@ -2403,7 +2405,7 @@ class Ebwt {
 		assert_lt(_zEbwtBpOff, 4);
 		assert_lt(_zEbwtByteOff, eh._ebwtTotSz);
 		assert_lt(_zOff, eh._bwtLen);
-		assert_geq(_nFrag, _nPat);
+		// assert_geq(_nFrag, _nPat);
 		return true;
 	}
 
@@ -2592,9 +2594,6 @@ TStr Ebwt::join(EList<FileBuf*>& l,
 		while(!l[i]->eof()) {
 			RefRecord rec = fastaRefReadAppend(*l[i], first, ret, dstoff, rpcp);
 			first = false;
-			if(rec.first && rec.len == 0) {
-				continue;
-			}
 			TIndexOffU bases = rec.len;
 			assert_eq(rec.off, szs[szsi].off);
 			assert_eq(rec.len, szs[szsi].len);
@@ -2639,10 +2638,10 @@ void Ebwt::joinToDisk(
 	this->_nFrag = 0;
 	for(TIndexOffU i = 0; i < szs.size(); i++) {
 		if(szs[i].len > 0) this->_nFrag++;
-		if(szs[i].first && szs[i].len > 0) this->_nPat++;
+		if(szs[i].first) this->_nPat++;
 	}
 	assert_gt(this->_nPat, 0);
-	assert_geq(this->_nFrag, this->_nPat);
+	// assert_geq(this->_nFrag, this->_nPat);
 	_rstarts.reset();
 	writeU<TIndexOffU>(out1, this->_nPat, _switchEndian);
 	// Allocate plen[]
@@ -2656,15 +2655,12 @@ void Ebwt::joinToDisk(
 	// For each pattern, set plen
 	TIndexOff npat = -1;
 	for(TIndexOffU i = 0; i < szs.size(); i++) {
-		if(szs[i].first && szs[i].len > 0) {
+		if(szs[i].first) {
 			if(npat >= 0) {
 				writeU<TIndexOffU>(out1, this->plen()[npat], _switchEndian);
 			}
 			this->plen()[++npat] = (szs[i].len + szs[i].off);
-		} else if(!szs[i].first) {
-			// edge case, but we could get here with npat == -1
-			// e.g. when building from a reference of all Ns
-			if (npat < 0) npat = 0;
+		} else {
 			this->plen()[npat] += (szs[i].len + szs[i].off);
 		}
 	}
@@ -2691,7 +2687,7 @@ void Ebwt::joinToDisk(
 				*l[i], first, ret, dstoff, rpcp, &_refnames.back());
 			first = false;
 			TIndexOffU bases = rec.len;
-			if(rec.first && rec.len > 0) {
+			if(rec.first) {
 				if(_refnames.back().length() == 0) {
 					// If name was empty, replace with an index
 					ostringstream stm;
@@ -2701,18 +2697,16 @@ void Ebwt::joinToDisk(
 			} else {
 				// This record didn't actually start a new sequence so
 				// no need to add a name
-				//assert_eq(0, _refnames.back().length());
+				// assert_eq(0, _refnames.back().length());
 				_refnames.pop_back();
 			}
-			if(rec.first && rec.len == 0) {
-				continue;
-			}
 			assert_lt(szsi, szs.size());
 			assert_eq(rec.off, szs[szsi].off);
 			assert_eq(rec.len, szs[szsi].len);
 			assert_eq(rec.first, szs[szsi].first);
 			assert(rec.first || rec.off > 0);
 			ASSERT_ONLY(szsi++);
+
 			// Increment seqsRead if this is the first fragment
 			if(rec.first) seqsRead++;
 			if(bases == 0) continue;
@@ -2865,7 +2859,6 @@ void Ebwt::buildToDisk(
 		// Write length word
 		writeU<TIndexOffU>(*bwtOut, len+1, _switchEndian);
 	}
-
 	while(side < ebwtTotSz) {
 		// Sanity-check our cursor into the side buffer
 		assert_geq(sideCur, 0);
diff --git a/bt2_inspect.cpp b/bt2_inspect.cpp
index 76707157..3d1d7c3d 100644
--- a/bt2_inspect.cpp
+++ b/bt2_inspect.cpp
@@ -21,6 +21,7 @@
 
 #include <string>
 #include <iostream>
+#include <fstream>
 #include <getopt.h>
 #include <stdexcept>
 
@@ -38,8 +39,9 @@ static int names_only   = 0;  // just print the sequence names in the index
 static int summarize_only = 0; // just print summary of index and quit
 static int across       = 60; // number of characters across in FASTA output
 static bool refFromEbwt = false; // true -> when printing reference, decode it from Ebwt instead of reading it from BitPairReference
+static string out_filename = ""; // name of output file
 static string wrapper;
-static const char *short_options = "vhnsea:";
+static const char *short_options = "vhnsea:o:";
 
 enum {
 	ARG_VERSION = 256,
@@ -56,6 +58,7 @@ static struct option long_options[] = {
 	{(char*)"help",     no_argument,        0, 'h'},
 	{(char*)"across",   required_argument,  0, 'a'},
 	{(char*)"ebwt-ref", no_argument,        0, 'e'},
+	{(char*)"output",   required_argument,  0, 'o'},
 	{(char*)"wrapper",  required_argument,  0, ARG_WRAPPER},
 	{(char*)0, 0, 0, 0} // terminator
 };
@@ -76,23 +79,23 @@ static void printUsage(ostream& out) {
 	<< "Options:" << endl;
 	if(wrapper == "basic-0") {
 		out << "  --large-index      force inspection of the 'large' index, even if a" << endl
-			<< "                     'small' one is present." << endl
-			<< "  --debug            use the debug binary; slower, assertions enabled" << endl
-			<< "  --sanitized        use sanitized binary; slower, uses ASan and/or UBSan" << endl
-			<< "  --verbose          log the issued command" << endl;
+		    << "                     'small' one is present." << endl
+		    << "  --debug            use the debug binary; slower, assertions enabled" << endl
+		    << "  --sanitized        use sanitized binary; slower, uses ASan and/or UBSan" << endl
+		    << "  --verbose          log the issued command" << endl;
 	}
 	out << "  -a/--across <int>  Number of characters across in FASTA output (default: 60)" << endl
-	<< "  -n/--names         Print reference sequence names only" << endl
-	<< "  -s/--summary       Print summary incl. ref names, lengths, index properties" << endl
-	<< "  -v/--verbose       Verbose output (for debugging)" << endl
-	<< "  -h/--help          print detailed description of tool and its options" << endl
-	;
+	    << "  -n/--names         Print reference sequence names only" << endl
+	    << "  -s/--summary       Print summary incl. ref names, lengths, index properties" << endl
+	    << "  -o/--output        Save output to filename (default stdout)" << endl
+	    << "  -v/--verbose       Verbose output (for debugging)" << endl
+	    << "  -h/--help          print this and message quit" << endl;
 	if(wrapper.empty()) {
 		cerr << endl
 		     << "*** Warning ***" << endl
-			 << "'boowtie2-inspect' was run directly.  It is recommended "
-			 << "to use the wrapper script instead."
-			 << endl << endl;
+		     << "'boowtie2-inspect' was run directly.  It is recommended "
+		     << "to use the wrapper script instead."
+		     << endl << endl;
 	}
 }
 
@@ -141,6 +144,7 @@ static void parseOptions(int argc, char **argv) {
 			case 'e': refFromEbwt = true; break;
 			case 'n': names_only = true; break;
 			case 's': summarize_only = true; break;
+			case 'o': out_filename = optarg; break;
 			case 'a': across = parseInt(-1, "-a/--across arg must be at least 1"); break;
 			case -1: break; /* Done with options. */
 			case 0:
@@ -369,18 +373,21 @@ static void driver(
 	const string& ebwtFileBase,
 	const string& query)
 {
-	// Adjust
+	ostream *fout = out_filename == "" ? &cout : new ofstream(out_filename);
+	if (!fout->good()) {
+		cerr << "Unable to open " << out_filename << " for writing." << endl;
+		exit(1);
+	}
 	string adjustedEbwtFileBase = adjustEbwtBase(argv0, ebwtFileBase, verbose);
-
 	if (names_only) {
-		print_index_sequence_names(adjustedEbwtFileBase, cout);
+		print_index_sequence_names(adjustedEbwtFileBase, *fout);
 	} else if(summarize_only) {
-		print_index_summary(adjustedEbwtFileBase, cout);
+		print_index_summary(adjustedEbwtFileBase, *fout);
 	} else {
 		// Initialize Ebwt object
 		bool color = readEbwtColor(adjustedEbwtFileBase);
 		Ebwt ebwt(
-			adjustedEbwtFileBase, 
+			adjustedEbwtFileBase,
 			color,                // index is colorspace
 			-1,                   // don't care about entire-reverse
 			true,                 // index is for the forward direction
@@ -412,7 +419,7 @@ static void driver(
 			EList<string> refnames;
 			readEbwtRefnames(adjustedEbwtFileBase, refnames);
 			print_ref_sequences(
-				cout,
+				*fout,
 				readEbwtColor(ebwtFileBase),
 				refnames,
 				ebwt.plen(),
@@ -422,6 +429,10 @@ static void driver(
 		if(ebwt.isInMemory()) {
 			ebwt.evictFromMemory();
 		}
+		if (fout != &cout) {
+			((ofstream *)fout)->close();
+			delete fout;
+		}
 	}
 }
 
diff --git a/bt2_io.cpp b/bt2_io.cpp
index 78ee8b25..b571f934 100644
--- a/bt2_io.cpp
+++ b/bt2_io.cpp
@@ -287,7 +287,7 @@ void Ebwt::readIntoMemory(
 		cerr << "Reading rstarts (" << this->_nFrag*3 << "): ";
 		logTime(cerr);
 	}
-	assert_geq(this->_nFrag, this->_nPat);
+	// assert_geq(this->_nFrag, this->_nPat);
 	_rstarts.reset();
 	if(loadRstarts) {
 		if(_useMm) {
@@ -695,9 +695,9 @@ readEbwtRefnames(FILE* fin, EList<string>& refnames) {
 	while(true) {
 		char c = '\0';
 		int read_value = 0;
-        read_value = fgetc(fin);
+		read_value = fgetc(fin);
 		if(read_value == EOF) break;
-        c = read_value;
+		c = read_value;
 		if(c == '\0') break;
 		else if(c == '\n') {
 			refnames.push_back("");
@@ -713,7 +713,7 @@ readEbwtRefnames(FILE* fin, EList<string>& refnames) {
 	}
 
 	// Be kind
-    fseeko(fin, 0, SEEK_SET);
+	fseeko(fin, 0, SEEK_SET);
 	assert(ferror(fin) == 0);
 }
 
@@ -935,10 +935,11 @@ void Ebwt::szsToDisk(const EList<RefRecord>& szs, ostream& os, int reverse) {
 	TIndexOffU off = 0;
 	TIndexOffU totlen = 0;
 	for(unsigned int i = 0; i < szs.size(); i++) {
-		if(szs[i].len == 0) continue;
 		if(szs[i].first) off = 0;
 		off += szs[i].off;
-		if(szs[i].first && szs[i].len > 0) seq++;
+		if(szs[i].first) seq++;
+		if(szs[i].len == 0) continue;
+
 		TIndexOffU seqm1 = seq-1;
 		assert_lt(seqm1, _nPat);
 		TIndexOffU fwoff = off;
diff --git a/diff_sample.h b/diff_sample.h
index b722fae9..7eb0bc1d 100644
--- a/diff_sample.h
+++ b/diff_sample.h
@@ -30,6 +30,7 @@
 #include "mem_ids.h"
 #include "ls.h"
 #include "btypes.h"
+#include "threadpool.h"
 
 using namespace std;
 
@@ -506,7 +507,7 @@ class DifferenceCoverSample {
 	const EList<uint32_t>& dmap() const  { return _dmap; }
 	ostream& log() const                 { return _logger; }
 
-	void     build(int nthreads);
+	void     build(thread_pool& pool, int nthreads);
 	uint32_t tieBreakOff(TIndexOffU i, TIndexOffU j) const;
 	int64_t  breakTie(TIndexOffU i, TIndexOffU j) const;
 	bool     isCovered(TIndexOffU i) const;
@@ -740,7 +741,7 @@ class VSorting_worker {
  * packed according to the mu mapping, in _isaPrime.
  */
 template <typename TStr>
-void DifferenceCoverSample<TStr>::build(int nthreads) {
+void DifferenceCoverSample<TStr>::build(thread_pool& pool, int nthreads) {
 	// Local names for relevant types
 	VMSG_NL("Building DifferenceCoverSample");
 	// Local names for relevant data
@@ -806,7 +807,7 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
 				mkeyQSortSuf2(t, sPrimeArr, sPrimeSz, sPrimeOrderArr, 4,
 				              this->verbose(), false, query_depth, &boundaries);
 				if(boundaries.size() > 0) {
-					AutoArray<std::thread*> threads(nthreads);
+					std::vector<std::future<void> > threads(pool.size());
 					EList<VSortingParam<TStr> > tparams;
 					size_t cur = 0;
 					MUTEX_T mutex;
@@ -822,13 +823,14 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
 						tparams[tid].boundaries = &boundaries;
 						tparams[tid].cur = &cur;
 						tparams[tid].mutex = &mutex;
-						threads[tid] = new std::thread(VSorting_worker<TStr>(((void*)&tparams[tid])));
+						if (tid == nthreads - 1)
+							VSorting_worker<TStr>(((void*)&tparams[tid]));
+						else
+							threads[tid] = pool.submit(VSorting_worker<TStr>(((void*)&tparams[tid])));
 					}
-					for (int tid = 0; tid < nthreads; tid++) {
-						threads[tid]->join();
+					for (int tid = 0; tid < pool.size(); tid++) {
+						threads[tid].get();
 					}
-					for (int tid = 0; tid < nthreads; tid++)
-						delete threads[tid];
 				}
 				if(this->sanityCheck()) {
 					sanityCheckOrderedSufs(t, t.length(), sPrimeArr, sPrimeSz, v);
diff --git a/doc/manual.html b/doc/manual.html
index b5c6c06a..f313cb6c 100644
--- a/doc/manual.html
+++ b/doc/manual.html
@@ -1555,6 +1555,14 @@ <h3 id="options-2">Options</h3>
 </td>
 </tr>
 <tr>
+<td id="bowtie2-inspect-options-o">
+<pre><code>-o/--output &lt;filename&gt;</code></pre>
+</td>
+<td>
+<p>Save output to user-specified filename (default: stdout)</p>
+</td>
+</tr>
+<tr>
 <td>
 <pre><code>-v/--verbose</code></pre>
 </td>
diff --git a/doc/website/manual.ssi b/doc/website/manual.ssi
index 03b1da34..1a578a9c 100644
--- a/doc/website/manual.ssi
+++ b/doc/website/manual.ssi
@@ -1529,6 +1529,14 @@ Sequence-N  &lt;name&gt;  &lt;len&gt;</code></pre>
 </td>
 </tr>
 <tr>
+<td id="bowtie2-inspect-options-o">
+<pre><code>-o/--output &lt;filename&gt;</code></pre>
+</td>
+<td>
+<p>Save output to user-specified filename (default: stdout)</p>
+</td>
+</tr>
+<tr>
 <td>
 <pre><code>-v/--verbose</code></pre>
 </td>
diff --git a/doc/website/recent_news.ssi b/doc/website/recent_news.ssi
index e4fce9fa..ae3f1ce1 100644
--- a/doc/website/recent_news.ssi
+++ b/doc/website/recent_news.ssi
@@ -1,3 +1,25 @@
+<h2>Version 2.4.5 - Jan 16, 2022</h2>
+<h3 id="bowtie2">bowtie2</h3>
+<ul>
+    <li>Fixed issues with <code>bowtie2</code> BAM parser that would cause <code>bowtie2</code> to crash when processing input that was encoded with tools other than samtools e.g. Picard.</li>
+    <li>Fixed an issue causing <code>bowtie2</code> to drop certain optional fields when aligning BAM reads with the <code><a href="manual.shtml#bowtie2-options-preserve-tags">--preserve-tags</a></code> option.</li>
+    <li>Fixed an issue causing <code>bowtie2</code> to produce mangled SAM output when specifying <code><a href="manual.shtml#bowtie2-options-sam-append-comment">--sam-append-comment</a></code> together with the <code>--passthrough</code> option.</li>
+    <li>Appended <code>GO:query</code> to the SAM <code>@HD</code> entry to indicate that reads are grouped by query name, and bumped the SAM version to 1.5 to indicate support for this change.</li>
+</ul>
+
+<h3 id="bowtie2-build">bowtie2-build</h3>
+<ul>
+    <li>Implemented thread pool to address performance regressions introduced during the switch to C++11 threads.</li>
+    <li>Fixed an issue causing masked-sequence metadata to be omitted from index. This issue would subsequently result in sequence data, <code>@SQ</code>, being left out from alignment SAM header.</li>
+    <li>Added a <code>.tmp</code> extension to index files currently being built. The extension is removed only upon successful build. This change seeks to address the assumption that <code>bowtie2-build</code> ran successfully without building the reverse indexes.</li>
+    <li>Fixed an issue causing <code>bowtie2-build</code> to sometimes incorrectly calculate input size. This issue would result in the wrong index type being chosen and only happened with GZip compressed files.</li>
+</ul>
+
+<h3 id="bowtie2-inspect">bowtie2-inspect</h3>
+<ul>
+    <li>Added a new <code><a href="manual.shtml#bowtie2-inspect-options-o">-o/--output</a></code> option to save the output of <code>bowtie2-inspect</code> to a file instead of being dumped to standard output.</li>
+</ul>
+
 <h2>Version 2.4.4 - May 23, 2021</h2>
 <ul>
     <li>Fixed an issue that would sometimes cause deadlocks in <code>bowtie2</code> when running multithreaded</li>
diff --git a/doc/website/rhsidebar.ssi b/doc/website/rhsidebar.ssi
index 70c7e9b4..d3525ec1 100644
--- a/doc/website/rhsidebar.ssi
+++ b/doc/website/rhsidebar.ssi
@@ -18,10 +18,10 @@
         </tr>
       <tr>
       <td>
-        <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.4.4">Bowtie2 v2.4.4</a>
+        <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.4.5">Bowtie2 v2.4.5</a>
       </td>
       <td align="right">
-        23/05/21&nbsp;
+        16/01/22&nbsp;
       </td>
       </tr>
       <tr>
diff --git a/pat.cpp b/pat.cpp
index 196cd425..bb7fbb60 100644
--- a/pat.cpp
+++ b/pat.cpp
@@ -480,16 +480,20 @@ void CFilePatternSource::open() {
 	}
 	while(filecur_ < infiles_.size()) {
 		if(infiles_[filecur_] == "-") {
-			// always assume that data from stdin is compressed
-			compressionType_ = CompressionType::GZIP;
 			int fd = dup(fileno(stdin));
-			zfp_ = gzdopen(fd, "rb");
+			if (pp_.format == BAM) {
+				compressionType_ = CompressionType::NONE;
+				fp_ = fdopen(fd, "rb");
+			} else {
+				// always assume that data from stdin is compressed
+				compressionType_ = CompressionType::GZIP;
+				zfp_ = gzdopen(fd, "rb");
 
-			if (zfp_ == NULL) {
-				close(fd);
+				if (zfp_ == NULL) {
+					close(fd);
+				}
 			}
-		}
-		else {
+		} else {
 			const char* filename = infiles_[filecur_].c_str();
 
 			int fd = ::open(filename, O_RDONLY);
@@ -1289,9 +1293,11 @@ std::pair<bool, int> BAMPatternSource::nextBatch(PerThreadReadBuf& pt, bool batc
 			if (ret_code != Z_OK) {
 				return make_pair(true, 0);
 			}
+#ifndef NDEBUG
 			uLong crc = crc32(0L, Z_NULL, 0);
 			crc = crc32(crc, &alignment_batch[0] + delta_, alignment_batch.size() - delta_);
 			assert(crc == block.ftr.crc32);
+#endif
 			delta_ = 0;
 		}
 		std::pair<bool, int> ret = get_alignments(pt, batch_a, nread, lock);
@@ -1307,7 +1313,6 @@ std::pair<bool, int> BAMPatternSource::nextBatch(PerThreadReadBuf& pt, bool batc
 std::pair<bool, int> BAMPatternSource::get_alignments(PerThreadReadBuf& pt, bool batch_a, unsigned& readi, bool lock) {
 	size_t& i = alignment_offset;
 	bool done = false;
-	bool read1 = true;
 
 	if (first_) {
 		char magic[4];
@@ -1334,18 +1339,18 @@ std::pair<bool, int> BAMPatternSource::get_alignments(PerThreadReadBuf& pt, bool
 		}
 
 		uint16_t flag;
-		uint32_t block_size;
-		EList<Read>& readbuf = pp_.align_paired_reads && !read1 ? pt.bufb_ : pt.bufa_;
+		uint32_t block_size = -1;
 
+		if ((alignment_batch.size() - i) < sizeof(block_size))
+			goto next_batch;
 		memcpy(&block_size, &alignment_batch[0] + i, sizeof(block_size));
 		if (currentlyBigEndian())
 			block_size = endianSwapU32(block_size);
 		if (block_size == 0) {
 			return make_pair(done, readi);
 		}
-		if (block_size > (alignment_batch.size() - i)) {
-			// copy the rest of the block to an array
-			// and get another batch of alignments
+		if (block_size > (alignment_batch.size() - i - sizeof(block_size))) {
+		  next_batch:
 			delta_ = alignment_batch.size() - i;
 			memcpy(&alignment_batch[0], &alignment_batch[0] + i, delta_);
 			i = alignment_batch.size();
@@ -1355,121 +1360,54 @@ std::pair<bool, int> BAMPatternSource::get_alignments(PerThreadReadBuf& pt, bool
 		memcpy(&flag, &alignment_batch[0] + i + offset[BAMField::flag], sizeof(flag));
 		if (currentlyBigEndian())
 			flag = endianSwapU16(flag);
+		EList<Read>& readbuf = (pp_.align_paired_reads && (flag & 0x80)) != 0 ? pt.bufb_ : pt.bufa_;
+		if ((flag & 0x4) == 0) {
+			readbuf[readi].readOrigBuf.clear();
+			i += block_size;
+			continue;
+		}
 		if (!pp_.align_paired_reads && ((flag & 0x40) != 0 || (flag & 0x80) != 0)) {
 			readbuf[readi].readOrigBuf.clear();
 			i += block_size;
 			continue;
 		}
-
 		if (pp_.align_paired_reads && ((flag & 0x40) == 0 && (flag & 0x80) == 0)) {
 			readbuf[readi].readOrigBuf.clear();
 			i += block_size;
 			continue;
 		}
 
-		if (pp_.align_paired_reads &&
-                    (((flag & 0x40) != 0 && i + block_size == alignment_batch.size()) ||
-                     ((flag & 0x80) != 0 && i == sizeof(block_size))))
-		{
-			if (lock) {
-				ThreadSafe ts(orphan_mates_mutex_);
-				get_or_store_orhaned_mate(pt.bufa_, pt.bufb_, readi, &alignment_batch[0] + i, block_size);
-				i += block_size;
-			} else {
-				get_or_store_orhaned_mate(pt.bufa_, pt.bufb_, readi, &alignment_batch[0] + i, block_size);
-				i += block_size;
-			}
-
-		} else {
-			readbuf[readi].readOrigBuf.resize(block_size);
-
-			memcpy(readbuf[readi].readOrigBuf.wbuf(), &alignment_batch[0] + i, block_size);
-			i += block_size;
-
-			read1 = !read1;
-			readi = (pp_.align_paired_reads &&
-				 pt.bufb_[readi].readOrigBuf.length() == 0) ? readi : readi + 1;
-		}
+		readbuf[readi].readOrigBuf.resize(block_size);
+		memcpy(readbuf[readi].readOrigBuf.wbuf(), &alignment_batch[0] + i, block_size);
+		i += block_size;
+		readi += (pp_.align_paired_reads &&
+			  pt.bufb_[readi].readOrigBuf.length() == 0) ? 0 : 1;
 	}
 
 	return make_pair(done, readi);
 }
 
-void BAMPatternSource::get_or_store_orhaned_mate(EList<Read>& buf_a, EList<Read>& buf_b, unsigned& readi, const uint8_t *mate, size_t mate_len) {
-	const char *read_name =
-		(const char *)(mate + offset[BAMField::read_name]);
-	size_t i;
-	uint32_t hash = hash_str(read_name);
-	orphan_mate_t *empty_slot = NULL;
-
-	for (i = 0; i < orphan_mates.size(); i++) {
-		if (empty_slot == NULL && orphan_mates[i].empty())
-			empty_slot = &orphan_mates[i];
-		if (orphan_mates[i].hash == hash)
-			break;
-	}
-	if (i == orphan_mates.size()) {
-		// vector is full
-		if (empty_slot == NULL) {
-			orphan_mates.push_back(orphan_mate_t());
-			empty_slot = &orphan_mates.back();
-		}
-		empty_slot->hash = hash;
-		if (empty_slot->cap < mate_len) {
-			delete[] empty_slot->data;
-			empty_slot->data = NULL;
-		}
-		if (empty_slot->data == NULL) {
-			empty_slot->data = new uint8_t[mate_len];
-			empty_slot->cap = mate_len;
-		}
-		memcpy(empty_slot->data, mate, mate_len);
-		empty_slot->size = mate_len;
-	} else {
-		uint8_t flag;
-		Read& ra = buf_a[readi];
-		Read& rb = buf_b[readi];
-
-		memcpy(&flag, mate + offset[BAMField::flag], sizeof(flag));
-		if ((flag & 0x40) != 0) {
-			ra.readOrigBuf.resize(mate_len);
-			memcpy(ra.readOrigBuf.wbuf(), mate, mate_len);
-			rb.readOrigBuf.resize(orphan_mates[i].size);
-			memcpy(rb.readOrigBuf.wbuf(), orphan_mates[i].data, orphan_mates[i].size);
-		} else {
-			rb.readOrigBuf.resize(mate_len);
-			memcpy(rb.readOrigBuf.wbuf(), mate, mate_len);
-			ra.readOrigBuf.resize(orphan_mates[i].size);
-			memcpy(ra.readOrigBuf.wbuf(), orphan_mates[i].data, orphan_mates[i].size);
-		}
-		readi++;
-		orphan_mates[i].reset();
-	}
-}
-
 int BAMPatternSource::decompress_bgzf_block(uint8_t *dst, size_t dst_len, uint8_t *src, size_t src_len) {
-	z_stream strm;
-
-	strm.zalloc = Z_NULL;
-	strm.zfree = Z_NULL;
-	strm.opaque = Z_NULL;
+	stream.zalloc = Z_NULL; // Z_NULL allocator hooks: zlib substitutes its default allocator
+	stream.zfree = Z_NULL;
+	stream.opaque = Z_NULL;
 
-	strm.avail_in = src_len;
-	strm.next_in = src;
-	strm.avail_out = dst_len;
-	strm.next_out = dst;
+	stream.avail_in = src_len; // whole compressed block in, whole output buffer out: a single inflate() call
+	stream.next_in = src;
+	stream.avail_out = dst_len;
+	stream.next_out = dst;
 
-	int ret  = inflateInit2(&strm, -8);
+	int ret  = inflateInit2(&stream, -8); // negative windowBits = raw deflate (no zlib/gzip header); allocates new state on every call
 	if (ret != Z_OK) {
 		return ret;
 	}
 
-	ret = inflate(&strm, Z_FINISH);
+	ret = inflate(&stream, Z_FINISH); // with Z_FINISH, Z_STREAM_END is the only success result
 	if (ret != Z_STREAM_END) {
 		return ret;
 	}
 
-	return inflateEnd(&strm);
+	return inflateEnd(&stream); // pairs with inflateInit2 above; inflateReset() kept the state allocated and leaked it on every block
 }
 
 bool BAMPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const {
diff --git a/pat.h b/pat.h
index 5bf27037..d2b107d4 100644
--- a/pat.h
+++ b/pat.h
@@ -826,33 +826,6 @@ class BAMPatternSource : public CFilePatternSource {
 		ftr_t ftr;
 	};
 
-	struct partial_block {
-		char data[500];
-		size_t len;
-	};
-
-	struct orphan_mate_t {
-		orphan_mate_t() :
-			data(NULL),
-			size(0),
-			cap(0),
-			hash(0) {}
-
-		void reset() {
-			size = 0;
-			hash = 0;
-		}
-
-		bool empty() const {
-			return size == 0;
-		}
-
-		uint8_t* data;
-		uint16_t size;
-		uint16_t cap;
-		uint32_t hash;
-	};
-
 	struct BAMField {
 		enum aln_rec_field_name {
 			refID,
@@ -879,11 +852,12 @@ class BAMPatternSource : public CFilePatternSource {
 		first_(true),
 		alignment_batch(0),
 		alignment_offset(0),
-		orphan_mates(p.nthreads * 2),
 		delta_(0),
-		orphan_mates_mutex_(),
 		pp_(p)
 		{
+			stream.zalloc = Z_NULL;
+			stream.zfree = Z_NULL;
+			stream.opaque = Z_NULL;
 			alignment_batch.reserve(1 << 16);
 		}
 
@@ -898,11 +872,6 @@ class BAMPatternSource : public CFilePatternSource {
 	virtual bool parse(Read& ra, Read& rb, TReadId rdid) const;
 
 	~BAMPatternSource() {
-		for (size_t i = 0; i < orphan_mates.size(); i++) {
-			if (orphan_mates[i].data != NULL) {
-				delete[] orphan_mates[i].data;
-			}
-		}
 	}
 
 
@@ -928,19 +897,14 @@ class BAMPatternSource : public CFilePatternSource {
 
 	int decompress_bgzf_block(uint8_t *dst, size_t dst_len, uint8_t *src, size_t src_len);
 	std::pair<bool, int> get_alignments(PerThreadReadBuf& pt, bool batch_a, unsigned& readi, bool lock);
-	void store_orphan_mate(const uint8_t* read, size_t read_len);
-	void get_orphaned_pairs(EList<Read>& buf_a, EList<Read>& buf_b, const size_t max_buf, unsigned& readi);
-	void get_or_store_orhaned_mate(EList<Read>& buf_a, EList<Read>& buf_b, unsigned& readi, const uint8_t *mate, size_t mate_len);
-	size_t get_matching_read(const uint8_t* rec);
 
         static const int offset[];
 	static const uint8_t EOF_MARKER[];
 
 	std::vector<uint8_t> alignment_batch;
 	size_t alignment_offset;
-	std::vector<orphan_mate_t> orphan_mates;
 	size_t delta_;
-	MUTEX_T orphan_mates_mutex_;
+	z_stream stream;
 
 	PatternParams pp_;
 };
diff --git a/ref_read.cpp b/ref_read.cpp
index af3a7938..648ec987 100644
--- a/ref_read.cpp
+++ b/ref_read.cpp
@@ -275,9 +275,7 @@ fastaRefReadSizes(
 			}
 			first = false;
 			// Add the length of this record.
-			if(rec.len == 0 && rec.first) {
-				continue;
-			} else if(rec.first) {
+			if(rec.first) {
 				numSeqs++;
 			}
 			unambigTot += rec.len;
diff --git a/reference.cpp b/reference.cpp
index cf410038..9f6f4d30 100644
--- a/reference.cpp
+++ b/reference.cpp
@@ -595,8 +595,8 @@ BitPairReference::szsFromFasta(
 	RefReadInParams parms = refparams;
 	std::pair<size_t, size_t> sztot;
 	if(!outfile.empty()) {
-		string file3 = outfile + ".3." + gEbwt_ext;
-		string file4 = outfile + ".4." + gEbwt_ext;
+		string file3 = outfile + ".3." + gEbwt_ext + ".tmp";
+		string file4 = outfile + ".4." + gEbwt_ext + ".tmp";
 		// Open output stream for the '.3.gEbwt_ext' file which will
 		// hold the size records.
 		ofstream fout3(file3.c_str(), ios::binary);
diff --git a/sam.cpp b/sam.cpp
index 05e6510d..7257b51f 100644
--- a/sam.cpp
+++ b/sam.cpp
@@ -59,7 +59,7 @@ void SamConfig::printHeader(
 	bool printSq,
 	bool printPg) const
 {
-	if(printHd) printHdLine(o, "1.0");
+	if(printHd) printHdLine(o, "1.5");
 	if(printSq) printSqLines(o);
 	if(!rgid.empty()) {
 		o.append("@RG");
@@ -76,7 +76,7 @@ void SamConfig::printHeader(
 void SamConfig::printHdLine(BTString& o, const char *samver) const {
 	o.append("@HD\tVN:");
 	o.append(samver);
-	o.append("\tSO:unsorted\n");
+	o.append("\tSO:unsorted\tGO:query\n"); // GO:query declares that alignments are grouped by query name
 }
 
 /**
@@ -526,11 +526,6 @@ void SamConfig::printAlignedOptFlags(
 		o.append("ZI:i:");
 		o.append(buf);
 	}
-	if(print_xr_) {
-		// Original read string
-		o.append("\n");
-		printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
-	}
 	if(print_zt_) {
 		// ZT:Z: Extra features for MAPQ estimation
 		WRITE_SEP();
@@ -881,18 +876,13 @@ void SamConfig::printEmptyOptFlags(
 		o.append("ZI:i:");
 		o.append(buf);
 	}
-	if(print_xr_) {
-		// Original read string
-		o.append("\n");
-		printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
-	}
 }
 
 void SamConfig::printPreservedOptFlags(BTString& o, const Read& rd) const {
     if (rd.preservedOptFlags.length() != 0) {
-		char buf[1024];
+		uint32_t count = 1;
 		const char* b = rd.preservedOptFlags.buf();
-		int i = 0, len = rd.preservedOptFlags.length();
+		size_t i = 0, len = rd.preservedOptFlags.length();
 		while (i < len) {
 			o.append('\t');
 			char tag[2], val_type;
@@ -900,78 +890,64 @@ void SamConfig::printPreservedOptFlags(BTString& o, const Read& rd) const {
 			o.append(tag, 2);
 			i += 2 * sizeof(char);
 			memcpy(&val_type, b + i, 1);
-			o.append(':');
-			if (val_type == 'c' || val_type == 'C'
-					|| val_type == 'i' || val_type == 'I'
-					|| val_type == 's' || val_type == 'S') {
-				o.append('i');
-			} else {
+			if (val_type == 'B') {
+				i += 1;
+				memcpy(&val_type, b + i, 1);
+				i += 1;
+				memcpy(&count, b + i, sizeof(count));
+				i += sizeof(count);
+				o.append(":B:");
 				o.append(val_type);
-			}
-			o.append(':');
-			i += sizeof(char);
+				o.append(",");
+			} else {
+				// Scalar tag: exactly one value follows. Reset count so the
+				// length of an earlier B array is not reused by readTagVal.
+				count = 1;
+				o.append(':');
+				const bool is_int = val_type == 'c' || val_type == 'C'
+					|| val_type == 'i' || val_type == 'I'
+					|| val_type == 's' || val_type == 'S';
+				o.append(is_int ? 'i' : val_type); // every integer width is written with the single type letter 'i'
+				i += sizeof(char);
+				o.append(':');
+			}
 			switch (val_type) {
-				case 'A':
-					char A_val;
-					memcpy(&A_val, b + i, sizeof(A_val));
-					i += sizeof(A_val);
-					itoa10<char>(A_val, buf);
-					o.append(buf);
-					break;
-				case 'c':
-					int8_t c_val;
-					memcpy(&c_val, b + i, sizeof(c_val));
-					i += sizeof(c_val);
-					itoa10<int8_t>(c_val, buf);
-					o.append(buf);
-					break;
-				case 'C':
-					uint8_t C_val;
-					memcpy(&C_val, b + i, sizeof(C_val));
-					i += sizeof(C_val);
-					itoa10<uint8_t>(C_val, buf);
-					o.append(buf);
-					break;
-				case 's':
-					int16_t s_val;
-					memcpy(&s_val, b + i, sizeof(s_val));
-					i += sizeof(s_val);
-					itoa10<int16_t>(s_val, buf);
-					o.append(buf);
-					break;
-				case 'S':
-					uint16_t S_val;
-					memcpy(&S_val, b + i, sizeof(S_val));
-					i += sizeof(S_val);
-					itoa10<uint16_t>(S_val, buf);
-					o.append(buf);
-					break;
-				case 'i':
-					int32_t i_val;
-					memcpy(&i_val, b + i, sizeof(i_val));
-					i += sizeof(i_val);
-					itoa10<int32_t>(i_val, buf);
-					o.append(buf);
-					break;
-				case 'I':
-					uint32_t I_val;
-					memcpy(&I_val, b + i, sizeof(I_val));
-					i += sizeof(I_val);
-					itoa10<uint32_t>(I_val, buf);
-					o.append(buf);
-					break;
-				case 'Z':
-					char c;
-					memcpy(&c, b + i, sizeof(char));
-					while (c != '\0') {
-						o.append(c);
-						i++;
-						memcpy(&c, b + i, sizeof(char));
-					}
+			case 'A':
+				readTagVal<char>(o, b, i, count);
+				break;
+			case 'c':
+				readTagVal<int8_t>(o, b, i, count);
+				break;
+			case 'C':
+				readTagVal<uint8_t>(o, b, i, count);
+				break;
+			case 's':
+				readTagVal<int16_t>(o, b, i, count);
+				break;
+			case 'S':
+				readTagVal<uint16_t>(o, b, i, count);
+				break;
+			case 'i':
+				readTagVal<int32_t>(o, b, i, count);
+				break;
+			case 'I':
+				readTagVal<uint32_t>(o, b, i, count);
+				break;
+			case 'f':
+				readTagVal<float>(o, b, i, count);
+				break;
+			case 'Z':
+				char c;
+				memcpy(&c, b + i, sizeof(char));
+				while (c != '\0') {
+					o.append(c);
 					i++;
-					break;
-				default:
-					break;
+					memcpy(&c, b + i, sizeof(char));
+				}
+				i++;
+				break;
+			default:
+				break;
 			}
 		}
     }
diff --git a/sam.h b/sam.h
index 7936e02f..1ad5a776 100644
--- a/sam.h
+++ b/sam.h
@@ -21,6 +21,7 @@
 #define SAM_H_
 
 #include <string>
+#include <vector>
 #include "ds.h"
 #include "read.h"
 #include "util.h"
@@ -328,6 +329,22 @@ class SamConfig {
 		}
 	}
 
+	/** Append count values of type T, read from data + offset, to o as comma-separated
+	 *  std::to_string text and advance offset past them; a count of 0 appends nothing. */
+	template<typename T>
+	static void readTagVal(BTString& o, const char *data, size_t &offset, size_t count) {
+		std::vector<T> val(count);
+		if (count != 0)
+			memcpy(val.data(), data + offset, sizeof(T) * count);
+		for (size_t i = 0; i < count; i++) {
+			if (i != 0)
+				o.append(",");
+			std::string str = std::to_string(val[i]); // NOTE(review): T = char is promoted and printed as its integer code - confirm intended for 'A' tags
+			o.append(str.c_str(), str.length());
+		}
+		offset += sizeof(T) * count;
+	}
+
         /**
 	 * Return true iff we should try to obey the SAM spec's recommendations
 	 * that:
@@ -343,6 +360,10 @@ class SamConfig {
 		return noUnal_;
 	}
 
+	bool passthrough() const { // read-only accessor for the print_xr_ flag
+		return print_xr_;
+	}
+
 protected:
 
 	bool truncQname_;   // truncate QNAME to 255 chars?
diff --git a/threadpool.h b/threadpool.h
new file mode 100644
index 00000000..d5c6d44b
--- /dev/null
+++ b/threadpool.h
@@ -0,0 +1,198 @@
+#ifndef _THREAD_POOL_H_
+#define _THREAD_POOL_H_
+
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <iostream>
+#include <memory>
+#include <map>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <utility>
+#include <vector>
+
+/**
+ * Minimal mutex-guarded FIFO queue. Every member function holds mut for
+ * the duration of the call.
+ */
+template<typename T>
+class threadsafe_queue {
+private:
+	mutable std::mutex mut;
+	std::queue<T> data_queue;
+
+public:
+	threadsafe_queue() {}
+
+	/**
+	 * Append new_value to the back of the queue. The value is moved into
+	 * the queue rather than copied.
+	 */
+	void push(T &&new_value) {
+		std::lock_guard<std::mutex> lk(mut);
+		data_queue.emplace(std::move(new_value));
+	}
+
+	/**
+	 * Move the oldest element into value and return true; return false and
+	 * leave value untouched when the queue is empty.
+	 */
+	bool try_pop(T& value) {
+		std::lock_guard<std::mutex> lk(mut);
+		if (data_queue.empty())
+			return false;
+		value = std::move(data_queue.front());
+		data_queue.pop();
+		return true;
+	}
+
+	/**
+	 * Return the number of queued elements at the time of the call.
+	 */
+	size_t size() const {
+		std::lock_guard<std::mutex> lk(mut);
+		return data_queue.size();
+	}
+};
+
+/**
+ * Fixed-size pool of worker threads that execute std::function<void()>
+ * tasks taken from a shared threadsafe_queue.
+ */
+class thread_pool
+{
+	std::atomic_bool done;                     // tells workers to leave their loop
+	int nthreads;
+	std::map<std::thread::id, int> thread_id;  // worker id -> creation index
+	threadsafe_queue<std::function<void()>> work_queue;
+	std::vector<std::thread> threads;
+	std::condition_variable cv;                // signalled by submit() and shutdown()
+	std::mutex m;                              // paired with cv
+
+	/**
+	 * Worker loop: run queued tasks until done is set, sleeping on cv while
+	 * the queue is empty.
+	 */
+	void worker_thread() {
+		while (!done) {
+			std::function<void()> task;
+			if (work_queue.try_pop(task)) {
+				task();
+			} else {
+				std::unique_lock<std::mutex> lock(m);
+				cv.wait(lock, [&] {return work_queue.size() != 0 || done; });
+			}
+		}
+	}
+
+	/**
+	 * Set done, wake every sleeping worker and join all started threads.
+	 */
+	void shutdown() {
+		done = true;
+		{
+			// Notify under m so a worker that has evaluated the wait
+			// predicate but is not yet blocked cannot miss the wake-up.
+			std::unique_lock<std::mutex> lock(m);
+			cv.notify_all();
+		}
+		for (std::thread &thread : threads) {
+			thread.join();
+		}
+	}
+
+public:
+	/**
+	 * Start nthr worker threads. If a thread cannot be started, the workers
+	 * already running are joined before the exception propagates; leaving
+	 * them joinable would end in std::terminate when threads is destroyed.
+	 */
+	thread_pool(int nthr):
+		done(false), nthreads(nthr)
+		{
+			try {
+				for (int i = 0; i < nthreads; ++i) {
+					threads.emplace_back(&thread_pool::worker_thread, this);
+					thread_id[threads[i].get_id()] = i;
+				}
+			} catch (...) {
+				shutdown();
+				throw;
+			}
+		}
+
+	~thread_pool() {
+		shutdown();
+	}
+
+	/**
+	 * Queue f(args...) for execution by a worker and return a future for
+	 * its result.
+	 */
+	template<typename Function, typename... Args>
+	std::future<typename std::result_of<Function(Args...)>::type>
+	submit(Function &&f, Args&&... args) {
+		using result_type = typename std::result_of<Function(Args...)>::type;
+		auto task = std::make_shared<std::packaged_task<result_type()>>(std::bind(std::forward<Function>(f), std::forward<Args>(args)...));
+		std::future<result_type> res(task->get_future());
+		work_queue.push([task] { (*task)(); });
+		std::unique_lock<std::mutex> lock(m);
+		cv.notify_one();
+		return res;
+	}
+
+	/**
+	 * Return the thread count the pool was constructed with.
+	 */
+	int size() {
+		return nthreads;
+	}
+
+	/**
+	 * Return the creation index of the worker with the given id, or 0 for
+	 * an id the pool does not own. find() is used so that a lookup never
+	 * inserts into the map.
+	 */
+	int thread_id_to_int(std::thread::id id) {
+		auto it = thread_id.find(id);
+		return it == thread_id.end() ? 0 : it->second;
+	}
+
+	/**
+	 * Split [start, end) into consecutive blocks of (end - start) / nthreads
+	 * elements (one block when that quotient is 0), run
+	 * f(block_start, block_end, stride) for each block on the pool and wait
+	 * for all of them. stride is only forwarded to f.
+	 */
+	template<typename T, typename Function>
+	void parallel_for(T start, T end, T stride, Function &&f) {
+		if (nthreads <= 0) {
+			// Without workers a queued task would never run (and the
+			// division below would be by zero): use the calling thread.
+			if (start < end)
+				f(start, end, stride);
+			return;
+		}
+		T range = end - start;
+		T block_size = range / (nthreads);
+		T block_start = start;
+		T block_end = block_start + block_size;
+		if (block_size == 0)
+			block_end = end;
+		std::vector<std::future<void>> res;
+		while (block_start < end) {
+			res.emplace_back(submit(f, block_start, block_end, stride));
+			block_start = block_end;
+			block_end = block_end + block_size;
+			if (block_end >= end)
+				block_end = end;
+		}
+		for (size_t i = 0; i < res.size(); i++)
+			res[i].get();
+	}
+};
+
+#endif