diff --git a/ext/meryl/.gitmodules b/ext/meryl/.gitmodules
new file mode 100644
index 0000000..680f532
--- /dev/null
+++ b/ext/meryl/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "src/utility"]
+	path = src/utility
+	url = https://github.com/marbl/meryl-utility
diff --git a/ext/meryl/scripts/buildRelease.sh b/ext/meryl/scripts/buildRelease.sh
deleted file mode 100644
index 54ac6c0..0000000
--- a/ext/meryl/scripts/buildRelease.sh
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/bin/sh
-#
-#  Before building a release:
-#
-#  Make a place to work, grab the bits you want to release:
-#    git clone git@github.com:marbl/meryl meryl-release
-#    cd meryl-release
-#
-#  Commit to master:
-#    Increase version in documentation/source/conf.py   (not present in meryl)
-#    Increase version in scripts/version_update.pl
-#
-#  Build.  This pulls in submodule code.  This build isn't used for release
-#  and can be deleted or aborted (once submodules are populated).
-#    cd src && gmake
-#
-#  Tag the next release development
-#    git tag -a v1.3-development -m "Development for v1.3."
-#    git push --follow-tags
-#
-#  Make a branch:
-#    git checkout -b v1.2-maintenance
-#
-#  Commit to branch:
-#    Change 'snapshot' to 'release' in scripts/version_update.pl
-#    git push --set-upstream origin v1.2-maintenance
-#
-#  Run this script:
-#    scripts/buildRelease.sh 1.2
-#
-
-version=$1
-
-if [ x$version = x ] ; then
-  echo usage: $0 numeric-version
-  exit
-fi
-
-#
-#  Cleanup any old build, make space for the new one, and initialize scripts.
-#
-
-if [ -e .git ] ; then
-    echo Moving .git directory out of the way.
-    mv .git dot-git-directory
-fi
-
-echo Preparing build trees.
-
-rm -rf build
-rm -rf build-darwin build-darwin.out
-rm -rf build-linux  build-linux.out
-rm -rf build-src
-
-rm  -f build-linux.sh
-
-rm  -f meryl-${version}.Darwin-amd64.tar meryl-${version}.Darwin-amd64.tar.xz
-rm  -f meryl-${version}.Linux-amd64.tar  meryl-${version}.Linux-amd64.tar.xz
-rm  -f meryl-${version}.tar  meryl-${version}.tar.xz
-
-mkdir -p build-src/scripts
-mkdir -p build-darwin/scripts
-mkdir -p build-linux/scripts
-
-rsync -a src/ build-src/src
-rsync -a src/ build-darwin/src
-rsync -a src/ build-linux/src
-
-cp -p README* build-src/
-cp -p README* build-darwin/
-cp -p README* build-linux/
-
-cp -p scripts/version_update.pl build-src/scripts/
-cp -p scripts/version_update.pl build-darwin/scripts/
-cp -p scripts/version_update.pl build-linux/scripts/
-
-echo >> build-linux.sh  "#!/bin/bash"
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  "rm -rf /dock/build"
-echo >> build-linux.sh  "cd /dock/src"
-echo >> build-linux.sh  "gmake -j 12 > ../build-linux.out 2>&1"
-echo >> build-linux.sh  "cd .."
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  "mv build/* build-linux/"
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  "rm -rf build-darwin/obj"
-echo >> build-linux.sh  "rm -rf build-linux/obj"
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  "mv build-darwin meryl-$version"
-echo >> build-linux.sh  "tar -cf meryl-$version.Darwin-amd64.tar meryl-$version/README* meryl-$version/bin meryl-$version/lib meryl-$version/share"
-echo >> build-linux.sh  "mv meryl-$version build-darwin"
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  "mv build-linux meryl-$version"
-echo >> build-linux.sh  "tar -cf meryl-$version.Linux-amd64.tar  meryl-$version/README*  meryl-$version/bin  meryl-$version/lib  meryl-$version/share"
-echo >> build-linux.sh  "mv meryl-$version build-linux"
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  "mv build-src meryl-$version"
-echo >> build-linux.sh  "tar -cf meryl-$version.tar              meryl-$version/README*  meryl-$version/src  meryl-$version/scripts"
-echo >> build-linux.sh  "mv meryl-$version build-src"
-echo >> build-linux.sh  ""
-echo >> build-linux.sh  ""
-
-chmod 755 build-linux.sh
-
-#
-#
-#
-
-echo Build for MacOS.
-
-cd src
-gmake -j 12 > ../build-darwin.out 2>&1
-cd ..
-
-mv build/* build-darwin/
-
-echo Make static binaries for MacOS.
-
-cd build-darwin
-python ../scripts/statifyOSX.py bin lib true true >> ../build-darwin.out 2>&1
-python ../scripts/statifyOSX.py lib lib true true >> ../build-darwin.out 2>&1
-cd ..
-
-#
-#
-#
-
-echo Build for Linux and make tarballs.
-
-echo \
-docker run -v `pwd`:/dock -t -i --rm phusion/holy-build-box-64:latest /hbb_exe/activate-exec bash /dock/build-linux.sh
-docker run -v `pwd`:/dock -t -i --rm phusion/holy-build-box-64:latest /hbb_exe/activate-exec bash /dock/build-linux.sh
-
-#  strip --only-keep-debug
-
-echo Compress.
-
-xz -9v meryl-$version.Darwin-amd64.tar
-xz -9v meryl-$version.Linux-amd64.tar
-xz -9v meryl-$version.tar
-
-if [ -e dot-git-directory ] ; then
-    echo Restoring .git directory.
-    mv dot-git-directory .git
-fi
-
-exit
diff --git a/ext/meryl/scripts/version_update.pl b/ext/meryl/scripts/version_update.pl
index 9ec23af..a319137 100755
--- a/ext/meryl/scripts/version_update.pl
+++ b/ext/meryl/scripts/version_update.pl
@@ -39,7 +39,7 @@
 
 my $label    = "snapshot";      #  If not 'release' print this in the version output.
 my $major    = "1";             #  Bump before release.
-my $minor    = "2";             #  Bump before release.
+my $minor    = "0";             #  Bump before release.
 
 my $branch   = "master";
 my $version  = "v$major.$minor";
@@ -87,12 +87,7 @@
 
             $version = "v$major.$minor";
         } else {
-            $major   = "0";
-            $minor   = "0";
-            $commits = "0";
-            $hash1   = $_;
-
-            $version = "v$major.$minor";
+            die "Failed to parse describe string '$_'.\n";
         }
     }
     close(F);
@@ -168,12 +163,14 @@
 #  Report what we found.  This is really for the gmake output.
 
 if (defined($commits)) {
-    print "\$(info Building $label $version +$commits changes (r$revCount $hash1) ($dirty))\n";
+    print STDERR "Building $label $version +$commits changes (r$revCount $hash1) ($dirty)\n";
     foreach my $s (@submodules) {
-        print "\$(info \$(space)         $s)\n";
+        print STDERR "         $s\n";
     }
+    print STDERR "\n";
 } else {
-    print "\$(info Building $label $version)\n";
+    print STDERR "Building $label $version\n";
+    print STDERR "\n";
 }
 
 #  Dump a new file, but don't overwrite the original.
diff --git a/ext/meryl/src/9d8b34d1bec567d21facb6b745ad8933bbd9be5d.zip b/ext/meryl/src/9d8b34d1bec567d21facb6b745ad8933bbd9be5d.zip
new file mode 100644
index 0000000..1f6449b
Binary files /dev/null and b/ext/meryl/src/9d8b34d1bec567d21facb6b745ad8933bbd9be5d.zip differ
diff --git a/ext/meryl/src/Makefile b/ext/meryl/src/Makefile
index ea8d562..870edab 100644
--- a/ext/meryl/src/Makefile
+++ b/ext/meryl/src/Makefile
@@ -27,14 +27,7 @@
 #       instances of "$" within them need to be escaped with a second "$" to
 #       accomodate the double expansion that occurs when eval is invoked.
 
-#  Before doing ANYTHING, initialize submodules...if the version of
-#  git is compatible.
-gitv := $(shell git --version | cut -d\  -f 3 | cut -c 1)
-ifeq (1, $(gitv))
-  gitv := $(shell git --version | cut -d\  -f 3)
-  $(error git '$(shell which git)' version '$(gitv)' too old; at least version 2.12 is required)
-endif
-
+#  Before doing ANYTHING, initialize submodules.
 ifeq ($(wildcard utility/src/Makefile), )
   $(info $(shell git submodule update --init utility))
   $(info $(space))
@@ -350,9 +343,8 @@ DIR_STACK :=
 INCDIRS :=
 TGT_STACK :=
 
-# Discover our OS and architecture.  These were previously used to set
-# BUILD_DIR and TARGET_DIR to allow multi-platform builds.  DESTDIR will do
-# that for us too.
+# Discover our OS and architecture.  These are used to set the BUILD_DIR and TARGET_DIR to
+# something more useful than 'build' and '.'.
 
 OSTYPE      := $(shell echo `uname`)
 OSVERSION   := $(shell echo `uname -r`)
@@ -377,18 +369,6 @@ ifeq (${OSTYPE}, SunOS)
   endif
 endif
 
-#  Set paths for building and installing.  If DESTDIR doesn't exist, use the
-#  directory just above us.
-
-ifeq "$(strip ${DESTDIR})" ""
-  BUILD_DIR    := $(realpath ..)/build/obj
-  TARGET_DIR   := $(realpath ..)/build
-else
-  BUILD_DIR    := $(DESTDIR)/canu/build/obj
-  TARGET_DIR   := $(DESTDIR)/canu/build
-endif
-
-#
 #  Set compiler and flags based on discovered hardware
 #
 #  By default, debug symbols are included in all builds (even optimized).
@@ -405,6 +385,7 @@ endif
 #  BUILDJEMALLOC will enable jemalloc library support.
 #
 
+
 ifeq ($(origin CXXFLAGS), undefined)
   ifeq ($(BUILDOPTIMIZED), 1)
   else
@@ -456,27 +437,44 @@ endif
 #  So, we require gcc7 (from MacPorts) or gcc8 (from hommebrew).
 #
 #  If from MacPorts:
-#    port install gcc9
-#    port select gcc mp-gcc9
+#    port install gcc7
+#    port select gcc mp-gcc7
 #
 #  If CC is set to 'cc', the GNU make default, we'll automagically search for other
-#  versions and use those if found.
+#  versions and use those if found, preferring gcc7 over gcc8.
 #
+#  There's definitely a clever way to do this with 'foreach', but my Make is lacking.
+#
+ifeq (${OSTYPE}, Darwin)
+  ifeq ($(CC), cc)
+    CC7    := $(shell echo `which gcc-mp-7`)
+    CXX7   := $(shell echo `which g++-mp-7`)
 
-define TEST_COMPILER
-  ifeq ($${CC}, cc)
-    CCTEST  := $$(shell echo `which gcc-${1}`)
-    CXXTEST := $$(shell echo `which g++-${1}`)
+    ifdef CXX7
+      CC  := $(CC7)
+      CXX := $(CXX7)
+    endif
+  endif
+
+  ifeq ($(CC), cc)
+    CC7    := $(shell echo `which gcc-7`)
+    CXX7   := $(shell echo `which g++-7`)
 
-    ifdef CXXTEST
-      CC  := $${CCTEST}
-      CXX := $${CXXTEST}
+    ifdef CXX7
+      CC  := $(CC7)
+      CXX := $(CXX7)
     endif
   endif
-endef
 
-ifeq (${OSTYPE}, Darwin)
-  $(foreach suffix,mp-9 9  mp-8 8  mp-7 7,$(eval $(call TEST_COMPILER,${suffix})))
+  ifeq ($(CC), cc)
+    CC8    := $(shell echo `which gcc-8`)
+    CXX8   := $(shell echo `which g++-8`)
+
+    ifdef CXX8
+      CC  := $(CC8)
+      CXX := $(CXX8)
+    endif
+  endif
 
   ifneq ($(shell echo `$(CXX) --version 2>&1 | grep -c clang`), 0)
      CPATH := $(shell echo `which $(CXX)`)
@@ -516,12 +514,9 @@ ifeq (${CANU_BUILD_ENV}, ports)
 
 else
 
-  # Ignore the gmake default 'c++' and force g++9.
-	ifeq ($(origin CXX), default)
-    CC    = gcc9
-    CXX   = g++9
-    CCLIB = -rpath /usr/local/lib/gcc9
-  endif
+  CC       ?= gcc6
+  CXX      ?= g++6
+  CCLIB    ?= -rpath /usr/local/lib/gcc6
 
   #  GCC
   CXXFLAGS  += -I/usr/local/include -pthread -fopenmp -fPIC
@@ -688,11 +683,11 @@ $(foreach TGT,${ALL_TGTS},\
 #  Makefile processed.  Regenerate the version number file, make some
 #  directories, and report that we're starting the build.
 
-$(eval $(shell ../scripts/version_update.pl meryl utility/src/utility/version.H))
+$(shell ../scripts/version_update.pl meryl utility/src/utility/version.H)
 
 $(shell mkdir -p ${TARGET_DIR}/bin)
 
-$(info For '${OSTYPE}' '${OSVERSION}' as '${MACHINETYPE}' into '${TARGET_DIR}/{bin,obj}'.)
+$(info For '${OSTYPE}' '${OSVERSION}' as '${MACHINETYPE}' into '${DESTDIR}${PREFIX}/$(OSTYPE)-$(MACHINETYPE)/{bin,obj}'.)
 $(info Using '$(shell which ${CXX})' version '${GXX_VV}'.)
 ifneq ($(origin CXXFLAGSUSER), undefined)
 $(info Using user-supplied CXXFLAGS '${CXXFLAGSUSER}'.)
diff --git a/ext/meryl/src/main.mk b/ext/meryl/src/main.mk
index 2a2c826..23810c4 100644
--- a/ext/meryl/src/main.mk
+++ b/ext/meryl/src/main.mk
@@ -1,5 +1,29 @@
-MODULE       :=    meryl
+#  Output-location defaults.  Each variable is assigned only when it is unset or blank.
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+#  An unset or whitespace-only DESTDIR becomes the empty string.
+ifeq "$(strip ${DESTDIR})" ""
+  DESTDIR      :=
+endif
+#  PREFIX defaults to the real path of '..', or to '/meryl' when a DESTDIR is given.
+ifeq "$(strip ${PREFIX})" ""
+  ifeq "$(strip ${DESTDIR})" ""
+    PREFIX     := $(realpath ..)
+  else
+    PREFIX     := /meryl
+  endif
+endif
+#  Object files go in 'obj' under the per-platform <OSTYPE>-<MACHINETYPE> directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := $(DESTDIR)$(PREFIX)/$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+#  Build products go directly in the per-platform <OSTYPE>-<MACHINETYPE> directory.
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := $(DESTDIR)$(PREFIX)/$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET       := libmeryl.a
+
 SOURCES      := utility/src/utility/edlib.C \
                 \
                 utility/src/utility/files.C \
@@ -28,11 +52,11 @@ SOURCES      := utility/src/utility/edlib.C \
                 utility/src/utility/kmers.C \
                 \
                 utility/src/utility/bits.C \
-                utility/src/utility/bits-wordArray.C \
                 \
                 utility/src/utility/hexDump.C \
                 utility/src/utility/md5.C \
                 utility/src/utility/mt19937ar.C \
+                utility/src/utility/objectStore.C \
                 utility/src/utility/speedCounter.C \
                 utility/src/utility/sweatShop.C \
                 \
@@ -59,12 +83,11 @@ SRC_INCDIRS  := . \
                 utility
 
 SUBMAKEFILES := meryl/meryl.mk \
-                meryl-analyze/meryl-analyze.mk \
                 meryl-simple/meryl-simple.mk \
                 meryl-import/meryl-import.mk \
-                meryl-lookup/meryl-lookup.mk
+                meryl-lookup/meryl-lookup.mk \
+                meryl-check/meryl-check.mk
 
 ifeq ($(BUILDTESTS), 1)
-SUBMAKEFILES += tests/merylCountArrayTest.mk \
-                tests/merylExactLookupTest.mk
+SUBMAKEFILES += tests/merylCountArrayTest.mk
 endif
diff --git a/ext/meryl/src/meryl-analyze/meryl-analyze.C b/ext/meryl/src/meryl-analyze/meryl-analyze.C
index 3904139..9131543 100644
--- a/ext/meryl/src/meryl-analyze/meryl-analyze.C
+++ b/ext/meryl/src/meryl-analyze/meryl-analyze.C
@@ -52,8 +52,8 @@ public:
   void       insert(V value) {
     if ((_minValue <= value) &&
         (value     <= _maxValue)) {
-      _smallestV = std::min(_smallestV, value);
-      _largestV  = std::max(_largestV,  value);
+      _smallestV = min(_smallestV, value);
+      _largestV  = max(_largestV,  value);
 
       _histo[value - _minValue]++;
     }
@@ -107,8 +107,8 @@ public:
   void       insert(V value) {
     if ((_minValue <= value) &&
         (value     <= _maxValue)) {
-      _smallestV = std::min(_smallestV, value);
-      _largestV  = std::max(_largestV,  value);
+      _smallestV = min(_smallestV, value);
+      _largestV  = max(_largestV,  value);
 
       _histo[value]++;
     }
@@ -124,13 +124,13 @@ public:
   };
 
 private:
-  V              _minValue;      //  Minimum value we'll accept into the histogram
-  V              _maxValue;
+  V         _minValue;      //  Minimum value we'll accept into the histogram
+  V         _maxValue;
 
-  V              _smallestV;     //  Minimum value we have seen in the input data
-  V              _largestV;
+  V         _smallestV;     //  Minimum value we have seen in the input data
+  V         _largestV;
 
-  std::map<V,T>  _histo;         //  Histogram data.
+  map<V,T>  _histo;         //  Histogram data.
 };
 
 
@@ -325,8 +325,8 @@ main(int argc, char **argv) {
 
   argc = AS_configure(argc, argv);
 
-  std::vector<char const *>  err;
-  int                        arg = 1;
+  vector<char const *>  err;
+  int                   arg = 1;
   while (arg < argc) {
     if (strcmp(argv[arg], "-mers") == 0) {
       inputDBname = argv[++arg];
diff --git a/ext/meryl/src/meryl-check/meryl-check.C b/ext/meryl/src/meryl-check/meryl-check.C
new file mode 100644
index 0000000..b9a34a2
--- /dev/null
+++ b/ext/meryl/src/meryl-check/meryl-check.C
@@ -0,0 +1,174 @@
+
+/******************************************************************************
+ *
+ *  This file is part of meryl, a genomic k-kmer counter with nice features.
+ *
+ *  This software is based on:
+ *    'Canu' v2.0              (https://github.com/marbl/canu)
+ *  which is based on:
+ *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
+ *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
+ *
+ *  Except as indicated otherwise, this is a 'United States Government Work',
+ *  and is released in the public domain.
+ *
+ *  File 'README.licenses' in the root directory of this distribution
+ *  contains full conditions and disclaimers.
+ */
+
+#include "runtime.H"
+
+#include "kmers.H"
+#include "sequence.H"
+#include "bits.H"
+
+
+
+
+//  Load every kmer and value of a meryl database (-mers) into a map, then stream the kmers
+//  of a sequence file (-sequence), using up one count of the smaller of each forward/reverse
+//  kmer pair.  Occurrences with no count left go to stdout; counts left at the end to stderr.
+int
+main(int argc, char **argv) {
+  char   *inputSeqName = NULL;
+  char   *inputDBname  = NULL;
+  uint64  minV         = 0;            //  minV/maxV are only referenced by the
+  uint64  maxV         = UINT64_MAX;   //  commented-out merylExactLookup below.
+  uint32  threads      = 1;            //  Parsed, but not otherwise used here.
+
+  argc = AS_configure(argc, argv);
+
+  vector<char const *>  err;           //  Also holds string literals, hence const.
+  int                   arg = 1;
+  while (arg < argc) {
+    if        (strcmp(argv[arg], "-sequence") == 0) {   //  Sequences to stream kmers from.
+      inputSeqName = argv[++arg];
+
+    } else if (strcmp(argv[arg], "-mers") == 0) {
+      inputDBname = argv[++arg];
+
+    } else if (strcmp(argv[arg], "-min") == 0) {
+      minV = strtouint64(argv[++arg]);
+
+    } else if (strcmp(argv[arg], "-max") == 0) {
+      maxV = strtouint64(argv[++arg]);
+
+    } else if (strcmp(argv[arg], "-threads") == 0) {
+      threads = strtouint32(argv[++arg]);
+
+    } else {
+      char *s = new char [1024];
+      snprintf(s, 1024, "Unknown option '%s'.\n", argv[arg]);
+      err.push_back(s);
+    }
+
+    arg++;
+  }
+
+  if (inputSeqName == NULL)
+    err.push_back("No input sequences (-sequence) supplied.\n");
+  if (inputDBname == NULL)
+    err.push_back("No query meryl database (-mers) supplied.\n");
+
+  if (err.size() > 0) {
+    fprintf(stderr, "usage: %s ...\n", argv[0]);
+    fprintf(stderr, "\n");
+
+    for (uint32 ii=0; ii<err.size(); ii++)
+      if (err[ii])
+        fputs(err[ii], stderr);
+
+    exit(1);
+  }
+
+  map<kmer,uint32>   check;
+
+  //  Open a database, load the kmers and values into 'check'.
+
+  fprintf(stderr, "Open meryl database '%s'.\n", inputDBname);
+  merylFileReader   *merylDB    = new merylFileReader(inputDBname);
+
+  fprintf(stderr, "Convert to lookup table.\n");
+  //merylExactLookup  *kmerLookup = new merylExactLookup(merylDB, minV, maxV);
+
+  fprintf(stderr, "Create mapping to value.\n");
+  uint64                 nKmers     = 0;
+
+  while (merylDB->nextMer() == true) {
+    kmer    k     = merylDB->theFMer();
+    uint32  value = merylDB->theValue();
+
+    check[k] = value;
+
+    nKmers++;
+
+    if ((nKmers % 100000) == 0) {
+      fprintf(stderr, "Loaded %lu kmers.\n", nKmers);   //  Unsigned, as in the summary below.
+    }
+  }
+
+  delete merylDB;
+  //delete kmerLookup;
+
+  fprintf(stderr,"Loaded %lu kmers into check map of size %lu\n", nKmers, check.size());
+
+  //  Stream the input sequences, using up one count per kmer occurrence.
+  fprintf(stderr, "Stream kmers from '%s'.\n", inputSeqName);
+
+  dnaSeqFile  *seqFile    = new dnaSeqFile(inputSeqName);
+
+  {
+  uint32   nameMax = 0;
+  char    *name    = NULL;
+  uint64   seqLen  = 0;
+  uint64   seqMax  = 0;
+  char    *seq     = NULL;
+  uint8   *qlt     = NULL;
+
+  char     fString[64];
+  char     rString[64];
+
+  while (seqFile->loadSequence(name, nameMax, seq, qlt, seqMax, seqLen)) {
+    kmerIterator  kiter(seq, seqLen);
+
+    while (kiter.nextMer()) {
+      kmer     fMer  = kiter.fmer();
+      kmer     rMer  = kiter.rmer();
+
+      //  Look up the smaller of the two kmers.  operator[] inserts a zero for
+      //  kmers that are not in the database, so those are reported as ZERO too.
+      uint32  &remaining = (fMer < rMer) ? check[fMer] : check[rMer];
+
+      //  Never decrement past zero: an unsigned wrap would hide every later
+      //  occurrence and pollute the final leftover report.
+      if (remaining > 0)
+        remaining--;
+      else
+        fprintf(stdout, "%s\t%s\t%s ZERO\n", name,
+                kiter.fmer().toString(fString),
+                kiter.rmer().toString(rString));
+    }
+  }
+
+  delete [] name;
+  delete [] seq;
+  delete [] qlt;
+  }
+
+  delete seqFile;
+
+  //  Check that all values are zero.
+
+  for (map<kmer,uint32>::iterator it=check.begin(); it != check.end(); it++) {
+    kmer    k = it->first;
+    uint32  v = it->second;
+
+    if (v != 0) {
+      char   kmerString[64];
+
+      fprintf(stderr, "%s\t%u\n", k.toString(kmerString), v);
+    }
+  }
+
+  exit(0);
+}
diff --git a/ext/meryl/src/meryl-check/meryl-check.mk b/ext/meryl/src/meryl-check/meryl-check.mk
new file mode 100644
index 0000000..73388ec
--- /dev/null
+++ b/ext/meryl/src/meryl-check/meryl-check.mk
@@ -0,0 +1,20 @@
+#  Build description for the 'meryl-check' executable, linked against libmeryl.a.
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
+TARGET   := meryl-check
+SOURCES  := meryl-check.C
+
+SRC_INCDIRS  := . ../utility/src/utility
+
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lmeryl
+TGT_PREREQS := libmeryl.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/meryl-import/meryl-import.C b/ext/meryl/src/meryl-import/meryl-import.C
index 0b5d908..698eb9a 100644
--- a/ext/meryl/src/meryl-import/meryl-import.C
+++ b/ext/meryl/src/meryl-import/meryl-import.C
@@ -37,13 +37,13 @@ main(int argc, char **argv) {
   bool    useC         = true;
   bool    useF         = false;
 
-  uint32  threads      = getMaxThreadsAllowed();
-  //uint64  memory     = 8;
+  uint32  threads      = 1;
+  uint64  memory       = 8;
 
   argc = AS_configure(argc, argv);
 
-  std::vector<char const *>  err;
-  int                        arg = 1;
+  vector<char *>  err;
+  int             arg = 1;
   while (arg < argc) {
     if        (strcmp(argv[arg], "-kmers") == 0) {
       inputName = argv[++arg];
@@ -72,7 +72,7 @@ main(int argc, char **argv) {
       threads = strtouint32(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-memory") == 0) {   //  Not implemented.  If implemented, merylCountArray::initializeValues()
-      //memory = strtouint64(argv[++arg]);            //  needs to return a memory size, etc, etc.
+      memory = strtouint64(argv[++arg]);              //  needs to return a memory size, etc, etc.
 
     } else {
       char *s = new char [1024];
@@ -149,7 +149,7 @@ main(int argc, char **argv) {
   uint32  nPrefix   = 1 << wPrefix;
 
   uint32  wData     = 2 * kmerTiny::merSize() - wPrefix;
-  uint64  wDataMask = buildLowBitMask<uint64>(wData);
+  uint64  wDataMask = uint64MASK(wData);
 
   //  Open the input kmer file, allocate space for reading kmer lines.
 
@@ -212,8 +212,8 @@ main(int argc, char **argv) {
 
     //  And use it.
 
-    kmdata  pp = (useF == true) ? ((kmdata)kmerF >> wData)     : ((kmdata)kmerR >> wData);
-    kmdata  mm = (useF == true) ? ((kmdata)kmerF  & wDataMask) : ((kmdata)kmerR  & wDataMask);
+    uint64  pp = (useF == true) ? ((uint64)kmerF >> wData)     : ((uint64)kmerR >> wData);
+    uint64  mm = (useF == true) ? ((uint64)kmerF  & wDataMask) : ((uint64)kmerR  & wDataMask);
 
     assert(pp < nPrefix);
 
diff --git a/ext/meryl/src/meryl-import/meryl-import.mk b/ext/meryl/src/meryl-import/meryl-import.mk
index 1eaf170..99eb8cb 100644
--- a/ext/meryl/src/meryl-import/meryl-import.mk
+++ b/ext/meryl/src/meryl-import/meryl-import.mk
@@ -1,27 +1,21 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := meryl-import
 SOURCES  := meryl-import.C \
             ../meryl/merylCountArray.C
 
-SRC_INCDIRS := . ../meryl
-
-#  If we're part of Canu, build with canu support and use Canu's copy of
-#  meryl-utility.  Otherwise, don't.
-ifneq ($(wildcard stores/sqStore.H), )
-  SRC_CXXFLAGS := -DCANU
-  SRC_INCDIRS  += ../../../utility/src/utility ../../../stores
-
-#  If we're part of something else, include the something else's
-#  utility directory.
-else ifneq ($(wildcard meryl/src/meryl/meryl.C), )
-  SRC_INCDIRS  += ../../../utility/src/utility
-
-#  Otherwise, we're building directly in the meryl repo.
-else
-  SRC_INCDIRS  += ../utility/src/utility
-
-endif
+SRC_INCDIRS  := . ../utility/src/utility ../meryl
 
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lmeryl
+TGT_PREREQS := libmeryl.a
 
-TGT_LDFLAGS  := -L${TARGET_DIR}/lib
-TGT_LDLIBS   := -l${MODULE}
-TGT_PREREQS  := lib${MODULE}.a
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/meryl-lookup/dump.C b/ext/meryl/src/meryl-lookup/dump.C
deleted file mode 100644
index 30090c1..0000000
--- a/ext/meryl/src/meryl-lookup/dump.C
+++ /dev/null
@@ -1,148 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl, a genomic k-kmer counter with nice features.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "meryl-lookup.H"
-#include "sweatShop.H"
-
-
-class dumpInput {
-public:
-  dumpInput() {
-  };
-  ~dumpInput() {
-    delete [] fwd;
-    delete [] rev;
-  };
-
-  dnaSeq        seq;
-  uint64        seqIdx;
-
-  kmvalu       *fwd    = nullptr;
-  kmvalu       *rev    = nullptr;
-
-  uint64        maxP;
-};
-
-
-
-static    //  (This really came from merfin)
-void *
-loadSequence(void *G) {
-  lookupGlobal *g = (lookupGlobal *)G;
-  dumpInput    *s = new dumpInput;
-
-  if (g->seqFile1->loadSequence(s->seq) == false) {
-    delete s;
-    return(nullptr);
-  }
-
-  s->seqIdx = g->seqFile1->seqIdx();
-
-  return(s);
-}
-
-
-
-static
-void
-processSequence(void *G, void *T, void *S) {
-  lookupGlobal     *g   = (lookupGlobal *)G;
-  dumpInput        *s   = (dumpInput    *)S;
-  merylExactLookup *L = g->lookupDBs[0];
-
-  //  Allocate and clear outputs.
-
-  s->fwd = new kmvalu [s->seq.length()];
-  s->rev = new kmvalu [s->seq.length()];
-
-  for (uint32 ii=0; ii<s->seq.length(); ii++)
-    s->fwd[ii] = s->rev[ii] = 0;
-
-  //  Zip down all the kmers, saving the value of each.
-
-  kmerIterator  kiter(s->seq.bases(), s->seq.length());
-
-  while (kiter.nextMer()) {
-    uint64  p = kiter.bgnPosition();
-
-    s->fwd[p] = L->value(kiter.fmer());
-    s->rev[p] = L->value(kiter.rmer());
-
-    s->maxP = p+1;
-  }
-
-  //  Release the memory use for storing the sequence.
-
-  s->seq.releaseBases();
-}
-
-
-
-static
-void
-outputSequence(void *G, void *S) {
-  lookupGlobal     *g      = (lookupGlobal *)G;
-  dumpInput        *s      = (dumpInput    *)S;
-
-  //  Allocate space for the output string.
-
-  resizeArray(g->outstring, 0, g->outstringMax, strlen(s->seq.ident()) + 16 + 16 + 16, _raAct::doNothing);
-
-  //  Copy the sequence ident into the output strig.
-
-  char *outptr = g->outstring;
-
-  for (char const *x = s->seq.ident(); *x; )
-    *outptr++ = *x++;
-
-  *outptr++ = '\t';
-
-  //  'outptr' is now where we start adding new info for each kmer,
-  //  and we output the string from 'outroot'.
-
-  for (uint64 p=0; p<s->maxP; p++) {
-    char *t;
-
-    if (s->fwd[p] + s->rev[p] == 0)
-      continue;
-
-    t = toDec(s->seqIdx, outptr);   *t++ = '\t';
-    t = toDec(p, t);                *t++ = '\t';
-    t = toDec(s->fwd[p], t);        *t++ = '\t';
-    t = toDec(s->rev[p], t);        *t++ = '\n';   *t = 0;
-
-    fputs(g->outstring, g->outFile1->file());
-  }
-
-  delete s;
-}
-
-
-
-void
-dumpExistence(lookupGlobal *g) {
-  sweatShop     *ss = new sweatShop(loadSequence, processSequence, outputSequence);
-
-  ss->setLoaderQueueSize(4096);
-  ss->setNumberOfWorkers(omp_get_max_threads());
-  ss->setWriterQueueSize(4096);
-
-  ss->run(g, g->showProgress);
-
-  delete ss;
-}
diff --git a/ext/meryl/src/meryl-lookup/existence.C b/ext/meryl/src/meryl-lookup/existence.C
deleted file mode 100644
index bd90ef4..0000000
--- a/ext/meryl/src/meryl-lookup/existence.C
+++ /dev/null
@@ -1,144 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl, a genomic k-kmer counter with nice features.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "meryl-lookup.H"
-#include "sweatShop.H"
-
-
-
-class existInput {
-public:
-  existInput() {
-  };
-  ~existInput() {
-    delete [] nFound;
-  };
-
-  dnaSeq        seq;
-
-  uint64        nTotal  = 0;
-  uint64       *nFound  = nullptr;
-};
-
-
-
-static    //  (This really came from merfin)
-void *
-loadSequence(void *G) {
-  lookupGlobal *g = (lookupGlobal *)G;
-  existInput   *s = new existInput();
-
-  if (g->seqFile1->loadSequence(s->seq) == false) {
-    delete s;
-    return(nullptr);
-  }
-
-  return(s);
-}
-
-
-
-static
-void
-processSequence(void *G, void *T, void *S) {
-  lookupGlobal *g   = (lookupGlobal *)G;
-  existInput   *s   = (existInput   *)S;
-  int32         nIn = g->lookupDBs.size();
-
-  //  Allocate and clear outputs.
-
-  s->nTotal = 0;
-  s->nFound = new uint64 [nIn];
-
-  for (uint32 dd=0; dd<nIn; dd++)
-    s->nFound[dd] = 0;
-
-  //  Zip through the kmers, counting how many kmers we have and how many we
-  //  found in each input.
-
-  kmerIterator  kiter(s->seq.bases(), s->seq.length());
-
-  while (kiter.nextMer()) {
-    s->nTotal++;
-
-    for (uint32 dd=0; dd<nIn; dd++) {
-      if ((g->lookupDBs[dd]->value(kiter.fmer()) > 0) ||
-          (g->lookupDBs[dd]->value(kiter.rmer()) > 0))
-        s->nFound[dd]++;
-    }
-  }
-
-  //  Release the memory use for storing the sequence.
-
-  s->seq.releaseBases();
-}
-
-
-
-static
-void
-outputSequence(void *G, void *S) {
-  lookupGlobal *g   = (lookupGlobal *)G;
-  existInput   *s   = (existInput   *)S;
-  int32         nIn = g->lookupDBs.size();
-
-  //  Allocate space for the output string.
-
-  resizeArray(g->outstring, 0, g->outstringMax, 16 + 16 * 2 * nIn, _raAct::doNothing);
-
-  //  Create the string.
-
-  char *t = g->outstring;
-
-  *t++ = '\t';
-  t = toDec(s->nTotal, t);
-
-  for (uint32 dd=0; dd<nIn; dd++) {
-    *t++ = '\t';
-    t = toDec(g->lookupDBs[dd]->nKmers(), t);
-
-    *t++ = '\t';
-    t = toDec(s->nFound[dd], t);
-  }
-
-  *t++ = '\n';
-  *t   = 0;
-
-  //  And output it.
-
-  fputs(s->seq.ident(), g->outFile1->file());
-  fputs(g->outstring,   g->outFile1->file());
-
-  delete s;
-}
-
-
-
-
-void
-reportExistence(lookupGlobal *g) {
-  sweatShop     *ss = new sweatShop(loadSequence, processSequence, outputSequence);
-
-  ss->setLoaderQueueSize(4096);
-  ss->setNumberOfWorkers(omp_get_max_threads());
-  ss->setWriterQueueSize(4096);
-
-  ss->run(g, g->showProgress);
-
-  delete ss;
-}
diff --git a/ext/meryl/src/meryl-lookup/include-exclude.C b/ext/meryl/src/meryl-lookup/include-exclude.C
deleted file mode 100644
index d2f9c56..0000000
--- a/ext/meryl/src/meryl-lookup/include-exclude.C
+++ /dev/null
@@ -1,144 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl, a genomic k-kmer counter with nice features.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "meryl-lookup.H"
-#include "sweatShop.H"
-
-
-
-class filterInput {
-public:
-  filterInput() {
-  };
-  ~filterInput() {
-  };
-
-  dnaSeq        seq1;
-  dnaSeq        seq2;
-
-  uint64        nTotal;
-  uint64        nFound;
-};
-
-
-
-
-static
-void *
-loadSequence(void *G) {
-  lookupGlobal *g = (lookupGlobal *)G;
-  filterInput  *s = new filterInput;
-
-  bool   load1 = (g->seqFile1 != nullptr) && (g->seqFile1->loadSequence(s->seq1) == true);
-  bool   load2 = (g->seqFile2 != nullptr) && (g->seqFile2->loadSequence(s->seq2) == true);
-
-  if ((load1 == false) &&
-      (load2 == false)) {
-    delete s;
-    return(nullptr);
-  }
-
-  return(s);
-}
-
-
-
-static
-uint64
-processSequence(merylExactLookup *L, dnaSeq &seq) {
-  kmerIterator kiter(seq.bases(), seq.length());
-  uint64       found = 0;
-
-  while (kiter.nextMer())
-    if ((L->value(kiter.fmer()) > 0) ||
-        (L->value(kiter.rmer()) > 0))
-      found++;
-
-  return(found);
-}
-
-
-static
-void
-processSequence(void *G, void *T, void *S) {
-  lookupGlobal *g = (lookupGlobal *)G;
-  filterInput  *s = (filterInput  *)S;
-
-  //  Count the number of kmers found in the database from either
-  //  seq1 or seq2.
-
-  s->nFound  = processSequence(g->lookupDBs[0], s->seq1);
-  s->nFound += processSequence(g->lookupDBs[0], s->seq2);
-}
-
-
-
-static
-void
-outputSequence(compressedFileWriter  *O,
-               dnaSeq                &seq,
-               uint64                 nFound) {
-
-  if (O == nullptr)
-    return;
-
-  if (seq.quals()[0] == 0)   fprintf(O->file(), ">%s nKmers=%lu\n%s\n",        seq.ident(), nFound, seq.bases());
-  else                       fprintf(O->file(), "@%s nKmers=%lu\n%s\n+\n%s\n", seq.ident(), nFound, seq.bases(), seq.quals());
-}
-
-
-static
-void
-outputSequence(void *G, void *S) {
-  lookupGlobal *g = (lookupGlobal *)G;
-  filterInput  *s = (filterInput  *)S;
-
-  g->nReadsTotal++;
-
-  //  Write output if:
-  //    'include' and    mers found.
-  //    'exclude' and no mers found.
-
-  if (((s->nFound  > 0) && (g->reportType == lookupOp::opInclude)) ||
-      ((s->nFound == 0) && (g->reportType == lookupOp::opExclude))) {
-    g->nReadsFound++;
-
-    outputSequence(g->outFile1, s->seq1, s->nFound);
-    outputSequence(g->outFile2, s->seq2, s->nFound);
-  }
-
-  delete s;
-}
-
-
-
-void
-filter(lookupGlobal *g) {
-  sweatShop     *ss = new sweatShop(loadSequence, processSequence, outputSequence);
-
-  ss->setLoaderQueueSize(omp_get_max_threads());
-  ss->setNumberOfWorkers(omp_get_max_threads());
-  ss->setWriterQueueSize(omp_get_max_threads());
-
-  ss->run(g, g->showProgress);
-
-  delete ss;
-
-  fprintf(stderr, "\nIncluding %lu reads (or read pairs) out of %lu.\n", g->nReadsTotal, g->nReadsFound);
-}
-
diff --git a/ext/meryl/src/meryl-lookup/meryl-lookup.C b/ext/meryl/src/meryl-lookup/meryl-lookup.C
index 51d4e45..5a60b5a 100644
--- a/ext/meryl/src/meryl-lookup/meryl-lookup.C
+++ b/ext/meryl/src/meryl-lookup/meryl-lookup.C
@@ -16,241 +16,273 @@
  *  contains full conditions and disclaimers.
  */
 
-#include "meryl-lookup.H"
+#include "runtime.H"
 
-void   dumpExistence(lookupGlobal *G);
-void   reportExistence(lookupGlobal *G);
-void   filter(lookupGlobal *G);
+#include "kmers.H"
+#include "system.H"
+#include "sequence.H"
+#include "bits.H"
 
 
-void
-lookupGlobal::initialize(void) {
-  omp_set_num_threads(nThreads);
-}
+#define OP_NONE       0
+#define OP_DUMP       1
+#define OP_EXISTENCE  2
+#define OP_INCLUDE    3
+#define OP_EXCLUDE    4
 
 
 
 void
-lookupGlobal::loadLookupTables(void) {
-  std::vector<merylFileReader *>    merylDBs;    //  Input meryl database.
-  std::vector<double>               minMem;      //  Estimated min memory for lookup table.
-  std::vector<double>               optMem;      //  Estimated max memory for lookup table.
-
-  //  Open input meryl databases, initialize lookup.
-
-  for (uint32 ii=0; ii<lookupDBname.size(); ii++) {
-    merylDBs .push_back(new merylFileReader(lookupDBname[ii]));
-    minMem   .push_back(0.0);
-    optMem   .push_back(0.0);
-    lookupDBs.push_back(new merylExactLookup());
-  }
+dumpExistence(dnaSeqFile                  *sfile,
+              compressedFileWriter        *ofile,
+              vector<merylExactLookup *>  &klookup,
+              vector<const char *>        &klabel) {
 
-  //  Estimate memory needed for each lookup table.
+  //  Build a list of labels for each database.  If no labels are provided,
+  //  this is just an empty string.
 
-  double   minMemTotal = 0.0;
-  double   optMemTotal = 0.0;
+  char   **labels = new char * [klookup.size()];
 
-  for (uint32 ii=0; ii<lookupDBname.size(); ii++) {
-    fprintf(stderr, "--\n");
-    fprintf(stderr, "-- Estimating memory usage for '%s'.\n", lookupDBname[ii]);
-    fprintf(stderr, "--\n");
+  for (uint32 ll=0; ll<klookup.size(); ll++) {
 
-    double  minm, optm;
-    lookupDBs[ii]->estimateMemoryUsage(merylDBs[ii], maxMemory, minm, optm, minV, maxV);
+    //  If we don't have the ll'th input label, make an empty string.
 
-    minMemTotal += minm;
-    optMemTotal += optm;
-  }
-
-  //  Use either the smallest or 'fastest' table, or fail, depending on how
-  //  much memory the use lets us use.
+    if (klabel.size() <= ll) {
+      labels[ll]    = new char [1];
+      labels[ll][0] = 0;
+      continue;
+    }
 
-  bool  useOpt = (optMemTotal <= maxMemory);
-  bool  useMin = (minMemTotal <= maxMemory) && (useOpt == false);
+    //  Otherwise, we have a label, so allocate space for a tab, a copy of
+    //  the label, and a NUL byte, then create the string we'll output.
 
-  fprintf(stderr, "--\n");
-  fprintf(stderr, "-- Minimal memory needed: %.3f GB%s\n", minMemTotal, (useMin) ? "  enabled" : "");
-  fprintf(stderr, "-- Optimal memory needed: %.3f GB%s\n", optMemTotal, (useOpt) ? "  enabled" : "");
-  fprintf(stderr, "-- Memory limit           %.3f GB\n",   maxMemory);
-  fprintf(stderr, "--\n");
+    labels[ll] = new char [strlen(klabel[ll]) + 2];
 
-  if ((useMin == false) &&
-      (useOpt == false)) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "Not enough memory to load databases.  Increase -memory.\n");
-    fprintf(stderr, "\n");
-    exit(1);
+    labels[ll][0] = '\t';
+    strcpy(labels[ll] + 1, klabel[ll]);
   }
 
-  if (doEstimate == true) {
-    fprintf(stderr, "-- Stopping after memory estimated reported; -estimate option enabled.\n");
-    exit(0);
+  //  Scan each sequence against each database, writing one line per (position, database).
+
+  char     fString[65];   //  Scratch for the forward-mer text (presumably up to 64 bases + NUL; confirm).
+  char     rString[65];   //  Scratch for the reverse-mer text.
+  dnaSeq   seq;
+
+  for (uint32 seqId=0; sfile->loadSequence(seq); seqId++) {   //  seqId is the 0-based load order.
+    kmerIterator  kiter(seq.bases(), seq.length());
+
+    while (kiter.nextBase()) {
+      if (kiter.isValid() == false) {   //  No kmer here: emit a short 'n'/'N' line instead.
+        fprintf(ofile->file(), "%s\t%u\t%lu\t%c\n",
+                seq.name(),
+                seqId,
+                kiter.position(),
+                kiter.isACGTbgn() ? 'n' : 'N');
+      }
+
+      else {
+        for (uint32 dd=0; dd<klookup.size(); dd++) {
+          uint64  fValue = 0;   //  Left at zero unless exists() fills it in.
+          uint64  rValue = 0;
+          bool    fExists = klookup[dd]->exists(kiter.fmer(), fValue);
+          bool    rExists = klookup[dd]->exists(kiter.rmer(), rValue);
+
+          fprintf(ofile->file(), "%s\t%u\t%lu\t%c\t%s\t%lu\t%s\t%lu\t%s\n",
+                  seq.name(),
+                  seqId,
+                  kiter.position(),
+                  (fExists || rExists) ? 'T' : 'F',   //  'T' if either orientation is in this database.
+                  kiter.fmer().toString(fString), fValue,
+                  kiter.rmer().toString(rString), rValue,
+                  labels[dd]);   //  Empty, or tab + label.  NOTE(review): labels[] is never freed here.
+        }
+      }
+    }
   }
+}
 
-  //  Now load the data and forget about the input databases.
 
-  for (uint32 ii=0; ii<lookupDBname.size(); ii++) {
-    fprintf(stderr, "--\n");
-    fprintf(stderr, "-- Loading kmers from '%s' into lookup table.\n", lookupDBname[ii]);
-    fprintf(stderr, "--\n");
 
-    if (lookupDBs[ii]->load(merylDBs[ii], maxMemory, useMin, useOpt, minV, maxV) == false)
-      exit(1);
+void
+reportExistence(dnaSeqFile                  *sfile,
+                compressedFileWriter        *ofile,
+                vector<merylExactLookup *>  &klookup,
+                vector<const char *>        &klabel) {
+  dnaSeq   seq;
 
-    delete merylDBs[ii];
+  while (sfile->loadSequence(seq)) {   //  One output line per input sequence.
+    kmerIterator  kiter(seq.bases(), seq.length());
+
+    uint64   nKmer      = 0;   //  Kmers iterated in this sequence.
+    uint64   nKmerFound = 0;   //  Of those, kmers with a positive value in the database.
+
+    while (kiter.nextMer()) {
+      nKmer++;
+
+      if ((klookup[0]->value(kiter.fmer()) > 0) ||   //  Only the first database is consulted;
+          (klookup[0]->value(kiter.rmer()) > 0))     //  any others, and klabel, are unused here.
+        nKmerFound++;
+    }
+
+    fprintf(ofile->file(), "%s\t%lu\t%lu\t%lu\n", seq.name(), nKmer, klookup[0]->nKmers(), nKmerFound);
   }
 }
 
 
 
-//  Open input sequences.
 void
-lookupGlobal::openInputs(void) {
+filter(dnaSeqFile                      *sfile1,
+       dnaSeqFile                      *sfile2,
+       compressedFileWriter            *ofile1,
+       compressedFileWriter            *ofile2,
+       vector<merylExactLookup *>  &klookup,
+       bool                             outputIfFound) {
 
-  if (seqName1) {
-    fprintf(stderr, "-- Opening input sequences '%s'.\n", seqName1);
-    seqFile1 = new dnaSeqFile(seqName1);
-  }
+  //  Do nothing if there are no sequences.
 
-  if (seqName2) {
-    fprintf(stderr, "-- Opening input sequences '%s'.\n", seqName2);
-    seqFile2 = new dnaSeqFile(seqName2);
-  }
-}
+  if ((sfile1 == NULL) && (sfile2 == NULL))
+    return;
 
+  //  While we load sequences from all files supplied...
 
+  dnaSeq  seq1;
+  dnaSeq  seq2;
 
-//  Open output writers.
-void
-lookupGlobal::openOutputs(void) {
+  uint64   nReads      = 0;
+  uint64   nReadsFound = 0;
 
-  if (outName1) {
-    fprintf(stderr, "-- Opening output file '%s'.\n", outName1);
-    outFile1 = new compressedFileWriter(outName1);
-  }
+  while (((sfile1 == NULL) || (sfile1->loadSequence(seq1))) &&
+         ((sfile2 == NULL) || (sfile2->loadSequence(seq2)))) {
+    uint32 nKmerFound = 0;
 
-  if (outName2) {
-    fprintf(stderr, "-- Opening output file '%s'.\n", outName1);
-    outFile2 = new compressedFileWriter(outName2);
-  }
-}
+    nReads++;
+
+    if (seq1.length() > 0) {
+      kmerIterator  kiter(seq1.bases(), seq1.length());
+
+      while (kiter.nextMer())
+        if ((klookup[0]->value(kiter.fmer()) > 0) ||
+            (klookup[0]->value(kiter.rmer()) > 0))
+          nKmerFound++;
+    }
+
+    if (seq2.length() > 0) {
+      kmerIterator  kiter(seq2.bases(), seq2.length());
 
+      while (kiter.nextMer())
+        if ((klookup[0]->value(kiter.fmer()) > 0) ||
+            (klookup[0]->value(kiter.rmer()) > 0))
+          nKmerFound++;
+    }
+
+    //  Report the sequence if:
+    //    any kmers are found and     ifFound
+    //    no  kmers are found and not ifFound
+
+    if ((nKmerFound > 0) == outputIfFound) {
+      nReadsFound++;
 
+      if ((sfile1 != NULL) && (ofile1 != NULL)) {   //  Skip quietly when no writer was opened for this input.
+        if (seq1.quals()[0] == 0)   fprintf(ofile1->file(), ">%s nKmers=%u\n%s\n",        seq1.name(), nKmerFound, seq1.bases());
+        else                        fprintf(ofile1->file(), "@%s nKmers=%u\n%s\n+\n%s\n", seq1.name(), nKmerFound, seq1.bases(), seq1.quals());
+      }
+
+      if ((sfile2 != NULL) && (ofile2 != NULL)) {   //  Likewise for the mate; main() passes NULL if no second -output.
+        if (seq2.quals()[0] == 0)   fprintf(ofile2->file(), ">%s nKmers=%u\n%s\n",        seq2.name(), nKmerFound, seq2.bases());
+        else                        fprintf(ofile2->file(), "@%s nKmers=%u\n%s\n+\n%s\n", seq2.name(), nKmerFound, seq2.bases(), seq2.quals());
+      }
+    }
+  }
 
+  fprintf(stderr, "\nIncluding %lu reads (or read pairs) out of %lu.\n", nReadsFound, nReads);
+}
 
 
 
 int
 main(int argc, char **argv) {
-  lookupGlobal  *G = new lookupGlobal;
+  char           *seqName1 = NULL;
+  char           *seqName2 = NULL;
+
+  char           *outName1 = NULL;
+  char           *outName2 = NULL;
+
+  vector<const char *>  inputDBname;
+  vector<const char *>  inputDBlabel;
+
+  uint64          minV       = 0;
+  uint64          maxV       = UINT64_MAX;
+  uint32          threads    = omp_get_max_threads();
+  uint32          memory     = 0;
+  uint32          reportType = OP_NONE;
 
   argc = AS_configure(argc, argv);
 
-  std::vector<char const *>  err;
-  for (int32 arg=1; arg < argc; arg++) {
+  vector<char const *>  err;   //  Holds string literals as well as snprintf'd buffers, so elements must be const.
+  int             arg = 1;
+  while (arg < argc) {
     if        (strcmp(argv[arg], "-sequence") == 0) {
-      G->seqName1 = argv[++arg];
+      seqName1 = argv[++arg];
 
       if ((arg + 1 < argc) && (argv[arg + 1][0] != '-'))
-        G->seqName2 = argv[++arg];
+        seqName2 = argv[++arg];
 
     } else if (strcmp(argv[arg], "-mers") == 0) {
       while ((arg + 1 < argc) && (argv[arg + 1][0] != '-'))
-        G->lookupDBname.push_back(argv[++arg]);
+        inputDBname.push_back(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-labels") == 0) {
       while ((arg + 1 < argc) && (argv[arg + 1][0] != '-'))
-        G->lookupDBlabel.push_back(argv[++arg]);
+        inputDBlabel.push_back(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-output") == 0) {
-      G->outName1 = argv[++arg];
+      outName1 = argv[++arg];
 
       if ((arg + 1 < argc) && (argv[arg + 1][0] != '-'))
-        G->outName2 = argv[++arg];
+        outName2 = argv[++arg];
 
     } else if (strcmp(argv[arg], "-min") == 0) {
-      G->minV = (kmvalu)strtouint32(argv[++arg]);
+      minV = strtouint64(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-max") == 0) {
-      G->maxV = (kmvalu)strtouint32(argv[++arg]);
+      maxV = strtouint64(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-threads") == 0) {
-      G->nThreads = strtouint32(argv[++arg]);
+      threads = strtouint32(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-memory") == 0) {
-      G->maxMemory = strtodouble(argv[++arg]);
+      memory = strtouint32(argv[++arg]);
 
     } else if (strcmp(argv[arg], "-dump") == 0) {
-      G->reportType = lookupOp::opDump;
+      reportType = OP_DUMP;
 
     } else if (strcmp(argv[arg], "-existence") == 0) {
-      G->reportType = lookupOp::opExistence;
+      reportType = OP_EXISTENCE;
 
     } else if (strcmp(argv[arg], "-include") == 0) {
-      G->reportType = lookupOp::opInclude;
+      reportType = OP_INCLUDE;
 
     } else if (strcmp(argv[arg], "-exclude") == 0) {
-      G->reportType = lookupOp::opExclude;
-
-    } else if (strcmp(argv[arg], "-estimate") == 0) {
-      G->doEstimate = true;
-
-    } else if (strcmp(argv[arg], "-V") == 0) {
-      G->showProgress = true;
+      reportType = OP_EXCLUDE;
 
     } else {
       char *s = new char [1024];
       snprintf(s, 1024, "Unknown option '%s'.\n", argv[arg]);
       err.push_back(s);
     }
-  }
-
-  //  Check for invalid usage.
-
-  if (G->reportType == lookupOp::opNone) {
-    err.push_back("No report-type (-existence, -dump, -include, -exclude) supplied.\n");
-  }
-
-  if (G->reportType == lookupOp::opDump) {
-    if (G->seqName1 == nullptr)  err.push_back("No input sequences (-sequence) supplied.\n");
-    if (G->seqName2 != nullptr)  err.push_back("Only one input sequence (-sequence) supported for -dump.\n");
-
-    if (G->outName1 == nullptr)  err.push_back("No output file (-output) supplied.\n");
-    if (G->outName2 != nullptr)  err.push_back("Only one output file (-output) supported for -dump.\n");
 
-    if (G->lookupDBname.size() == 0) err.push_back("No meryl database (-mers) supplied.\n");
-    if (G->lookupDBname.size()  > 1) err.push_back("Only one meryl database (-mers) supported for -dump.\n");
-  }
-
-  if (G->reportType == lookupOp::opExistence) {
-    if (G->seqName1 == nullptr)  err.push_back("No input sequences (-sequence) supplied.\n");
-    if (G->seqName2 != nullptr)  err.push_back("Only one input sequence (-sequence) supported for -existence.\n");
-
-    if (G->outName1 == nullptr)  err.push_back("No output file (-output) supplied.\n");
-    if (G->outName2 != nullptr)  err.push_back("Only one output file (-output) supported for -existence.\n");
-
-    if (G->lookupDBname.size() == 0) err.push_back("No meryl database (-mers) supplied.\n");
-  }
-
-  if ((G->reportType == lookupOp::opInclude) ||
-      (G->reportType == lookupOp::opExclude)) {
-    if (G->seqName1 == nullptr)  err.push_back("No input sequences (-sequence) supplied.\n");
-    if (G->outName1 == nullptr)  err.push_back("No output file (-output) supplied.\n");
-
-    if ((G->seqName2 != nullptr) &&
-        (G->outName2 == nullptr)) err.push_back("No second output file (-output) supplied for second input (-input) file.\n");
-
-    if ((G->seqName2 == nullptr) &&
-        (G->outName2 != nullptr)) err.push_back("No second input file (-input) supplied for second output (-output) file.\n");
-
-    if (G->lookupDBname.size() == 0) err.push_back("No meryl database (-mers) supplied.\n");
-    if (G->lookupDBname.size()  > 1) err.push_back("Only one meryl database (-mers) supported for -include or -exclude.\n");
+    arg++;
   }
 
+  if (seqName1 == NULL)          err.push_back("No input sequences (-sequence) supplied.\n");
+  if (outName1 == NULL)          err.push_back("No output file (-output) supplied.\n");   //  Every report type writes to it.
+  if (inputDBname.size() == 0)   err.push_back("No query meryl database (-mers) supplied.\n");
+  if (reportType == OP_NONE)     err.push_back("No report-type (-existence, etc) supplied.\n");
+  if ((seqName2 != NULL) && (outName2 == NULL) && ((reportType == OP_INCLUDE) || (reportType == OP_EXCLUDE)))
+    err.push_back("No second output file (-output) supplied for second input (-sequence) file.\n");
 
   if (err.size() > 0) {
     fprintf(stderr, "usage: %s <report-type> \\\n", argv[0]);
-    fprintf(stderr, "        [-estimate] \\\n");
     fprintf(stderr, "         -sequence <input1.fasta> [<input2.fasta>] \\\n");
     fprintf(stderr, "         -output   <output1>      [<output2>]\n");
     fprintf(stderr, "         -mers     <input1.meryl> [<input2.meryl>] [...] \\\n");
@@ -260,7 +292,7 @@ main(int argc, char **argv) {
     fprintf(stderr, "\n");
     fprintf(stderr, "  Multiple databases are supported.\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "  Up to two input sequences are supported (only for -include / -exclude).\n");
+    fprintf(stderr, "  Up to two input sequences are supported (only for -include / -exclude).\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "  Input files can be FASTA or FASTQ; uncompressed, gz, bz2 or xz compressed\n");
     fprintf(stderr, "\n");
@@ -278,25 +310,18 @@ main(int argc, char **argv) {
     fprintf(stderr, "  exits with an error.\n");
     fprintf(stderr, "    -memory m   Don't use more than m GB memory\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "  If -estimate is supplied, processing will stop after a (quick) estimate\n");
-    fprintf(stderr, "  of memory needed to load the databases is written to stdout.\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "\n");
     fprintf(stderr, "  Exactly one report type must be specified.\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "  -existence");
     fprintf(stderr, "    Report a tab-delimited line for each sequence showing the number of kmers\n");
     fprintf(stderr, "    in the sequence, in the database, and in both.\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "    Multiple input -mers may be supplied.  If no output is supplied, output is written\n");
-    fprintf(stderr, "    to stdout.\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "    output:  seqName <tab> mersInSeq <tab> mersInDB1 <tab> mersInSeq&DB1 [ <tab> mersInDB2 <tab> mersInSeq&DB2 ... ]\n");
-    fprintf(stderr, "      seqName      - name of the sequence\n");
-    fprintf(stderr, "      mersInSeq    - number of mers in the sequence\n");
-    fprintf(stderr, "      mersInDB     - number of mers in the meryl database\n");
-    fprintf(stderr, "      mersInSeq&DB - number of mers in the sequence that are\n");
-    fprintf(stderr, "                     also in the database\n");
+    fprintf(stderr, "    output:  seqName <tab> mersInSeq <tab> mersInDB <tab> mersInBoth\n");
+    fprintf(stderr, "      seqName    - name of the sequence\n");
+    fprintf(stderr, "      mersInSeq  - number of mers in the sequence\n");
+    fprintf(stderr, "      mersInDB   - number of mers in the meryl database\n");
+    fprintf(stderr, "      mersInBoth - number of mers in the sequence that are\n");
+    fprintf(stderr, "                   also in the database\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "  -dump\n");
@@ -304,9 +329,6 @@ main(int argc, char **argv) {
     fprintf(stderr, "    order, annotated with the value of the kmer in the input database.  If the kmer\n");
     fprintf(stderr, "    does not exist in the database its value will be reported as zero.\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "    Only one input may be supplied.  If no output is supplied, output is written\n");
-    fprintf(stderr, "    to stdout.\n");
-    fprintf(stderr, "\n");
     fprintf(stderr, "    output:  seqName <tab> seqId <tab> seqPos <tab> exists <tab> fwd-mer <tab> fwd-val <tab> rev-mer <tab> rev-val\n");
     fprintf(stderr, "      seqName    - name of the sequence this kmer is from\n");
     fprintf(stderr, "      seqId      - numeric version of the seqName (0-based)\n");
@@ -317,14 +339,14 @@ main(int argc, char **argv) {
     fprintf(stderr, "      rev-mer    - reverse mer sequence\n");
     fprintf(stderr, "      rev-val    - value of the reverse mer in the database\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "\n");
     fprintf(stderr, "  -include / -exclude\n");
     fprintf(stderr, "    Extract sequences containing (-include) or not containing (-exclude) kmers in\n");
     fprintf(stderr, "    any input database.  Output sequences are written in the same format as the input\n");
     fprintf(stderr, "    sequences, with the number of kmers found added to the name.\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "    If two input files are supplied, the corresponding sequences are treated as a pair,\n");
-    fprintf(stderr, "    and two output files MUST be supplied.\n");
+    fprintf(stderr, "    If two input files are supplied, the corresponding sequences are treated as a pair.\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "    output:  sequence given format (fasta or fastq) with the number of overlapping kmers appended\n");
     fprintf(stderr, "             if pairs of sequences are given, R1 will be stdout and R2 be named as <output.r2>\n");
@@ -332,6 +354,13 @@ main(int argc, char **argv) {
     fprintf(stderr, "      seqName    - name of the sequence this kmer is from\n");
     fprintf(stderr, "      mersInBoth - number of mers in both sequence and in the database\n");
     fprintf(stderr, "\n");
+    fprintf(stderr, "  -exclude       Extract sequences *NOT containing* kmers in <input.meryl>.\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "     output:  sequence given format (fasta or fastq) without reads containing kmers\n");
+    fprintf(stderr, "              if pairs of sequences are given, R1 will be stdout and R2 be named as <output.r2>\n");
+    fprintf(stderr, "              <output.r2> will be automatically compressed if ends with .gz, .bz2, or .xz\n");
+    fprintf(stderr, "         seqName    - name of the sequence this kmer is from\n");
+    fprintf(stderr, "\n");
 
     for (uint32 ii=0; ii<err.size(); ii++)
       if (err[ii])
@@ -340,26 +369,76 @@ main(int argc, char **argv) {
     exit(1);
   }
 
-  G->initialize();
-  G->loadLookupTables();
-  G->openInputs();
-  G->openOutputs();
-
-  switch (G->reportType) {
-    case lookupOp::opNone:                               break;
-    case lookupOp::opDump:         dumpExistence(G);     break;
-    case lookupOp::opExistence:    reportExistence(G);   break;
-    case lookupOp::opInclude:      filter(G);            break;
-    case lookupOp::opExclude:      filter(G);            break;
-    default:                                             break;
+  omp_set_num_threads(threads);
+
+  //  Open the kmers, build a lookup table.
+
+  vector<merylExactLookup *>  kmerLookups;
+
+  for (uint32 ii=0; ii<inputDBname.size(); ii++) {
+    fprintf(stderr, "-- Loading kmers from '%s' into lookup table.\n", inputDBname[ii]);
+
+    merylFileReader   *merylDB    = new merylFileReader(inputDBname[ii]);
+    merylExactLookup  *kmerLookup = new merylExactLookup(merylDB, memory, minV, maxV);
+
+    kmerLookups.push_back(kmerLookup);
+
+    if (kmerLookup->configure() == false)
+      exit(1);
+
+    kmerLookup->load();
+
+    delete merylDB;   //  Not needed anymore.
   }
 
-  delete G;
-  fprintf(stderr, "Bye!\n");
+  //  Open input sequences.
 
-  return(0);
-}
+  dnaSeqFile  *seqFile1 = NULL;
+  dnaSeqFile  *seqFile2 = NULL;
+
+  if (seqName1 != NULL) {
+    fprintf(stderr, "-- Opening sequences in '%s'.\n", seqName1);
+
+    seqFile1 = new dnaSeqFile(seqName1);
+  }
+
+  if (seqName2 != NULL) {
+    fprintf(stderr, "-- Opening sequences in '%s'.\n", seqName2);
+
+    seqFile2 = new dnaSeqFile(seqName2);
+  }
+
+  //  Open output writers.
+
+  compressedFileWriter  *outFile1 = (outName1 == NULL) ? NULL : new compressedFileWriter(outName1);
+  compressedFileWriter  *outFile2 = (outName2 == NULL) ? NULL : new compressedFileWriter(outName2);
+
+  //  Do something.
+
+  if (reportType == OP_DUMP)
+    dumpExistence(seqFile1, outFile1, kmerLookups, inputDBlabel);
+
+  if (reportType == OP_EXISTENCE)
+    reportExistence(seqFile1, outFile1, kmerLookups, inputDBlabel);
 
+  if (reportType == OP_INCLUDE)
+    filter(seqFile1, seqFile2, outFile1, outFile2, kmerLookups, true);
 
+  if (reportType == OP_EXCLUDE)
+    filter(seqFile1, seqFile2, outFile1, outFile2, kmerLookups, false);
 
+  //  Done!
 
+  delete seqFile1;
+  delete seqFile2;
+
+  delete outFile1;
+  delete outFile2;
+
+  for (uint32 ii=0; ii<kmerLookups.size(); ii++)
+    delete kmerLookups[ii];
+
+  fprintf(stderr, "Bye!\n");
+
+  exit(0);
+}
diff --git a/ext/meryl/src/meryl-lookup/meryl-lookup.H b/ext/meryl/src/meryl-lookup/meryl-lookup.H
deleted file mode 100644
index e631f13..0000000
--- a/ext/meryl/src/meryl-lookup/meryl-lookup.H
+++ /dev/null
@@ -1,101 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl, a genomic k-kmer counter with nice features.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "runtime.H"
-
-#include "sequence.H"
-#include "kmers.H"
-
-
-enum class lookupOp {
-  opNone,
-  opDump,
-  opExistence,
-  opInclude,
-  opExclude
-};
-
-
-
-class lookupGlobal {
-public:
-  lookupGlobal() {
-  };
-
-  ~lookupGlobal() {
-    delete [] outstring;
-
-    for (uint32 ii=0; ii<lookupDBs.size(); ii++)
-      delete lookupDBs[ii];
-
-    delete seqFile1;
-    delete seqFile2;
-
-    delete outFile1;
-    delete outFile2;
-  };
-
-  void initialize(void);
-  void loadLookupTables(void);
-  void openInputs(void);
-  void openOutputs(void);
-
-  //  Inputs.
-
-  uint32                            nThreads     = getMaxThreadsAllowed();
-  double                            maxMemory    = getMaxMemoryAllowed() / 1024.0 / 1024.0 / 1024.0;
-
-  char const                       *seqName1     = nullptr;
-  char const                       *seqName2     = nullptr;
-
-  dnaSeqFile                       *seqFile1     = nullptr;
-  dnaSeqFile                       *seqFile2     = nullptr;
-
-  char const                       *outName1     = "-";
-  char const                       *outName2     = nullptr;
-
-  compressedFileWriter             *outFile1     = nullptr;
-  compressedFileWriter             *outFile2     = nullptr;
-
-
-  std::vector<const char *>         lookupDBname;
-  std::vector<const char *>         lookupDBlabel;
-  std::vector<merylExactLookup *>   lookupDBs;   //  Kmer lookup table.
-
-  kmvalu                            minV         = 0;
-  kmvalu                            maxV         = kmvalumax;
-
-  lookupOp                          reportType   = lookupOp::opNone;
-
-  bool                              doEstimate   = false;
-  bool                              showProgress = false;
-
-  //  Outputs for existence.
-
-  char                             *outstring    = nullptr;
-  uint32                            outstringMax = 0;
-
-  //  Outputs for include/exclude.
-
-  uint64                            nReadsTotal  = 0;
-  uint64                            nReadsFound  = 0;
-
-};
-
-
-
diff --git a/ext/meryl/src/meryl-lookup/meryl-lookup.mk b/ext/meryl/src/meryl-lookup/meryl-lookup.mk
index d9b7967..5c7cfcf 100644
--- a/ext/meryl/src/meryl-lookup/meryl-lookup.mk
+++ b/ext/meryl/src/meryl-lookup/meryl-lookup.mk
@@ -1,29 +1,35 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := meryl-lookup
-SOURCES  := meryl-lookup.C \
-            dump.C \
-            existence.C \
-            include-exclude.C
+SOURCES  := meryl-lookup.C
 
-SRC_INCDIRS := .
+SRC_INCDIRS  := . ../utility/src/utility ../meryl
+
+#  If we're part of Canu, build with canu support.
+#  Otherwise, don't.
 
-#  If we're part of Canu, build with canu support and use Canu's copy of
-#  meryl-utility.  Otherwise, don't.
 ifneq ($(wildcard stores/sqStore.H), )
-  SRC_CXXFLAGS := -DCANU
-  SRC_INCDIRS  += ../../../utility/src/utility ../../../stores
 
-#  If we're part of something else, include the something else's
-#  utility directory.
-else ifneq ($(wildcard meryl/src/meryl/meryl.C), )
-  SRC_INCDIRS  += ../../../utility/src/utility
+SRC_CXXFLAGS := -DCANU
+
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
 
-#  Otherwise, we're building directly in the meryl repo.
 else
-  SRC_INCDIRS  += ../utility/src/utility
 
-endif
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lmeryl
+TGT_PREREQS := libmeryl.a
 
+endif
 
-TGT_LDFLAGS  := -L${TARGET_DIR}/lib
-TGT_LDLIBS   := -l${MODULE}
-TGT_PREREQS  := lib${MODULE}.a
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/meryl-simple/meryl-simple.C b/ext/meryl/src/meryl-simple/meryl-simple.C
index a666aef..43ead0f 100644
--- a/ext/meryl/src/meryl-simple/meryl-simple.C
+++ b/ext/meryl/src/meryl-simple/meryl-simple.C
@@ -36,7 +36,7 @@ main(int argc, char **argv) {
 
   argc = AS_configure(argc, argv);
 
-  std::vector<char *>  err;
+  vector<char *>  err;
   for (int32 arg=1; arg < argc; arg++) {
     if      (strcmp(argv[arg], "-k") == 0) {
       kSize = strtouint32(argv[++arg]);
@@ -152,7 +152,7 @@ main(int argc, char **argv) {
 
   fprintf(stderr, "-- Sorting %lu kmers.\n", kmersLen);
 
-  std::sort(kmers, kmers + kmersLen);
+  sort(kmers, kmers + kmersLen);
 
   //  Scan, count and output stuff.
 
diff --git a/ext/meryl/src/meryl-simple/meryl-simple.mk b/ext/meryl/src/meryl-simple/meryl-simple.mk
index a22a287..9e7e0f1 100644
--- a/ext/meryl/src/meryl-simple/meryl-simple.mk
+++ b/ext/meryl/src/meryl-simple/meryl-simple.mk
@@ -1,26 +1,37 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := meryl-simple
 SOURCES  := meryl-simple.C
 
-SRC_INCDIRS := .
+SRC_INCDIRS  := . ../utility/src/utility
 
 #  If we're part of Canu, build with canu support and use Canu's copy of
 #  meryl-utility.  Otherwise, don't.
+
 ifneq ($(wildcard stores/sqStore.H), )
-  SRC_CXXFLAGS := -DCANU
-  SRC_INCDIRS  := ../../../utility/src/utility ../../../stores
 
-#  If we're part of something else, include the something else's
-#  utility directory.
-else ifneq ($(wildcard meryl/src/meryl/meryl.C), )
-  SRC_INCDIRS  := ../../../utility/src/utility
+SRC_CXXFLAGS := -DCANU
+
+SRC_INCDIRS  := . ../../../utility/src/utility ../../../stores
+
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
 
-#  Otherwise, we're building directly in the meryl repo.
 else
-  SRC_INCDIRS  := ../utility/src/utility
 
-endif
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lmeryl
+TGT_PREREQS := libmeryl.a
 
+endif
 
-TGT_LDFLAGS  := -L${TARGET_DIR}/lib
-TGT_LDLIBS   := -l${MODULE}
-TGT_PREREQS  := lib${MODULE}.a
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/meryl/meryl.C b/ext/meryl/src/meryl/meryl.C
index 963fd5f..fd24e79 100644
--- a/ext/meryl/src/meryl/meryl.C
+++ b/ext/meryl/src/meryl/meryl.C
@@ -30,23 +30,23 @@ main(int argc, char **argv) {
 
   argc = AS_configure(argc, argv);
 
-  std::vector<char *>  err;
+  vector<char *>  err;
   for (int32 arg=1; arg < argc; arg++) {
 
     //
     //  Scan for debug options and requests for help.
     //
 
-    if (strcmp(argv[arg], "dumpIndex") == 0) {         //  Report the index for the dataset.
-      arg++;                                           //  It's just the parameters used for encoding.
-      delete new merylFileReader(argv[arg++], true);   //  Expects a meryl db directory as a parameter.
-      exit(0);
+    if (strcmp(argv[arg], "dumpIndex") == 0) {               //  Report the index for the dataset.
+      arg++;                                                 //  It's just the parameters used for encoding.
+      delete new merylFileReader(argv[arg++], true);
+      continue;
     }
 
-    if (strcmp(argv[arg], "dumpFile") == 0) {          //  Dump the index for a single data file.
-      arg++;                                           //  Expects a meryl file prefix as a parameter.
-      dumpMerylDataFile(argv[arg++]);                  //  (e.g., db.meryl/0x000000)
-      exit(0);
+    if (strcmp(argv[arg], "dumpFile") == 0) {                //  Dump the index for a single data file.
+      arg++;
+      dumpMerylDataFile(argv[arg++]);
+      continue;
     }
 
     if ((strcmp(argv[arg], "-h")   == 0) ||
@@ -103,8 +103,6 @@ main(int argc, char **argv) {
     fprintf(stderr, "\n");
     fprintf(stderr, "  COMMANDS:\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "    statistics           display total, unique, distnict, present number of the kmers on the screen.  accepts exactly one input.\n");
-    fprintf(stderr, "    histogram            display kmer frequency on the screen as 'frequency<tab>count'.  accepts exactly one input.\n");
     fprintf(stderr, "    print                display kmers on the screen as 'kmer<tab>count'.  accepts exactly one input.\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "    count                Count the occurrences of canonical kmers in the input.  must have 'output' specified.\n");
@@ -124,7 +122,6 @@ main(int argc, char **argv) {
     fprintf(stderr, "    decrease X           subtract X from the count of each kmer.\n");
     fprintf(stderr, "    multiply X           multiply the count of each kmer by X.\n");
     fprintf(stderr, "    divide X             divide the count of each kmer by X.\n");
-    fprintf(stderr, "    divide-round X       divide the count of each kmer by X and round results. count < X will become 1.\n");
     fprintf(stderr, "    modulo X             set the count of each kmer to the remainder of the count divided by X.\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "    union                return kmers that occur in any input, set the count to the number of inputs with this kmer.\n");
@@ -137,8 +134,6 @@ main(int argc, char **argv) {
     fprintf(stderr, "    intersect-max        return kmers that occur in all inputs, set the count to the maximum count.\n");
     fprintf(stderr, "    intersect-sum        return kmers that occur in all inputs, set the count to the sum of the counts.\n");
     fprintf(stderr, "\n");
-    fprintf(stderr, "    subtract             return kmers that occur in the first input, subtracting counts from the other inputs\n");
-    fprintf(stderr, "\n");
     fprintf(stderr, "    difference           return kmers that occur in the first input, but none of the other inputs\n");
     fprintf(stderr, "    symmetric-difference return kmers that occur in exactly one input\n");
     fprintf(stderr, "\n");
@@ -241,7 +236,7 @@ main(int argc, char **argv) {
       continue;
 
     fprintf(stderr, "\n");
-    fprintf(stderr, "PROCESSING TREE #%u using %u thread%s.\n", rr+1, getMaxThreadsAllowed(), getMaxThreadsAllowed() == 1 ? "" : "s");
+    fprintf(stderr, "PROCESSING TREE #%u using %u thread%s.\n", rr+1, omp_get_max_threads(), omp_get_max_threads() == 1 ? "" : "s");
     B->printTree(root, 2);
 
 #pragma omp parallel for schedule(dynamic, 1)
diff --git a/ext/meryl/src/meryl/meryl.H b/ext/meryl/src/meryl/meryl.H
index 7c0e2f9..cf0c7d5 100644
--- a/ext/meryl/src/meryl/meryl.H
+++ b/ext/meryl/src/meryl/meryl.H
@@ -28,6 +28,7 @@
 #include <stack>
 #include <vector>
 #include <algorithm>
+using namespace std;
 
 
 class merylCommandBuilder {
@@ -45,7 +46,7 @@ public:
   bool    isPrinter(void);
 
   bool    isMerylInput(void);
-  bool    isCanuInput(std::vector<char *> &err);
+  bool    isCanuInput(vector<char *> &err);
   bool    isSequenceInput(void);
 
   void    finalize(void);
@@ -97,11 +98,12 @@ private:
   //
   //  _opList is a list of operations.
 
-  std::stack <merylOperation *>   _opStack;
-  std::vector<merylOperation *>   _opList;
-  merylOperation                **_thList[64] = { nullptr };   //  Mirrors opList
+  stack <merylOperation *>   _opStack;
+  vector<merylOperation *>   _opList;
+  merylOperation           **_thList[64] = { nullptr };   //  Mirrors opList
+
+  vector<uint32>             _opRoot;
 
-  std::vector<uint32>             _opRoot;
 };
 
 
diff --git a/ext/meryl/src/meryl/meryl.mk b/ext/meryl/src/meryl/meryl.mk
index f7541df..8aa77b9 100644
--- a/ext/meryl/src/meryl/meryl.mk
+++ b/ext/meryl/src/meryl/meryl.mk
@@ -1,3 +1,13 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := meryl
 SOURCES  := meryl.C \
             merylCommandBuilder.C \
@@ -10,26 +20,27 @@ SOURCES  := meryl.C \
             merylOp-nextMer.C \
             merylOp.C
 
-SRC_INCDIRS := .
+SRC_INCDIRS  := . ../utility/src/utility
 
 #  If we're part of Canu, build with canu support and use Canu's copy of
 #  meryl-utility.  Otherwise, don't.
+
 ifneq ($(wildcard stores/sqStore.H), )
-  SRC_CXXFLAGS := -DCANU
-  SRC_INCDIRS  := ../../../utility/src/utility ../../../stores
 
-#  If we're part of something else, include the something else's
-#  utility directory.
-else ifneq ($(wildcard meryl/src/meryl/meryl.C), )
-  SRC_INCDIRS  := ../../../utility/src/utility
+SRC_CXXFLAGS := -DCANU
+
+SRC_INCDIRS  := . ../../../utility/src/utility ../../../stores
+
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
 
-#  Otherwise, we're building directly in the meryl repo.
 else
-  SRC_INCDIRS  := ../utility/src/utility
 
-endif
+TGT_LDFLAGS := -L${TARGET_DIR}/lib
+TGT_LDLIBS  := -lmeryl
+TGT_PREREQS := libmeryl.a
 
+endif
 
-TGT_LDFLAGS  := -L${TARGET_DIR}/lib
-TGT_LDLIBS   := -l${MODULE}
-TGT_PREREQS  := lib${MODULE}.a
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/meryl/merylCommandBuilder.C b/ext/meryl/src/meryl/merylCommandBuilder.C
index 2755612..0523c54 100644
--- a/ext/meryl/src/meryl/merylCommandBuilder.C
+++ b/ext/meryl/src/meryl/merylCommandBuilder.C
@@ -57,8 +57,8 @@ isNumber(char *s, char dot='.') {
 
 //  Everything is initialized in the declaration.  Nothing really to do here.
 merylCommandBuilder::merylCommandBuilder() {
-  _allowedThreads = getMaxThreadsAllowed();   //  Absolute maximum limits on
-  _allowedMemory  = getMaxMemoryAllowed();    //  memory= and threads= values
+  _allowedThreads = omp_get_max_threads();     //  Absolute maximum limits on
+  _allowedMemory  = getPhysicalMemorySize();   //  memory= and threads= values.
 }
 
 
@@ -155,7 +155,7 @@ merylCommandBuilder::initialize(char *opt) {
 
   //  Save a few copies of the command line word.
 
-  strncpy(_inoutName, _optString, FILENAME_MAX + 1);
+  strncpy(_inoutName, _optString, FILENAME_MAX);
 
   snprintf(_indexName, FILENAME_MAX, "%s/merylIndex", _optString);
   snprintf(_sqInfName, FILENAME_MAX, "%s/info",       _optString);
@@ -214,22 +214,18 @@ merylCommandBuilder::processOptions(void) {
     return(true);
   }
 
-  //  If the string is entirely a number, treat it as either a threshold or a
-  //  constant, depending on the operation.  This is used for things like
-  //  "greater-than 45" and "divide 2".
-  //
-  //  If there is no operation, or it doesn't want a number, we fall trhough
-  //  and return 'false' when key/val is checked below.
-
-  bool  isNum = isNumber(_optString, 0);
-
-  if ((_opStack.top()->needsThreshold() == true) && (isNum == true)) {
-    _opStack.top()->setThreshold(strtouint64(_optString));
+  if (strncmp(_optString, "-E", 3) == 0) {
+#warning "-E not implemented."
+    //findMaxInputSizeForMemorySize(strtouint32(argv[arg+1]),
+    //                              (uint64)(1000000000 * strtodouble(argv[arg+2])));
     return(true);
   }
 
-  if ((_opStack.top()->needsConstant() == true) && (isNum == true)) {
-    _opStack.top()->setConstant(strtouint64(_optString));
+  //  If the string is entirely a number, treat it as a threshold.  This is
+  //  used for things like "greater-than 45".
+
+  if (isNumber(_optString, 0)) {
+    _opStack.top()->setThreshold(strtouint64(_optString));
     return(true);
   }
 
@@ -354,7 +350,6 @@ merylCommandBuilder::processOperation(void) {
   else if (0 == strcmp(_optString, "decrease"))               non = opDecrease;
   else if (0 == strcmp(_optString, "multiply"))               non = opMultiply;
   else if (0 == strcmp(_optString, "divide"))                 non = opDivide;
-  else if (0 == strcmp(_optString, "divide-round"))           non = opDivideRound;
   else if (0 == strcmp(_optString, "modulo"))                 non = opModulo;
 
   else if (0 == strcmp(_optString, "union"))                  non = opUnion;
@@ -367,8 +362,6 @@ merylCommandBuilder::processOperation(void) {
   else if (0 == strcmp(_optString, "intersect-max"))          non = opIntersectMax;
   else if (0 == strcmp(_optString, "intersect-sum"))          non = opIntersectSum;
 
-  else if (0 == strcmp(_optString, "subtract"))               non = opSubtract;
-  
   else if (0 == strcmp(_optString, "difference"))             non = opDifference;
   else if (0 == strcmp(_optString, "symmetric-difference"))   non = opSymmetricDifference;
 
@@ -507,7 +500,7 @@ merylCommandBuilder::isMerylInput(void) {
 }
 
 bool
-merylCommandBuilder::isCanuInput(std::vector<char *> &err) {
+merylCommandBuilder::isCanuInput(vector<char *> &err) {
 
   if ((fileExists(_sqInfName) == false) ||
       (fileExists(_sqRdsName) == false))
@@ -623,8 +616,6 @@ void
 merylCommandBuilder::spawnThreads(void) {
   uint32  indent = 0;
 
-  omp_set_num_threads(_allowedThreads);
-
   for (uint32 tt=0; tt<64; tt++) {
 
     //  Construct a list of operations for each thread.
diff --git a/ext/meryl/src/meryl/merylCountArray.C b/ext/meryl/src/meryl/merylCountArray.C
index 02beedf..e70a886 100644
--- a/ext/meryl/src/meryl/merylCountArray.C
+++ b/ext/meryl/src/meryl/merylCountArray.C
@@ -99,7 +99,7 @@ merylCountArray::merylCountArray(void) {
 
 
 uint64
-merylCountArray::initialize(uint64 prefix, uint32 width) {
+merylCountArray::initialize(uint64 prefix, uint32 width, uint32 segsize) {
   _sWidth       = width;
 
   _prefix       = prefix;
@@ -111,12 +111,9 @@ merylCountArray::initialize(uint64 prefix, uint32 width) {
   _bitsPerPage  = getPageSize() * 8;
   _nReAlloc     = 0;
 
-  //  Set the segment size, in bits, to be a multiple of the page size.
-  //  Reserve some space for OS allocator stuff (needs to be divisible by
-  //  64).
-  _segSize      = pagesPerSegment() * _bitsPerPage - 8 * 64;
-  _segAlloc     = 0;
-  _segments     = NULL;
+  _segSize      = 8 * (segsize * 1024 - 32);   //  Set the segment size to 'segsize' KB,
+  _segAlloc     = 0;                           //  in bits, reserving 32 bytes for
+  _segments     = NULL;                        //  allocator stuff that we don't control.
 
   _nBits        = 0;
   _nBitsTrigger = 0;
@@ -234,7 +231,7 @@ merylCountArray::removeSegments(void) {
   _nReAlloc  = 0;
 
   _segAlloc = 0;                          //  Don't forget to
-  _segments = NULL;                       //  forget about it.
+  _segments = NULL;                       //  forget about it.
 
   _nBits        = 0;                      //  Indicate that we've stored no data.
   _nBitsTrigger = 0;
@@ -255,18 +252,21 @@ void
 merylCountArray::addSegment(uint32 seg) {
 
   if (_segAlloc == 0) {
-    resizeArray(_segments, _segAlloc, _segAlloc, 64, _raAct::copyData | _raAct::clearNew);
+    resizeArray(_segments, _segAlloc, _segAlloc, 32, resizeArray_copyData | resizeArray_clearNew);
     _nReAlloc++;
   }
   if (seg >= _segAlloc) {
-    resizeArray(_segments, _segAlloc, _segAlloc, 2 * _segAlloc, _raAct::copyData | _raAct::clearNew);
+    resizeArray(_segments, _segAlloc, _segAlloc, 2 * _segAlloc, resizeArray_copyData | resizeArray_clearNew);
     _nReAlloc++;
   }
   assert(_segments[seg] == NULL);
 
+  //if (seg > 0)
+  //  fprintf(stderr, "Add segment %u\n", seg);
+
   _segments[seg] = new uint64 [_segSize / 64];
 
-  //memset(_segments[seg], 0, sizeof(uint64) * _segSize / 64);
+  memset(_segments[seg], 0, sizeof(uint64) * _segSize / 64);
 }
 
 
@@ -496,7 +496,7 @@ merylCountArray::add(kmdata suffix) {
   uint64  seg       = nBits / _segSize;    //  Which segment are we in?
   uint64  segPos    = nBits % _segSize;    //  Bit position in that segment.
 
-  _nBits += _sWidth;
+   _nBits += _sWidth;
 
   //  word position counts from high to low; 0 for the high bit and 63 for
   //  the bit that represents integer 1.
@@ -596,7 +596,7 @@ merylCountArray::add(kmdata suffix) {
 #endif
 
       _segments[seg][word+0] |= sta;
-      _segments[seg][word+1]  = end;
+      _segments[seg][word+1] |= end;
     }
 
     if (thrWord) {
@@ -618,8 +618,8 @@ merylCountArray::add(kmdata suffix) {
 #endif
 
       _segments[seg][word+0] |= sta;
-      _segments[seg][word+1]  = mid;
-      _segments[seg][word+2]  = end;
+      _segments[seg][word+1] |= mid;
+      _segments[seg][word+2] |= end;
     }
   }
 
@@ -704,7 +704,7 @@ merylCountArray::add(kmdata suffix) {
       assert(word+1 == _segSize/64-1);
 
       _segments[seg+0][word+0] |= sta;
-      _segments[seg+0][word+1]  = mid;
+      _segments[seg+0][word+1] |= mid;
     }
 
     //  Move kmer bits to one or two words in the next segment.
@@ -712,15 +712,15 @@ merylCountArray::add(kmdata suffix) {
     if (oneNext) {
       uint64  sta = (suffix << (64 - nextBits));
 
-      _segments[seg+1][0]  = sta;
+      _segments[seg+1][0] |= sta;
     }
 
     if (twoNext) {
       uint64  mid = (suffix >> (nextBits - 64));
       uint64  end = (suffix << (128 - nextBits));
 
-      _segments[seg+1][0]  = mid;
-      _segments[seg+1][1]  = end;
+      _segments[seg+1][0] |= mid;
+      _segments[seg+1][1] |= end;
     }
   }
 
@@ -763,8 +763,8 @@ merylCountArray::get(uint64 kk) {
 
   //  If the bits are entirely in one word, be done.
 
-  if (wordEnd <= 64) {
-    bits = (_segments[seg][word] >> (64 - wordEnd)) & buildLowBitMask<uint64>(_sWidth);
+  if ((wordBgn >= 0) && (wordEnd <= 64)) {
+    bits = (_segments[seg][word] >> (64 - wordEnd)) & uint64MASK(_sWidth);
 
     return(bits);
   }
diff --git a/ext/meryl/src/meryl/merylCountArray.H b/ext/meryl/src/meryl/merylCountArray.H
index 52285cc..1a15ff6 100644
--- a/ext/meryl/src/meryl/merylCountArray.H
+++ b/ext/meryl/src/meryl/merylCountArray.H
@@ -52,7 +52,7 @@ public:
   merylCountArray();
   ~merylCountArray();
 
-  uint64    initialize(uint64 prefix, uint32 width);
+  uint64    initialize(uint64 prefix, uint32 width, uint32 segsize=64);
 
   uint64    initializeValues(kmvalu maxValue=0);
 
@@ -98,50 +98,30 @@ public:
 
 
 public:
-  uint64           numBits(void)        {  return(_nBits);           };
-  uint64           numKmers(void)       {  return(_nBits / _sWidth); };
+  uint64           numBits(void)        {  return(_nBits);  };
 
+  //  Returns the number of bytes in pages touched by data in this object.
+  //  It's a pretty tight bound.  The extra 1 was added to make it
+  //  be an overestimate of what 'top' is reporting.  Without it,
+  //  it underestimates by a significant amount.
 
+  //  This is underestimating the actual resident memory usage.  The constant
+  //  +5 tries to adjust, but still underestimates on large data sets.
 
-public:
-  //  Using 1 here is probably not the most time efficient value, but the
-  //  memory usage estimate seems to be the most accurate with it.
-  //
-  static
-  uint32            pagesPerSegment(void)    { return(1); };
-
-  //  We're doing accounting ourself instead of asking the OS for the current
-  //  process size because some OSs (FreeBSD, probably MacOS) don't decrease
-  //  the size and we need to when these tables are too full opposed to
-  //  simply allocated.
-  //
-  //  If the memset in merylCountArray() is enabled, this calculation does
-  //  not represent the amount of resident memory.
-  //
-  //  The number of pages used for data is complicated.  We're allocating in
-  //  blocks of pagesPerSegment() but reserving a few words for OS overhead.
-  //
-  //    fSegmsUsed: the number of full segments allocated, _segSize bits in each.
-  //    pPagesUsed: the leftover bits, plus one partial page used
-  //
   uint64           usedSize(void) {
-    uint64 memUsed    = 0;
- 
-    uint64 fSegmsUsed = _nBits / _segSize;
-    uint64 pPagesUsed = (_nBits - fSegmsUsed * _segSize) / _bitsPerPage + 1;
+    uint64  fullSegs  = (_nBits / _segSize);        //  Number of fully filled segments
+    uint64  partSeg   = (_nBits % _segSize) + 64;   //  Number of bits (rounded to next word) in the last (partially filled) segment
 
-    uint64 pagesUsed  = fSegmsUsed * pagesPerSegment() + pPagesUsed;
+    uint64  pagesUsed = 0;
 
-    memUsed += sizeof(merylCountArray);         //  For our metadata
-    memUsed += pagesUsed * _bitsPerPage / 8;    //  For the packed kmer data
-    memUsed += _segAlloc * sizeof(uint64 **);   //  For pointers to segments
+    pagesUsed += fullSegs * (_segSize / _bitsPerPage) + fullSegs * (((_segSize % _bitsPerPage) == 0) ? 0 : 1);
+    pagesUsed +=            (partSeg  / _bitsPerPage) +            (((partSeg  % _bitsPerPage) == 0) ? 0 : 1);
+    pagesUsed += 5;
+    pagesUsed += _nReAlloc;
 
-    return(memUsed);
+    return(pagesUsed * _bitsPerPage / 8 + _segAlloc * sizeof(uint64 **) + sizeof(merylCountArray));
   };
 
-  //  Returns the change in size since the last call, but sets a threshold so
-  //  we don't spend a bunch of time calling usedSize().
-  //
   uint64           usedSizeDelta(void) {
 
     if (_nBits < _nBitsTrigger)
@@ -180,7 +160,6 @@ private:
   uint64           _bitsPerPage;
   uint64           _nReAlloc;
 
-private:
   uint32           _segSize;      //  Number of bits in each segment.
   uint32           _segAlloc;     //  Number of segments we're allowed to allocate  (size of the array below).
   uint64         **_segments;     //  An array of blocks of data.
diff --git a/ext/meryl/src/meryl/merylInput.H b/ext/meryl/src/meryl/merylInput.H
index c8fecc1..29076c5 100644
--- a/ext/meryl/src/meryl/merylInput.H
+++ b/ext/meryl/src/meryl/merylInput.H
@@ -72,9 +72,6 @@ public:
 
   bool   isMultiSet(void)         { return(_isMultiSet);        };
 
-  bool   isCompressedFile(void)   { return((_sequence != NULL) &&
-                                           (_sequence->isCompressed())); };
-
   merylOperation           *_operation;
   merylFileReader          *_stream;
   dnaSeqFile               *_sequence;
diff --git a/ext/meryl/src/meryl/merylOp-count.C b/ext/meryl/src/meryl/merylOp-count.C
index af63d88..59e1df1 100644
--- a/ext/meryl/src/meryl/merylOp-count.C
+++ b/ext/meryl/src/meryl/merylOp-count.C
@@ -20,6 +20,10 @@
 #include "strings.H"
 #include "system.H"
 
+//  The number of KB to use for a merylCountArray segment.
+#define SEGMENT_SIZE       64
+#define SEGMENT_SIZE_BITS  (SEGMENT_SIZE * 1024 * 8)
+
 
 //
 //  mcaSize       = sizeof(merylCountArray)  == 80
@@ -46,7 +50,7 @@
 //
 //  (nKmers / nPrefix+1) / mersPerSeg = (memory - mcaSize * nPrefix) / (ptrSize * nPrefix + segSize * nPrefix)
 //   nKmers / nPrefix+1  = mersPerSeg * (memory - mcaSize * nPrefix) / (ptrSize * nPrefix + segSize * nPrefix)
-#if 0
+
 uint64
 findMaxInputSizeForMemorySize(uint32 merSize, uint64 memSize) {
   uint64  mcaSize = sizeof(merylCountArray);
@@ -108,7 +112,7 @@ findMaxInputSizeForMemorySize(uint32 merSize, uint64 memSize) {
 
   exit(0);
 }
-#endif
+
 
 
 
@@ -166,18 +170,12 @@ findExpectedSimpleSize(uint64  nKmerEstimate,
 
 
 
-//  Returns bestPrefix_ and memoryUsed_ corresponding to the minimal memory
-//  estimate for the supplied nKmerEstimate.  If no estimate is below
-//  memoryAllowed, 0 and UINT64_MAX, respectively, are returned.
-//
 void
 findBestPrefixSize(uint64  nKmerEstimate,
                    uint64  memoryAllowed,
                    uint32 &bestPrefix_,
                    uint64 &memoryUsed_) {
-  uint32  merSize      = kmerTiny::merSize();
-  uint32  segSizeBits  = merylCountArray::pagesPerSegment() * getPageSize() * 8;
-  uint32  segSizeBytes = merylCountArray::pagesPerSegment() * getPageSize();
+  uint32  merSize    = kmerTiny::merSize();
 
   bestPrefix_  = 0;
   memoryUsed_  = UINT64_MAX;
@@ -195,18 +193,18 @@ findBestPrefixSize(uint64  nKmerEstimate,
   //  we end up with a prefix or a suffix of size zero.
 
   for (uint32 wp=1; wp < 2 * merSize - 1; wp++) {
-    uint64  nPrefix          = (uint64)1 << wp;                    //  Number of prefix == number of blocks of data
-    uint64  kmersPerPrefix   = nKmerEstimate / nPrefix + 1;        //  Expected number of kmers we need to store per prefix
-    uint64  kmersPerSeg      = segSizeBits / (2 * merSize - wp);   //  Kmers per segment
-    uint64  segsPerPrefix    = kmersPerPrefix / kmersPerSeg + 1;   //
+    uint64  nPrefix          = (uint64)1 << wp;                          //  Number of prefix == number of blocks of data
+    uint64  kmersPerPrefix   = nKmerEstimate / nPrefix + 1;              //  Expected number of kmers we need to store per prefix
+    uint64  kmersPerSeg      = SEGMENT_SIZE_BITS / (2 * merSize - wp);   //  Kmers per segment
+    uint64  segsPerPrefix    = kmersPerPrefix / kmersPerSeg + 1;         //
 
-    if (wp + countNumberOfBits64(segsPerPrefix) + countNumberOfBits64(segSizeBytes) >= 64)
+    if (wp + countNumberOfBits64(segsPerPrefix) + countNumberOfBits64(SEGMENT_SIZE) + 10 >= 64)
       break;   //  Otherwise, dataMemory overflows.
 
     uint64  structMemory     = ((sizeof(merylCountArray) * nPrefix) +                  //  Basic structs
                                 (sizeof(uint64 *)        * nPrefix * segsPerPrefix));  //  Pointers to segments
-    uint64  dataMemoryMin    = nPrefix *                 segSizeBytes;                 //  Minimum memory needed for this size.
-    uint64  dataMemory       = nPrefix * segsPerPrefix * segSizeBytes;                 //  Expected memory for full batch.
+    uint64  dataMemoryMin    = nPrefix *                 SEGMENT_SIZE * 1024;          //  Minimum memory needed for this size.
+    uint64  dataMemory       = nPrefix * segsPerPrefix * SEGMENT_SIZE * 1024;          //  Expected memory for full batch.
     uint64  totalMemory      = structMemory + dataMemory;
 
     //  Pick a larger prefix if it is dramatically smaller than what we have.
@@ -236,9 +234,7 @@ findBestValues(uint64  nKmerEstimate,
                uint64 &nPrefix_,
                uint32 &wData_,
                kmdata &wDataMask_) {
-  uint32  merSize      = kmerTiny::merSize();
-  uint32  segSizeBits  = merylCountArray::pagesPerSegment() * getPageSize() * 8;
-  uint32  segSizeBytes = merylCountArray::pagesPerSegment() * getPageSize();
+  uint32  merSize = kmerTiny::merSize();
 
   fprintf(stderr, "\n");
   fprintf(stderr, "\n");
@@ -252,16 +248,16 @@ findBestValues(uint64  nKmerEstimate,
   for (uint32 wp=1; wp < 2 * merSize - 1; wp++) {
     uint64  nPrefix          = (uint64)1 << wp;                          //  Number of prefix == number of blocks of data
     uint64  kmersPerPrefix   = nKmerEstimate / nPrefix + 1;              //  Expected number of kmers we need to store per prefix
-    uint64  kmersPerSeg      = segSizeBits / (2 * merSize - wp);         //  Kmers per segment
+    uint64  kmersPerSeg      = SEGMENT_SIZE_BITS / (2 * merSize - wp);   //  Kmers per segment
     uint64  segsPerPrefix    = kmersPerPrefix / kmersPerSeg + 1;         //
 
-    if (wp + countNumberOfBits64(segsPerPrefix) + countNumberOfBits64(segSizeBytes) >= 64)
+    if (wp + countNumberOfBits64(segsPerPrefix) + countNumberOfBits64(SEGMENT_SIZE) + 10 >= 64)
       break;   //  Otherwise, dataMemory overflows.
 
     uint64  structMemory     = ((sizeof(merylCountArray) * nPrefix) +                  //  Basic structs
                                 (sizeof(uint64 *)        * nPrefix * segsPerPrefix));  //  Pointers to segments
-    uint64  dataMemoryMin    = nPrefix *                 segSizeBytes;                 //  Minimum memory needed for this size.
-    uint64  dataMemory       = nPrefix * segsPerPrefix * segSizeBytes;                 //  Expected memory for full batch.
+    uint64  dataMemoryMin    = nPrefix *                 SEGMENT_SIZE * 1024;          //  Minimum memory needed for this size.
+    uint64  dataMemory       = nPrefix * segsPerPrefix * SEGMENT_SIZE * 1024;          //  Expected memory for full batch.
     uint64  totalMemory      = structMemory + dataMemory;
 
     fprintf(stderr, "%6" F_U32P "  %4" F_U64P " %cP  %4" F_U64P " %cB  %4" F_U64P " %cM  %4" F_U64P " %cS  %4" F_U64P " %cB  %4" F_U64P " %cB  %4" F_U64P " %cB",
@@ -296,6 +292,70 @@ findBestValues(uint64  nKmerEstimate,
 
 
 
+void
+reportNumberOfOutputs(uint64   nKmerEstimate,
+                      uint64   memoryUsed,        //  expected memory needed for counting in one block
+                      uint64   memoryAllowed,     //  memory the user said we can use
+                      bool     useSimple) {
+  uint32  nOutputsI      = memoryUsed / memoryAllowed + 1;                         //  Integer quotient plus one.
+  double  nOutputsD      = (double)memoryUsed / memoryAllowed - (nOutputsI - 1);   //  Fractional part of used/allowed.
+  //  Reports the final counting configuration on stderr.  nKmerEstimate is not used in this function.
+
+  fprintf(stderr, "\n");
+  fprintf(stderr, "\n");
+  fprintf(stderr, "FINAL CONFIGURATION\n");
+  fprintf(stderr, "-------------------\n");
+
+  if (useSimple == true) {
+    assert(nOutputsI == 1);   //  Simple mode requires memoryUsed < memoryAllowed.
+  }
+
+  else {
+    char    batchString[64] = { 0 };   //  Text spliced into the "Will %s batches" warning below.
+
+    if      (nOutputsD < 0.2) {        //  Barely past a whole multiple of memoryAllowed.
+      nOutputsI += 0;                  //  No-op; the count is left as computed.
+      snprintf(batchString, 42, "split into up to %u (possibly %u)", nOutputsI-1, nOutputsI);
+    }
+
+    else if (nOutputsD < 0.8) {
+      nOutputsI += 0;                  //  No-op; the count is left as computed.
+      snprintf(batchString, 42, "split into up to %u", nOutputsI);
+    }
+
+    else {                             //  Fraction is 0.8 or more.
+      nOutputsI += 1;                  //  Bump the count; the summary line below reports this value.
+      snprintf(batchString, 42, "split into up to %u (possibly %u)", nOutputsI, nOutputsI+1);
+    }
+
+    //  Warn only when more than one batch is expected.
+    if (nOutputsI > 1) {
+      fprintf(stderr, "\n");
+      fprintf(stderr, "WARNING:\n");
+      fprintf(stderr, "WARNING: Cannot fit into " F_U64 " %cB memory limit.\n", scaledNumber(memoryAllowed), scaledUnit(memoryAllowed));
+      fprintf(stderr, "WARNING: Will %s batches, and merge them at the end.\n", batchString);
+      fprintf(stderr, "WARNING:\n");
+    }
+
+    if (nOutputsI > 32) {
+      fprintf(stderr, "WARNING: Large number of batches.  Increase memory for better performance.\n");
+      fprintf(stderr, "WARNING:\n");
+    }
+  }
+
+  //  This is parsed by Canu.  Do not change.
+
+  fprintf(stderr, "\n");
+  fprintf(stderr, "Configured %s mode for %.3f GB memory per batch, and up to %u batch%s.\n",
+          (useSimple == true) ? "simple" : "complex",
+          ((memoryUsed < memoryAllowed) ? memoryUsed : memoryAllowed) / 1024.0 / 1024.0 / 1024.0,   //  Smaller of used/allowed, divided by 1024^3.
+          nOutputsI,
+          (nOutputsI == 1) ? "" : "es");
+  fprintf(stderr, "\n");
+}
+
+
+
 void
 merylOperation::configureCounting(uint64   memoryAllowed,      //  Input:  Maximum allowed memory in bytes
                                   bool    &useSimple_,         //  Output: algorithm to use
@@ -346,12 +406,9 @@ merylOperation::configureCounting(uint64   memoryAllowed,      //  Input:  Maxim
 
   uint64   memoryUsedComplex = UINT64_MAX;
   uint32   bestPrefix        = 0;
-  uint32   nBatches          = 0;
 
-  for (nBatches=1; memoryUsedComplex > memoryAllowed; nBatches++)
-    findBestPrefixSize(_expNumKmers / nBatches, memoryAllowed, bestPrefix, memoryUsedComplex);
-
-  findBestValues(_expNumKmers / nBatches, bestPrefix, memoryUsedComplex, wPrefix_, nPrefix_, wData_, wDataMask_);
+  findBestPrefixSize(_expNumKmers, memoryAllowed, bestPrefix, memoryUsedComplex);
+  findBestValues(_expNumKmers, bestPrefix, memoryUsedComplex, wPrefix_, nPrefix_, wData_, wDataMask_);
 
   //
   //  Decide simple or complex.  useSimple_ is an output.
@@ -380,21 +437,7 @@ merylOperation::configureCounting(uint64   memoryAllowed,      //  Input:  Maxim
   //  Output the configuration.
   //
 
-  fprintf(stderr, "\n");
-  fprintf(stderr, "\n");
-  fprintf(stderr, "FINAL CONFIGURATION\n");
-  fprintf(stderr, "-------------------\n");
-  fprintf(stderr, "\n");
-  fprintf(stderr, "Estimated to require %lu %cB memory out of %lu %cB allowed.\n",
-          scaledNumber(memoryUsed),    scaledUnit(memoryUsed),
-          scaledNumber(memoryAllowed), scaledUnit(memoryAllowed));
-  fprintf(stderr, "Estimated to require %u batch%s.\n", nBatches, (nBatches == 1) ? "" : "es");
-  fprintf(stderr, "\n");
-  fprintf(stderr, "Configured %s mode for %.3f GB memory per batch, and up to %u batch%s.\n",      //  This is parsed
-          (useSimple_ == true) ? "simple" : "complex",                                             //  by Canu.
-          ((memoryUsed < memoryAllowed) ? memoryUsed : memoryAllowed) / 1024.0 / 1024.0 / 1024.0,  //  DO NOT CHANGE!
-          nBatches, (nBatches == 1) ? "" : "es");
-  fprintf(stderr, "\n");
+  reportNumberOfOutputs(_expNumKmers, memoryUsed, memoryAllowed, useSimple_);
 }
 
 
@@ -403,9 +446,9 @@ merylOperation::configureCounting(uint64   memoryAllowed,      //  Input:  Maxim
 //  rigorous went into the multipliers, just looked at a few sets of lambda reads.
 uint64
 merylOperation::guesstimateNumberOfkmersInInput_dnaSeqFile(dnaSeqFile *sequence) {
-  uint64       numMers = 0;
-  char const  *name    = sequence->filename();
-  uint32       len     = strlen(name);
+  uint64  numMers = 0;
+  char   *name    = sequence->filename();
+  uint32  len     = strlen(name);
 
   if ((name[0] == '-') && (len == 1))
     return(0);
@@ -511,7 +554,7 @@ merylOperation::count(uint32  wPrefix,
   memUsed = memBase;
 
   for (uint32 pp=0; pp<nPrefix; pp++)
-    memUsed += data[pp].initialize(pp, wData);
+    memUsed += data[pp].initialize(pp, wData, SEGMENT_SIZE);
 
   uint64          kmersAdded  = 0;
 
@@ -573,7 +616,7 @@ merylOperation::count(uint32  wPrefix,
 
       if (memUsed > _maxMemory) {
         fprintf(stderr, "Memory full.  Writing results to '%s', using " F_S32 " threads.\n",
-                _outputO->filename(), getMaxThreadsAllowed());
+                _outputO->filename(), omp_get_max_threads());
         fprintf(stderr, "\n");
 
 #pragma omp parallel for schedule(dynamic, 1)
@@ -616,7 +659,7 @@ merylOperation::count(uint32  wPrefix,
 
   fprintf(stderr, "\n");
   fprintf(stderr, "Writing results to '%s', using " F_S32 " threads.\n",
-          _outputO->filename(), getMaxThreadsAllowed());
+          _outputO->filename(), omp_get_max_threads());
 
   //for (uint64 pp=0; pp<nPrefix; pp++)
   //  fprintf(stderr, "Prefix 0x%016lx writes to file %u\n", pp, _outputO->fileNumber(pp));
diff --git a/ext/meryl/src/meryl/merylOp-countSimple.C b/ext/meryl/src/meryl/merylOp-countSimple.C
index 7d88ee9..65aeed6 100644
--- a/ext/meryl/src/meryl/merylOp-countSimple.C
+++ b/ext/meryl/src/meryl/merylOp-countSimple.C
@@ -106,14 +106,14 @@ merylOperation::countSimple(void) {
 
         for (uint32 hib=0; hib < 64; hib++) {
           if (highBits[hib].isAllocated() == false) {
-            fprintf(stderr, "Increasing to %u-bit storage (for kmer 0x%s).\n",
-                    lowBitsSize + hib + 1, toHex(kidx));
+            fprintf(stderr, "Increasing to %u-bit storage (for kmer 0x%016lx).\n",
+                    lowBitsSize + hib + 1, kidx);
             highBits[hib].allocate(nEntries);
           }
 
-          if (highBits[hib].flipBit(kidx) == 0) {         //  If not set, set it,
-            highBitMax = std::max(highBitMax, hib + 1);   //  remember the possible maximum bit set,
-            break;                                        //  and stop.
+          if (highBits[hib].flipBit(kidx) == 0) {    //  If not set, set it,
+            highBitMax = max(highBitMax, hib + 1);   //  remember the possible maximum bit set,
+            break;                                   //  and stop.
           }
         }
       }
@@ -183,7 +183,7 @@ merylOperation::countSimple(void) {
 
   fprintf(stderr, "\n");
   fprintf(stderr, "Writing results to '%s', using " F_S32 " threads.\n",
-          _outputO->filename(), getMaxThreadsAllowed());
+          _outputO->filename(), omp_get_max_threads());
   fprintf(stderr, "             [ file ][  prefix ][  suffix ][ count-suffix ]\n");
   fprintf(stderr, "   widths    [    6 ][ %7u ][ %7u ][ %12u ]\n", wPrefix - 6, wSuffix, 2 * _countSuffixLength);
   fprintf(stderr, "   number    [   64 ][ %7lu ][ %7lu ][ %12s ]\n", nPrefix / 64, nSuffix, _countSuffixString);
@@ -207,7 +207,6 @@ merylOperation::countSimple(void) {
       uint64  kEnd   = (bp << wSuffix) | sMask;
       uint64  nKmers = 0;
 
-//CJ: avoid this print statment
 #if 0
       fprintf(stderr, "thread %2d working on block 0x%08lx<0x%08lx<0x%08lx %8lu kmers between 0x%016lx and 0x%016lx\n",
               omp_get_thread_num(),
diff --git a/ext/meryl/src/meryl/merylOp-countThreads.C b/ext/meryl/src/meryl/merylOp-countThreads.C
index 2742fed..1521be2 100644
--- a/ext/meryl/src/meryl/merylOp-countThreads.C
+++ b/ext/meryl/src/meryl/merylOp-countThreads.C
@@ -23,18 +23,21 @@
 
 #include <atomic>
 
+//  Size, in KB, of one merylCountArray segment; passed to merylCountArray::initialize().
+#define SEGMENT_SIZE       64
+#define SEGMENT_SIZE_BITS  (SEGMENT_SIZE * 1024 * 8)   //  The same segment size expressed in bits.
+
+
 
 class mcGlobalData {
 public:
-  mcGlobalData(std::vector<merylInput *> &inputs,
-               merylOp                    op,
-               uint64                     nPrefix,
-               uint32                     wData,
-               kmdata                     wDataMask,
-               uint64                     maxMemory,
-               uint32                     maxThreads,
-               uint64                     bufferSize,
-               merylFileWriter           *output) : _inputs(inputs) {
+  mcGlobalData(vector<merylInput *>   &inputs,
+               merylOp                 op,
+               uint64                  nPrefix,
+               uint32                  wData,
+               kmdata                  wDataMask,
+               uint64                  maxMemory,
+               merylFileWriter        *output) : _inputs(inputs) {
     _operation      = op;
     _nPrefix        = nPrefix;
     _wData          = wData;
@@ -52,22 +55,20 @@ public:
     _memUsed        = _memBase;
     _memReported    = 0;
 
-    _maxThreads     = maxThreads;
-    _loadThreads    = 1;
-
-    _bufferSize     = bufferSize;
-
     _kmersAdded     = 0;
-    _kmersAddedMax  = 0;
 
     _inputPos       = 0;
+    //_inputs         = inputs;
 
     for (uint32 ii=0; ii<65; ii++)
       _lastBuffer[ii] = 0;
 
+    _computeWait    = 0;
+    _numComputing   = 0;
+
     for (uint32 pp=0; pp<_nPrefix; pp++) {      //  Initialize each bucket.
       _lock[pp].clear();
-      _memUsed += _data[pp].initialize(pp, wData);
+      _memUsed += _data[pp].initialize(pp, wData, SEGMENT_SIZE);
     }
   };
 
@@ -77,59 +78,65 @@ public:
     delete [] _writer;
   };
 
-  merylOp                     _operation;        //  Parameters.
-  uint64                      _nPrefix;
-  uint32                      _wData;
-  kmdata                      _wDataMask;
+  merylOp                _operation;        //  Parameters.
+  uint64                 _nPrefix;
+  uint32                 _wData;
+  kmdata                 _wDataMask;
 
-  bool                        _dumping;
+  bool                   _dumping;
 
-  std::atomic_flag           *_lock;
-  merylCountArray            *_data;             //  Data for counting.
-  merylFileWriter            *_output;
-  merylBlockWriter           *_writer;           //  Data for writing.
+  std::atomic_flag      *_lock;
+  merylCountArray       *_data;             //  Data for counting.
+  merylFileWriter       *_output;
+  merylBlockWriter      *_writer;           //  Data for writing.
 
-  uint64                      _maxMemory;        //  Maximum memory we can use.
-  uint64                      _memBase;          //  Overhead memory.
-  uint64                      _memUsed;          //  Sum of actual memory used.
-  uint64                      _memReported;      //  Memory usage at last report.
+  uint64                 _maxMemory;        //  Maximum memory we can use.
+  uint64                 _memBase;          //  Overhead memory.
+  uint64                 _memUsed;          //  Sum of actual memory used.
+  uint64                 _memReported;      //  Memory usage at last report.
 
-  uint32                      _maxThreads;       //  The max number of CPUs we can use.
-  uint32                      _loadThreads;      //  The number of CPUs used for reading input.
+  uint64                 _kmersAdded;       //  Boring statistics for the user.
 
-  uint64                      _bufferSize;       //  Maximum size of a computation input buffer.
+  uint32                 _inputPos;         //  Input files.
+  vector<merylInput *>  &_inputs;
 
-  uint64                      _kmersAdded;       //  Number of kmers added; boring statistics for the user.
-  uint64                      _kmersAddedMax;    //  Max kmers in any single merylCountArray; not boring.
-  
-  uint32                      _inputPos;         //  Input files.
-  std::vector<merylInput *>  &_inputs;
+  char                   _lastBuffer[65];   //  Wrap-around from the last buffer.
 
-  char                        _lastBuffer[65];   //  Wrap-around from the last buffer.
+  uint32                 _computeWait;
+  uint32                 _numComputing;
 };
 
 
 
+#define BUF_MAX  (1 * 1024 * 1024)
+
 class mcComputation {
 public:
-  mcComputation(uint64 bufmax) {
-    _bufferMax     = bufmax;
-    _buffer        = new char [_bufferMax];
+  mcComputation() {                    //  Start with an empty buffer and zeroed statistics.
+    _bufferMax  = BUF_MAX;
+    _bufferLen  = 0;
+
+    _memUsed    = 0;
+    _kmersAdded = 0;
   };
 
   ~mcComputation() {
-    delete [] _buffer;
   };
 
-  uint64        _bufferMax  = 0;       //  Input data
-  uint64        _bufferLen  = 0;
-  char         *_buffer     = nullptr;
+  //  Input sequence data, held in a fixed-size buffer embedded in the object.
+  uint64        _bufferMax;            //  Capacity of _buffer; always BUF_MAX.
+  uint64        _bufferLen;            //  Presumably the number of bytes of _buffer in use -- TODO confirm.
+  char          _buffer[BUF_MAX];      //  Inline storage; nothing for the destructor to free.
 
-  kmerIterator  _kiter;                //  Sequence to kmer conversion
+  //  Iterator that converts the buffered sequence into kmers.
+  kmerIterator  _kiter;
 
-  uint64        _memUsed       = 0;    //  Output statistics on kmers added to
-  uint64        _kmersAdded    = 0;    //  the merylCountArray but this block.
-  uint64        _kmersAddedMax = 0;
+  //  Disabled string buffers, kept only for the commented-out debug prints.
+  //char          _fstr[65];      //  For debugging
+  //char          _rstr[65];
+
+  uint64        _memUsed;              //  Sum of the merylCountArray::add() return values for this block.
+  uint64        _kmersAdded;           //  Number of kmers this block added; folded into the global count by writeBatch().
 };
 
 
@@ -138,13 +145,12 @@ public:
 void *
 loadBases(void *G) {
   mcGlobalData     *g  = (mcGlobalData  *)G;
-  mcComputation    *s  = new mcComputation(g->_bufferSize);
+  mcComputation    *s  = new mcComputation();
   uint32            kl = kmerTiny::merSize() - 1;
 
   //  Copy the end of the last block into our buffer.
 
   assert(s->_bufferLen == 0);
-  assert(s->_bufferMax > kl);
 
   if (g->_lastBuffer[0] != 0) {
     memcpy(s->_buffer, g->_lastBuffer, sizeof(char) * kl);
@@ -159,14 +165,6 @@ loadBases(void *G) {
   if (g->_inputPos >= g->_inputs.size())
     return(NULL);
 
-  //  Update the number of threads used for loading.  If the input is
-  //  compressed, reserve 2 threads, otherwise reserve 1.
-
-  if (g->_inputs[g->_inputPos]->isCompressedFile())
-    g->_loadThreads = 2;
-  else
-    g->_loadThreads = 1;
-
   //  Try to load bases.  Keep loading until the buffer is filled
   //  or we exhaust the file.
 
@@ -248,13 +246,13 @@ insertKmers(void *G, void *T, void *S) {
     if (useF == true) {
       pp = (kmdata)s->_kiter.fmer() >> g->_wData;
       mm = (kmdata)s->_kiter.fmer()  & g->_wDataMask;
-      //fprintf(stderr, "useF F=%s R=%s ms=%u pp %llu mm %llu\n", s->_kiter.fmer().toString(fstr), s->_kiter.rmer().toString(rstr), s->_kiter.fmer().merSize(), pp, mm);
+      //fprintf(stderr, "useF F=%s R=%s ms=%u pp %lu mm %lu\n", s->_kiter.fmer().toString(fstr), s->_kiter.rmer().toString(rstr), s->_kiter.fmer().merSize(), pp, mm);
     }
 
     else {
       pp = (kmdata)s->_kiter.rmer() >> g->_wData;
       mm = (kmdata)s->_kiter.rmer()  & g->_wDataMask;
-      //fprintf(stderr, "useR F=%s R=%s ms=%u pp %llu mm %llu\n", s->_kiter.fmer().toString(fstr), s->_kiter.rmer().toString(rstr), s->_kiter.rmer().merSize(), pp, mm);
+      //fprintf(stderr, "useR F=%s R=%s ms=%u pp %lu mm %lu\n", s->_kiter.fmer().toString(fstr), s->_kiter.rmer().toString(rstr), s->_kiter.rmer().merSize(), pp, mm);
     }
 
     assert(pp < g->_nPrefix);
@@ -271,9 +269,8 @@ insertKmers(void *G, void *T, void *S) {
     while (g->_lock[pp].test_and_set(std::memory_order_relaxed) == true)
       ;
 
-    s->_memUsed        += g->_data[pp].add(mm);
-    s->_kmersAdded     += 1;
-    s->_kmersAddedMax   = std::max(s->_kmersAddedMax, g->_data[pp].numKmers());
+    s->_memUsed    += g->_data[pp].add(mm);
+    s->_kmersAdded += 1;
 
     g->_lock[pp].clear(std::memory_order_relaxed);
   }
@@ -290,41 +287,32 @@ writeBatch(void *G, void *S) {
   //  Udpate memory used and kmers added.  There's only one writer thread,
   //  so this is thread safe!
 
-  g->_memUsed       += s->_memUsed;
-  g->_kmersAdded    += s->_kmersAdded;
-  g->_kmersAddedMax  = std::max(s->_kmersAddedMax, g->_kmersAddedMax);
-
-  //  Free the input buffer.  All the data is loaded into merylCountArrays,
-  //  and all we needed to get from this is the stats above.
-
-  delete s;
-
-  //  Estimate, poorly, how much memory we'll need to sort the arrays.  It's
-  //  a poor estimate because we'll never have all threads sorting the
-  //  maximum number of kmers at the same time, but it's a safe poor
-  //  estimate.
+  g->_memUsed    += s->_memUsed;
+  g->_kmersAdded += s->_kmersAdded;
 
-  uint64  sortMem = g->_maxThreads * g->_kmersAddedMax * sizeof(kmdata);
+  //  Log memory usage whenever it has grown by more than 128 MB since the last report.
 
-  //  Write a log every 128 MB of memory growth.
+  if (g->_memUsed - g->_memReported > (uint64)128 * 1024 * 1024) {
+    g->_memReported = g->_memUsed;
 
-  if (g->_memUsed + sortMem - g->_memReported > (uint64)128 * 1024 * 1024) {
-    g->_memReported = g->_memUsed + sortMem;
-
-    fprintf(stderr, "Used %3.3f GB / %3.3f GB to store %12lu kmers; need %3.3f GB to sort %12lu kmers\n",
+    fprintf(stderr, "Used %3.3f GB out of %3.3f GB to store %12lu kmers.\n",
             g->_memUsed   / 1024.0 / 1024.0 / 1024.0,
             g->_maxMemory / 1024.0 / 1024.0 / 1024.0,
-            g->_kmersAdded,
-            sortMem / 1024.0 / 1024.0 / 1024.0, g->_kmersAddedMax);
+            g->_kmersAdded);
   }
 
+  //  Free the input buffer.
+
+  delete s;
+
   //  If we haven't hit the memory limit yet, just return.
+  //  Otherwise, dump data.
 
-  if (g->_memUsed + sortMem < g->_maxMemory)
+  if (g->_memUsed < g->_maxMemory)
     return;
 
   //  Tell all the threads to pause, then grab all the locks to ensure nobody
-  //  is still adding kmers to a merylCountArray.
+  //  is still inserting kmers into the count arrays.
 
   g->_dumping = true;
 
@@ -332,26 +320,17 @@ writeBatch(void *G, void *S) {
     while (g->_lock[pp].test_and_set(std::memory_order_relaxed) == true)
       ;
 
-  //  Write data!  For reasons I don't understand, we need to reset the max
-  //  number of threads to use.  Something is resetting it to the number of
-  //  CPUs on the machine.
-  //
-  //  Since we still have a sequence loader around, we need to leave threads
-  //  for it.
-
-  uint32  wThreads = (g->_maxThreads > g->_loadThreads) ? (g->_maxThreads - g->_loadThreads) : 1;
-  uint32  lThreads =                   g->_loadThreads;
+  //  Write data!
 
-  fprintf(stderr, "Memory full.  Writing results to '%s', using %u thread%s (%u thread%s still doing input).\n",
-          g->_output->filename(),
-          wThreads, (wThreads == 1) ? "" : "s",
-          lThreads, (lThreads == 1) ? "" : "s");
+  fprintf(stderr, "Memory full.  Writing results to '%s', using " F_S32 " threads.\n",
+          g->_output->filename(), omp_get_max_threads());
   fprintf(stderr, "\n");
 
-  omp_set_num_threads(wThreads);
-
 #pragma omp parallel for schedule(dynamic, 1)
   for (uint32 ff=0; ff<g->_output->numberOfFiles(); ff++) {
+    //fprintf(stderr, "thread %2u writes file %2u with prefixes 0x%016lx to 0x%016lx\n",
+    //        omp_get_thread_num(), ff, g->_output->firstPrefixInFile(ff), g->_output->lastPrefixInFile(ff));
+
     for (uint64 pp=g->_output->firstPrefixInFile(ff); pp <= g->_output->lastPrefixInFile(ff); pp++) {
       g->_data[pp].countKmers();                   //  Convert the list of kmers into a list of (kmer, count).
       g->_data[pp].dumpCountedKmers(g->_writer);   //  Write that list to disk.
@@ -368,8 +347,7 @@ writeBatch(void *G, void *S) {
   for (uint32 pp=0; pp<g->_nPrefix; pp++)
     g->_memUsed += g->_data[pp].usedSize();
 
-  g->_kmersAdded    = 0;
-  g->_kmersAddedMax = 0;
+  g->_kmersAdded = 0;
 
   //  Signal that threads can proceeed.
 
@@ -404,53 +382,37 @@ merylOperation::countThreads(uint32  wPrefix,
   _outputO->initialize(wPrefix);
 
   //  Initialize the counter.
-  //
-  //  Tell it to use _maxMemory, but carve out space for the input buffers.
-  //  At 2MB each, 16 per thread, and 16 threads, that's 512 MB.  Not huge,
-  //  but a big chunk of our (expected 16 or so GB total).  The extra buffers
-  //  are generally filled when a batch is dumped to disk.
-
-  uint64  inputBufferSize = 2 * 1024 * 1024;
-
-  mcGlobalData  *g = new mcGlobalData(_inputs,
-                                      _operation,
-                                      nPrefix,
-                                      wData,
-                                      wDataMask,
-                                      _maxMemory - inputBufferSize * 4 * _maxThreads,
-                                      _maxThreads,
-                                      inputBufferSize,
-                                      _outputO);
-
-  //  Set up a sweatShop and run it.  We'll reserve one thread for input, one
-  //  for gzip and use the remaining for counting -- unless there are no
-  //  remaining, then we'll just use one.
 
-  sweatShop    *ss = new sweatShop(loadBases, insertKmers, writeBatch);
+  mcGlobalData  *g = new mcGlobalData(_inputs, _operation, nPrefix, wData, wDataMask, _maxMemory, _outputO);
 
-  uint32 nw = (_maxThreads > 2) ? (_maxThreads - 2) : 1;
+  //  Set up a sweatShop and run it.
 
-  ss->setLoaderBatchSize(1);            //  Load this many things before appending to input list
-  ss->setLoaderQueueSize(nw * 16);      //  Allow this many things on the input list before stalling the input
-  ss->setWriterQueueSize(nw);           //  Allow this many things on the output list before stalling the compute
-  ss->setNumberOfWorkers(nw);           //  Use this many worker CPUs; leave one for input and one for gzip.
+  sweatShop    *ss = new sweatShop(loadBases, insertKmers, writeBatch);
+  uint32        nt = omp_get_max_threads();
+
+  ss->setLoaderBatchSize(1 * nt);
+  ss->setLoaderQueueSize(2 * nt);
+  ss->setWriterQueueSize(1 * nt);
+  ss->setNumberOfWorkers(1 * nt);
 
   ss->run(g, false);
 
   delete ss;
 
-  //  All data loaded.  Write the output.  Reset threads before starting (see
-  //  above) to the maximum possible since there is no loader threads around
-  //  anymore.
+  //  All data loaded.  Write the output.
 
   fprintf(stderr, "\n");
-  fprintf(stderr, "Input complete.  Writing results to '%s', using %u thread%s.\n",
-          _outputO->filename(), _maxThreads, (_maxThreads == 1) ? "" : "s");
+  fprintf(stderr, "Writing results to '%s', using " F_S32 " threads.\n",
+          _outputO->filename(), omp_get_max_threads());
 
-  omp_set_num_threads(_maxThreads);
+  //for (uint64 pp=0; pp<nPrefix; pp++)
+  //  fprintf(stderr, "Prefix 0x%016lx writes to file %u\n", pp, _outputO->fileNumber(pp));
 
 #pragma omp parallel for schedule(dynamic, 1)
   for (uint32 ff=0; ff<_outputO->numberOfFiles(); ff++) {
+    //fprintf(stderr, "thread %2u writes file %2u with prefixes 0x%016lx to 0x%016lx\n",
+    //        omp_get_thread_num(), ff, _outputO->firstPrefixInFile(ff), _outputO->lastPrefixInFile(ff));
+
     for (uint64 pp=_outputO->firstPrefixInFile(ff); pp <= _outputO->lastPrefixInFile(ff); pp++) {
       g->_data[pp].countKmers();                   //  Convert the list of kmers into a list of (kmer, count).
       g->_data[pp].dumpCountedKmers(g->_writer);   //  Write that list to disk.
diff --git a/ext/meryl/src/meryl/merylOp-histogram.C b/ext/meryl/src/meryl/merylOp-histogram.C
index 0f9efbe..0834c54 100644
--- a/ext/meryl/src/meryl/merylOp-histogram.C
+++ b/ext/meryl/src/meryl/merylOp-histogram.C
@@ -62,7 +62,7 @@ merylOperation::reportStatistics(void) {
 
   //  Now just dump it.
 
-  uint64  nUniverse = buildLowBitMask<uint64>(kmer::merSize() * 2) + 1;
+  uint64  nUniverse = uint64MASK(kmer::merSize() * 2) + 1;
   uint64  sDistinct = 0;
   uint64  sTotal    = 0;
 
diff --git a/ext/meryl/src/meryl/merylOp-nextMer.C b/ext/meryl/src/meryl/merylOp-nextMer.C
index 5970bbc..f09c710 100644
--- a/ext/meryl/src/meryl/merylOp-nextMer.C
+++ b/ext/meryl/src/meryl/merylOp-nextMer.C
@@ -17,7 +17,7 @@
  */
 
 #include "meryl.H"
-#include <cmath>
+
 
 
 void
@@ -48,19 +48,6 @@ merylOperation::findSumCount(void) {
 }
 
 
-void
-merylOperation::subtractCount(void) {
-  _value = _actCount[0];
-  for (uint32 ii=1; ii<_actLen; ii++) {
-    if ( _value > _actCount[ii] )
-      _value -= _actCount[ii];
-    else {
-      _value = 0;
-      return;
-    }
-  }
-}
-
 
 void
 merylOperation::initializeThreshold(void) {
@@ -222,7 +209,7 @@ merylOperation::doCounting(void) {
   //   - add the counted output as an input
 
   if (_outputO)
-    strncpy(name, _outputO->filename(), FILENAME_MAX + 1);   //  know which input to open later.
+    strncpy(name, _outputO->filename(), FILENAME_MAX);   //  know which input to open later.
 
   delete _outputO;
   _outputO = NULL;
@@ -539,16 +526,6 @@ merylOperation::nextMer(void) {
       else
         _value = _actCount[0] / _mathConstant;
       break;
-    case opDivideRound:
-      if (_mathConstant == 0)
-        _value = 0;             //  DIVIDE BY ZERO!
-      else {
-        if (_actCount[0] < _mathConstant)
-          _value = 1;
-        else
-          _value = round (_actCount[0] / (double) _mathConstant);
-      }
-      break;
 
     case opModulo:
       if (_mathConstant == 0)
@@ -593,15 +570,6 @@ merylOperation::nextMer(void) {
         findSumCount();
       break;
 
-    case opSubtract:
-      if (_actIndex[0] == 0) {
-        if (_actLen == 1)
-          _value = _actCount[0];
-        else if (_actLen > 1)
-          subtractCount();
-      }
-      break;
-
     case opDifference:
       if ((_actLen == 1) && (_actIndex[0] == 0))
         _value = _actCount[0];
diff --git a/ext/meryl/src/meryl/merylOp.C b/ext/meryl/src/meryl/merylOp.C
index 31497b3..00b9840 100644
--- a/ext/meryl/src/meryl/merylOp.C
+++ b/ext/meryl/src/meryl/merylOp.C
@@ -315,7 +315,6 @@ toString(merylOp op) {
     case opDecrease:             return("opDecrease");             break;
     case opMultiply:             return("opMultiply");             break;
     case opDivide:               return("opDivide");               break;
-    case opDivideRound:          return("opDivideRound");          break;
     case opModulo:               return("opModulo");               break;
 
     case opUnion:                return("opUnion");                break;
@@ -328,8 +327,6 @@ toString(merylOp op) {
     case opIntersectMax:         return("opIntersectMax");         break;
     case opIntersectSum:         return("opIntersectSum");         break;
 
-    case opSubtract:             return("opSubtract");             break;
-
     case opDifference:           return("opDifference");           break;
     case opSymmetricDifference:  return("opSymmetricDifference");  break;
 
diff --git a/ext/meryl/src/meryl/merylOp.H b/ext/meryl/src/meryl/merylOp.H
index 390a2d9..4959358 100644
--- a/ext/meryl/src/meryl/merylOp.H
+++ b/ext/meryl/src/meryl/merylOp.H
@@ -46,7 +46,6 @@ enum merylOp {
   opDecrease,
   opMultiply,
   opDivide,
-  opDivideRound,
   opModulo,
 
   opUnion,
@@ -62,8 +61,6 @@ enum merylOp {
   opDifference,
   opSymmetricDifference,
 
-  opSubtract,	// if count(a) >= count(b), keep count(a)-count(b). else, discard.
-
   opHistogram,
   opStatistics,
 
@@ -157,21 +154,17 @@ public:
     return(isCounting() == false);
   };
 
-  bool    needsThreshold(void) {
+  bool    needsParameter(void) {      //  True if _operation is any of the comparison or arithmetic operations below.
     return((_operation == opLessThan)     ||
            (_operation == opGreaterThan)  ||
            (_operation == opAtLeast)      ||
            (_operation == opAtMost)       ||
            (_operation == opEqualTo)      ||
-           (_operation == opNotEqualTo));
-  };
-
-  bool    needsConstant(void) {
-    return((_operation == opIncrease)     ||
+           (_operation == opNotEqualTo)   ||   //  Last of the comparison operations.
+           (_operation == opIncrease)     ||   //  The arithmetic operations start here.
            (_operation == opDecrease)     ||
            (_operation == opMultiply)     ||
            (_operation == opDivide)       ||
-           (_operation == opDivideRound)  ||
            (_operation == opModulo));
   };
 
@@ -234,9 +227,8 @@ private:
   void    findMinCount(void);
   void    findMaxCount(void);
   void    findSumCount(void);
-  void    subtractCount(void);
 
-  std::vector<merylInput *>      _inputs;
+  vector<merylInput *>           _inputs;
   bool                           _isMultiSet = false;
 
   merylOp                        _operation = opNothing;
diff --git a/ext/meryl/src/tests/merylCountArrayTest.C b/ext/meryl/src/tests/merylCountArrayTest.C
index 4aa6186..f1549ea 100644
--- a/ext/meryl/src/tests/merylCountArrayTest.C
+++ b/ext/meryl/src/tests/merylCountArrayTest.C
@@ -24,7 +24,7 @@ mtRandom  *mt = NULL;
 
 
 void
-display(char const *l, kmdata s) {
+display(char *l, kmdata s) {
   uint64 a = (s >> 64);
   uint64 b =  s;
 
diff --git a/ext/meryl/src/tests/merylCountArrayTest.mk b/ext/meryl/src/tests/merylCountArrayTest.mk
index 4605ce3..d0fee14 100644
--- a/ext/meryl/src/tests/merylCountArrayTest.mk
+++ b/ext/meryl/src/tests/merylCountArrayTest.mk
@@ -1,8 +1,21 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := merylCountArrayTest
-SOURCES  := merylCountArrayTest.C ../meryl/merylCountArray.C
+SOURCES  := merylCountArrayTest.C \
+            ../meryl/merylCountArray.C
 
 SRC_INCDIRS  := . ../utility/src/utility ../meryl
 
 TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
+TGT_LDLIBS  := -lmeryl
+TGT_PREREQS := libmeryl.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/tests/merylExactLookupTest.C b/ext/meryl/src/tests/merylExactLookupTest.C
deleted file mode 100644
index 80ed3ad..0000000
--- a/ext/meryl/src/tests/merylExactLookupTest.C
+++ /dev/null
@@ -1,285 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl, a genomic k-kmer counter with nice features.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "runtime.H"
-
-#include "kmers.H"
-#include "sequence.H"
-#include "bits.H"
-
-
-void
-loadLookup(char const         *inputDBname,
-           uint64              minV,
-           uint64              maxV,
-           merylExactLookup   &lookup) {
-
-  fprintf(stderr, "==\n");
-  fprintf(stderr, "==  Create merylExactLookup from '%s'.\n", inputDBname);
-  fprintf(stderr, "==\n");
-
-  merylFileReader   *merylDB = new merylFileReader(inputDBname);
-
-  lookup.load(merylDB, 16.0, true, false, minV, maxV);
-
-  fprintf(stderr, "\n");
-
-  delete merylDB;
-}
-
-
-void
-loadMap(char const             *inputDBname,
-        uint64                  minV,
-        uint64                  maxV,
-        std::map<kmer, kmvalu> &lookup) {
-
-  fprintf(stderr, "==\n");
-  fprintf(stderr, "==  Create merylExactLookup from '%s'.\n", inputDBname);
-  fprintf(stderr, "==\n");
-
-  merylFileReader   *merylDB = new merylFileReader(inputDBname);
-
-  uint64     nKmers = 0;
-  uint64     nSkips = 0;
-
-  while (merylDB->nextMer() == true) {
-    kmer    kmer  = merylDB->theFMer();
-    uint32  value = merylDB->theValue();
-
-    if ((minV <= value) &&
-        (value <= maxV)) {
-      lookup[kmer] = value;
-      nKmers++;
-    } else {
-      nSkips++;
-    }
-
-    if (((nKmers + nSkips) % 100000) == 0)
-      fprintf(stderr, "==    Loaded %lu kmers; ignored %lu.\r", nKmers, nSkips);
-  }
-
-  fprintf(stderr, "==    Loaded %lu kmers; ignored %lu; map size %lu.\n", nKmers, nSkips, lookup.size());
-  fprintf(stderr, "\n");
-
-  delete merylDB;
-}
-
-
-
-int
-main(int argc, char **argv) {
-  char   *inputSeqName = nullptr;
-  char   *inputDBname  = nullptr;
-  uint64  minV         = 0;
-  uint64  maxV         = uint64max;
-  uint32  threads      = 1;
-
-  argc = AS_configure(argc, argv);
-
-  std::vector<char const *>  err;
-  int                        arg = 1;
-  while (arg < argc) {
-    if        (strcmp(argv[arg], "-sequence") == 0) {   //  INPUT READS and RANGE TO PROCESS
-      inputSeqName = argv[++arg];
-
-    } else if (strcmp(argv[arg], "-mers") == 0) {
-      inputDBname = argv[++arg];
-
-      //} else if (strcmp(argv[arg], "-min") == 0) {
-      //  minV = strtouint64(argv[++arg]);
-
-      //} else if (strcmp(argv[arg], "-max") == 0) {
-      //  maxV = strtouint64(argv[++arg]);
-
-      //} else if (strcmp(argv[arg], "-threads") == 0) {
-      //  threads = strtouint32(argv[++arg]);
-
-    } else {
-      char *s = new char [1024];
-      snprintf(s, 1024, "Unknown option '%s'.\n", argv[arg]);
-      err.push_back(s);
-    }
-
-    arg++;
-  }
-
-  if (inputSeqName == nullptr)   err.push_back("No input sequences (-sequence) supplied.\n");
-  if (inputDBname  == nullptr)   err.push_back("No query meryl database (-mers) supplied.\n");
-
-  if (err.size() > 0) {
-    fprintf(stderr, "usage: %s ...\n", argv[0]);
-    fprintf(stderr, "  -sequence    X.fasta\n");
-    fprintf(stderr, "  -mers        X.meryl\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "Loads kmers in X.meryl into a merylExactLookup table and a standard\n");
-    fprintf(stderr, "C++ associative map.  Verifies that every kmer present in X.fasta is\n");
-    fprintf(stderr, "present in both the merylExactLookup and the associative map, and that\n");
-    fprintf(stderr, "the value returned by both is the same.\n");
-    fprintf(stderr, "\n");
-
-    for (uint32 ii=0; ii<err.size(); ii++)
-      if (err[ii])
-        fputs(err[ii], stderr);
-
-    exit(1);
-  }
-
-
-
-  merylExactLookup        kmerLookup;
-  std::map<kmer,kmvalu>   kmerValue;
-  std::map<kmer,kmvalu>   kmerCheck;
-
-
-  loadLookup(inputDBname, minV, maxV, kmerLookup);
-  loadMap   (inputDBname, minV, maxV, kmerValue);
-
-  fprintf(stderr, "==\n");
-  fprintf(stderr, "==  Copy kmerValue to kmerCheck.\n");
-  fprintf(stderr, "==\n");
-
-  kmerCheck = kmerValue;
-
-  //
-
-  fprintf(stderr, "\n");
-  fprintf(stderr, "==\n");
-  fprintf(stderr, "==  Stream kmers from '%s'.\n", inputSeqName);
-  fprintf(stderr, "==\n");
-
-  dnaSeqFile  *seqFile    = new dnaSeqFile(inputSeqName);
-
-  {
-    dnaSeq   seq;
-    char     fString[64];
-    char     rString[64];
-    uint64   nTest = 0;
-
-    while (seqFile->loadSequence(seq)) {
-      kmerIterator  kiter(seq.bases(), seq.length());
-
-      while (kiter.nextMer()) {
-        kmer     fMer  = kiter.fmer();
-        kmer     rMer  = kiter.rmer();
-        kmer     cMer  = (fMer < rMer) ? fMer : rMer;
-        kmvalu   value;
-
-        if (kmerLookup.exists(cMer) == false) {
-          fprintf(stdout, "%s\t%s\t%s MISSING from kmerLookup::exists()\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString));
-          exit(1);
-#ifdef SHOW_SUCCESS
-        } else {
-          fprintf(stdout, "%s\t%s\t%s FOUND in kmerLookup::exists()\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString));
-#endif
-        }
-
-        if (kmerLookup.exists(cMer, value) == false) {
-          fprintf(stdout, "%s\t%s\t%s MISSING from kmerLookup::exists(mer, value) - (not found)\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString));
-          exit(1);
-        }
-        if (value != kmerValue[cMer]) {
-          fprintf(stdout, "%s\t%s\t%s MISSING from kmerLookup::exists(mer, value) -  kmerLookup=%u kmerValue=%u\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString),
-                  kmerLookup.value(cMer),
-                  kmerCheck[cMer]);
-          exit(1);
-#ifdef SHOW_SUCCESS
-        } else {
-          fprintf(stdout, "%s\t%s\t%s FOUND in kmerLookup::exists(mer, value)\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString));
-#endif
-        }
-
-        if (kmerLookup.value(cMer) != kmerValue[cMer]) {
-          fprintf(stdout, "%s\t%s\t%s MISSING from kmerLookup::value() -- kmerLookup=%u kmerValue=%u\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString),
-                  kmerLookup.value(cMer),
-                  kmerCheck[cMer]);
-          exit(1);
-#ifdef SHOW_SUCCESS
-        } else {
-          fprintf(stdout, "%s\t%s\t%s FOUND in kmerLookup::value()\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString));
-#endif
-        }
-
-        //  Subtract one from the kmer check counters.  If this is zero, the
-        //  kmerIterator returned too many kmers.
-
-        if (kmerCheck[cMer] == 0) {
-          fprintf(stdout, "%s\t%s\t%s ZERO\n",
-                  seq.ident(),
-                  kiter.fmer().toString(fString),
-                  kiter.rmer().toString(rString));
-          exit(1);
-        }
-
-        --kmerCheck[cMer];
-
-        //  Log.
-
-        if ((++nTest % 100000) == 0)
-          fprintf(stderr, "==    Tested %lu kmers.\r", nTest);
-      }
-    }
-  }
-
-  delete seqFile;
-
-  //  Check that all values are zero.
-
-  fprintf(stderr, "\n");
-  fprintf(stderr, "==\n");
-  fprintf(stderr, "==  Checking all kmers were seen.\n");
-  fprintf(stderr, "==\n");
-
-  for (auto it=kmerCheck.begin(); it != kmerCheck.end(); it++) {
-    kmer    k = it->first;
-    uint32  v = it->second;
-
-    if (v != 0) {
-      char   kmerString[64];
-
-      fprintf(stderr, "%s\t%u\n", k.toString(kmerString), v);
-    }
-  }
-
-  fprintf(stderr, "\n");
-  fprintf(stderr, "Success!\n");
-  fprintf(stderr, "\n");
-
-  exit(0);
-}
diff --git a/ext/meryl/src/tests/merylExactLookupTest.mk b/ext/meryl/src/tests/merylExactLookupTest.mk
deleted file mode 100644
index 6273728..0000000
--- a/ext/meryl/src/tests/merylExactLookupTest.mk
+++ /dev/null
@@ -1,8 +0,0 @@
-TARGET   := merylExactLookupTest
-SOURCES  := merylExactLookupTest.C \
-
-SRC_INCDIRS  := . ../utility/src/utility
-
-TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
diff --git a/ext/meryl/src/utility/README.licenses b/ext/meryl/src/utility/README.licenses
index 05c4804..1b646a6 100644
--- a/ext/meryl/src/utility/README.licenses
+++ b/ext/meryl/src/utility/README.licenses
@@ -137,108 +137,3 @@ For libbacktrace:
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
---
-For SSW Library (align-ssw.C and align-ssw.H):
-
-  The MIT License
-
-  Copyright (c) 2012-2015 Boston College.
-
-  Permission is hereby granted, free of charge, to any person obtaining
-  a copy of this software and associated documentation files (the
-  "Software"), to deal in the Software without restriction, including
-  without limitation the rights to use, copy, modify, merge, publish,
-  distribute, sublicense, and/or sell copies of the Software, and to
-  permit persons to whom the Software is furnished to do so, subject to
-  the following conditions:
-
-  The above copyright notice and this permission notice shall be
-  included in all copies or substantial portions of the Software.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-  SOFTWARE.
-
-
-  The 2-clause BSD License
-
-  Copyright 2006 Michael Farrar.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions are
-  met:
-
-  1. Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-  2. Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
---
-For parasail:
-
-  Pairwise Sequence Alignment Library (parasail)
-
-  Copyright (c) 2015, Battelle Memorial Institute
-
-  1.  Battelle Memorial Institute (hereinafter Battelle) hereby grants
-      permission to any person or entity lawfully obtaining a copy of this
-      software and associated documentation files (hereinafter “the
-      Software”) to redistribute and use the Software in source and binary
-      forms, with or without modification.  Such person or entity may use,
-      copy, modify, merge, publish, distribute, sublicense, and/or sell
-      copies of the Software, and may permit others to do so, subject to
-      the following conditions:
-
-      - Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimers.
-
-      - Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in
-        the documentation and/or other materials provided with the
-        distribution.
-
-      - Other than as used herein, neither the name Battelle Memorial
-        Institute or Battelle may be used in any form whatsoever without
-        the express written consent of Battelle.
-
-      - Redistributions of the software in any form, and publications
-        based on work performed using the software should include the
-        following citation as a reference:
-
-      Daily, Jeff. (2016). Parasail: SIMD C library for global,
-      semi-global, and local pairwise sequence alignments. *BMC
-      Bioinformatics*, 17(1), 1-11.  doi:10.1186/s12859-016-0930-z
-
-  2.  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-      "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-      LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-      FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BATTELLE
-      OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-      SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-      LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-      USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-      ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-      OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-      OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-      SUCH DAMAGE.
-
diff --git a/ext/meryl/src/utility/scripts/version_update.pl b/ext/meryl/src/utility/scripts/version_update.pl
index 8dd8f3f..57c782b 100755
--- a/ext/meryl/src/utility/scripts/version_update.pl
+++ b/ext/meryl/src/utility/scripts/version_update.pl
@@ -44,8 +44,6 @@
 my $branch   = "master";
 my $version  = "v$major.$minor";
 
-my @submodules;
-
 my $commits  = undef;
 my $hash1    = undef;          #  This from 'git describe'
 my $hash2    = undef;          #  This from 'git rev-list'
@@ -87,12 +85,7 @@
 
             $version = "v$major.$minor";
         } else {
-            $major   = "0";
-            $minor   = "0";
-            $commits = "0";
-            $hash1   = $_;
-
-            $version = "v$major.$minor";
+            die "Failed to parse describe string '$_'.\n";
         }
     }
     close(F);
@@ -139,16 +132,6 @@
         $label   = "branch";
         $version = $branch;
     }
-
-
-    #  Get information on any submodules here.
-    open(F, "git submodule status |");
-    while (<F>) {
-        if (m/^(.*)\s+(.*)\s+\((.*)\)$/) {
-            push @submodules, "$2 $3 $1";
-        }
-    }
-    close(F);
 }
 
 
@@ -168,12 +151,11 @@
 #  Report what we found.  This is really for the gmake output.
 
 if (defined($commits)) {
-    print "\$(info Building $label $version +$commits changes (r$revCount $hash1) ($dirty))\n";
-    foreach my $s (@submodules) {
-        print "\$(info \$(space)         $s)\n";
-    }
+    print STDERR "Building $label $version +$commits changes (r$revCount $hash1) ($dirty)\n";
+    print STDERR "\n";
 } else {
-    print "\$(info Building $label $version)\n";
+    print STDERR "Building $label $version\n";
+    print STDERR "\n";
 }
 
 #  Dump a new file, but don't overwrite the original.
diff --git a/ext/meryl/src/utility/src/Makefile b/ext/meryl/src/utility/src/Makefile
index 6939127..641014e 100644
--- a/ext/meryl/src/utility/src/Makefile
+++ b/ext/meryl/src/utility/src/Makefile
@@ -27,6 +27,11 @@
 #       instances of "$" within them need to be escaped with a second "$" to
 #       accomodate the double expansion that occurs when eval is invoked.
 
+#  Before doing ANYTHING, initialize submodules.
+#ifeq ($(wildcard utility/src/Makefile), )
+#  $(info $(shell git submodule update --init utility))
+#  $(info $(space))
+#endif
 
 # ADD_CLEAN_RULE - Parameterized "function" that adds a new rule and phony
 #   target for cleaning the specified target (removing its build-generated
@@ -338,9 +343,8 @@ DIR_STACK :=
 INCDIRS :=
 TGT_STACK :=
 
-# Discover our OS and architecture.  These were previously used to set
-# BUILD_DIR and TARGET_DIR to allow multi-platform builds.  DESTDIR will do
-# that for us too.
+# Discover our OS and architecture.  These are used to set the BUILD_DIR and TARGET_DIR to
+# something more useful than 'build' and '.'.
 
 OSTYPE      := $(shell echo `uname`)
 OSVERSION   := $(shell echo `uname -r`)
@@ -365,18 +369,31 @@ ifeq (${OSTYPE}, SunOS)
   endif
 endif
 
-#  Set paths for building and installing.  If DESTDIR doesn't exist, use the
-#  directory just above us.
+#  Some filesystems cannot use < or > in file names, but for reasons unknown
+#  (or, at least, reasons we're not going to admit to), files in the overlap
+#  store are named ####<###>.  Enabling POSIX_FILE_NAMES will change the
+#  names to ####.###.
+#
+#  Be aware this will break object store compatibility.
+#
+ifeq ($(POSIX_FILE_NAMES), 1)
+  CXXFLAGS += -DPOSIX_FILE_NAMES
 
-ifeq "$(strip ${DESTDIR})" ""
-  BUILD_DIR    := $(realpath ..)/build/obj
-  TARGET_DIR   := $(realpath ..)/build
 else
-  BUILD_DIR    := $(DESTDIR)/$(MODULE)/build/obj
-  TARGET_DIR   := $(DESTDIR)/$(MODULE)/build
+  #  Try to create non-<posix> file names.  It's tempting to use 'wildcard' instead
+  #  of the 'ls', but it doesn't work.
+  $(shell touch "non-<posix>-name" > /dev/null 2>&1)
+
+  ifeq (non-<posix>-name, $(shell ls "non-<posix>-name" 2> /dev/null))
+    #$(info Extended POSIX filenames allowed.)
+  else
+    #$(info POSIX filenames required.)
+    CXXFLAGS += -DPOSIX_FILE_NAMES
+  endif
+
+  $(shell rm -f "non-<posix>-name")
 endif
 
-#
 #  Set compiler and flags based on discovered hardware
 #
 #  By default, debug symbols are included in all builds (even optimized).
@@ -393,6 +410,7 @@ endif
 #  BUILDJEMALLOC will enable jemalloc library support.
 #
 
+
 ifeq ($(origin CXXFLAGS), undefined)
   ifeq ($(BUILDOPTIMIZED), 1)
   else
@@ -421,15 +439,6 @@ ifeq ($(origin CXXFLAGS), undefined)
   CXXFLAGS += -Wno-deprecated-declarations
   CXXFLAGS += -Wno-format-truncation
   CXXFLAGS += -std=c++11
-
-  CFLAGS += -Wall -Wextra -Wformat
-  CFLAGS += -Wno-char-subscripts
-  CFLAGS += -Wno-sign-compare
-  CFLAGS += -Wno-unused-function
-  CFLAGS += -Wno-unused-parameter
-  CFLAGS += -Wno-unused-variable
-  CFLAGS += -Wno-deprecated-declarations
-  CFLAGS += -Wno-format-truncation
 else
   CXXFLAGSUSER := ${CXXFLAGS}
 endif
@@ -472,26 +481,6 @@ ifeq (${OSTYPE}, Darwin)
     endif
   endif
 
-  ifeq ($(CC), cc)
-    CC8    := $(shell echo `which gcc-mp-8`)
-    CXX8   := $(shell echo `which g++-mp-8`)
-
-    ifdef CXX8
-      CC  := $(CC8)
-      CXX := $(CXX8)
-    endif
-  endif
-
-  ifeq ($(CC), cc)
-    CC9    := $(shell echo `which gcc-mp-9`)
-    CXX9   := $(shell echo `which g++-mp-9`)
-
-    ifdef CXX9
-      CC  := $(CC9)
-      CXX := $(CXX9)
-    endif
-  endif
-
   ifeq ($(CC), cc)
     CC8    := $(shell echo `which gcc-7`)
 	  CXX8   := $(shell echo `which g++-7`)
@@ -512,16 +501,6 @@ ifeq (${OSTYPE}, Darwin)
     endif
   endif
 
-  ifeq ($(CC), cc)
-    CC9    := $(shell echo `which gcc-9`)
-	  CXX9   := $(shell echo `which g++-9`)
-
-    ifdef CXX9
-      CC  := $(CC9)
-      CXX := $(CXX9)
-    endif
-  endif
-
   ifneq ($(shell echo `$(CXX) --version 2>&1 | grep -c clang`), 0)
      CPATH := $(shell echo `which $(CXX)`)
      CLANG := $(shell echo `$(CXX) --version 2>&1 | grep clang`)
@@ -560,12 +539,9 @@ ifeq (${CANU_BUILD_ENV}, ports)
 
 else
 
-  # Ignore the gmake default 'c++' and force g++9.
-	ifeq ($(origin CXX), default)
-    CC    = gcc9
-    CXX   = g++9
-    CCLIB = -rpath /usr/local/lib/gcc9
-  endif
+  CC       ?= gcc6
+  CXX      ?= g++6
+  CCLIB    ?= -rpath /usr/local/lib/gcc6
 
   #  GCC
   CXXFLAGS  += -I/usr/local/include -pthread -fopenmp -fPIC
@@ -730,14 +706,16 @@ $(foreach TGT,${ALL_TGTS},\
 $(foreach TGT,${ALL_TGTS},\
   $(eval -include ${${TGT}_DEPS}))
 
+#  (A recipe line creating ${TARGET_DIR}/bin was dropped here: outside a rule it is a make error, and the $(shell mkdir -p) below does the job.)
+
 #  Makefile processed.  Regenerate the version number file, make some
 #  directories, and report that we're starting the build.
 
-$(eval $(shell ../scripts/version_update.pl $(MODULE) utility/version.H))
+$(shell ../scripts/version_update.pl meryl-utility utility/version.H)
 
 $(shell mkdir -p ${TARGET_DIR}/bin)
 
-$(info For '${OSTYPE}' '${OSVERSION}' as '${MACHINETYPE}' into '${TARGET_DIR}/{bin,obj}'.)
+$(info For '${OSTYPE}' '${OSVERSION}' as '${MACHINETYPE}' into '${DESTDIR}${PREFIX}/$(OSTYPE)-$(MACHINETYPE)/{bin,obj}'.)
 $(info Using '$(shell which ${CXX})' version '${GXX_VV}'.)
 ifneq ($(origin CXXFLAGSUSER), undefined)
 $(info Using user-supplied CXXFLAGS '${CXXFLAGSUSER}'.)
diff --git a/ext/meryl/src/utility/src/main.mk b/ext/meryl/src/utility/src/main.mk
index ad3c585..c607e16 100644
--- a/ext/meryl/src/utility/src/main.mk
+++ b/ext/meryl/src/utility/src/main.mk
@@ -1,13 +1,31 @@
-MODULE       :=    meryl-utility
-TARGET       := libmeryl-utility.a
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+
+ifeq "$(strip ${DESTDIR})" ""
+  DESTDIR      :=
+endif
+
+ifeq "$(strip ${PREFIX})" ""
+  ifeq "$(strip ${DESTDIR})" ""
+    PREFIX     := $(realpath ..)
+  else
+    PREFIX     := /canu
+  endif
+endif
+
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := $(DESTDIR)$(PREFIX)/$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := $(DESTDIR)$(PREFIX)/$(OSTYPE)-$(MACHINETYPE)
+endif
+
+TARGET       := libcanu.a
+
 SOURCES      := utility/runtime.C \
                 \
-                utility/align-ksw2-extz.C \
-                utility/align-ksw2-extz2-sse.C \
-                utility/align-ksw2-driver.C \
-                utility/align-ssw.C \
-                utility/align-ssw-driver.C \
-                utility/align-parasail-driver.C \
                 utility/edlib.C \
                 \
                 utility/files.C \
@@ -36,22 +54,13 @@ SOURCES      := utility/runtime.C \
                 utility/kmers.C \
                 \
                 utility/bits.C \
-                utility/bits-wordArray.C \
                 \
                 utility/hexDump.C \
                 utility/md5.C \
                 utility/mt19937ar.C \
+                utility/objectStore.C \
                 utility/speedCounter.C \
-                utility/sweatShop.C \
-                \
-                parasail/cpuid.c \
-                parasail/memory.c \
-                parasail/sg.c \
-                parasail/sg_trace.c \
-                parasail/sg_qx_dispatch.c \
-                parasail/sg_qb_de_dispatch.c \
-                parasail/sg_qe_db_dispatch.c \
-                parasail/cigar.c
+                utility/sweatShop.C
 
 
 ifeq (${BUILDSTACKTRACE}, 1)
@@ -73,24 +82,14 @@ endif
 
 
 SRC_INCDIRS  := . \
-                utility \
-                parasail
+                utility
 
 SUBMAKEFILES := 
 
 ifeq ($(BUILDTESTS), 1)
-SUBMAKEFILES += tests/alignTest-ssw.mk \
-                tests/alignTest-ksw2.mk \
-                tests/bitsTest.mk \
+SUBMAKEFILES += tests/bitsTest.mk \
                 tests/filesTest.mk \
                 tests/intervalListTest.mk \
-                tests/intervalsTest.mk \
                 tests/loggingTest.mk \
-                tests/magicNumber.mk \
-                tests/parasailTest.mk \
-                tests/readLines.mk \
-                tests/sequenceTest.mk \
-                tests/stddevTest.mk \
-                tests/systemTest.mk \
-                tests/typesTest.mk
+                tests/stddevTest.mk
 endif
diff --git a/ext/meryl/src/utility/src/tests/alignTest-ksw2.C b/ext/meryl/src/utility/src/tests/alignTest-ksw2.C
index 0eab0b3..1e6504a 100644
--- a/ext/meryl/src/utility/src/tests/alignTest-ksw2.C
+++ b/ext/meryl/src/utility/src/tests/alignTest-ksw2.C
@@ -49,8 +49,8 @@ int
 main(int argc, char **argv) {
   dnaSeqFile  *fileA, *fileB;
   dnaSeq       dseqA,  dseqB;
-  char const  *seqA = nullptr;
-  char const  *seqB = nullptr;
+  char        *seqA = nullptr;
+  char        *seqB = nullptr;
 
   //fprintf(stderr, "A -> %2u -> %c\n", encode2bitBase('A'), decode2bitBase(0));
   assert(encode2bitBase('A') == 0);
diff --git a/ext/meryl/src/utility/src/tests/bitsTest.C b/ext/meryl/src/utility/src/tests/bitsTest.C
index 4403d6d..48b8b31 100644
--- a/ext/meryl/src/utility/src/tests/bitsTest.C
+++ b/ext/meryl/src/utility/src/tests/bitsTest.C
@@ -18,7 +18,6 @@
  */
 
 #include "bits.H"
-#include "strings.H"
 #include "mt19937ar.H"
 
 char          b1[65];
@@ -26,25 +25,6 @@ char          b2[65];
 char          b3[65];
 
 
-void
-testMasks(void) {
-  uint128  m128;
-  uint64   m64;
-  uint32   m32;
-  uint16   m16;
-  uint8    m8;
-
-  for (uint32 ii=0; ii<=128; ii++) {
-    fprintf(stderr, "%3d: %s %s  %s %s  %s %s  %s %s  %s %s\n", ii,
-            toHex(buildLowBitMask<uint128>(ii)), toHex(buildHighBitMask<uint128>(ii)),
-            toHex(buildLowBitMask<uint64> (ii)), toHex(buildHighBitMask<uint64> (ii)),
-            toHex(buildLowBitMask<uint32> (ii)), toHex(buildHighBitMask<uint32> (ii)),
-            toHex(buildLowBitMask<uint16> (ii)), toHex(buildHighBitMask<uint16> (ii)),
-            toHex(buildLowBitMask<uint8>  (ii)), toHex(buildHighBitMask<uint8>  (ii)));
-  }
-}
-
-
 void
 testLogBaseTwo(void) {
   uint64  val = 0;
@@ -127,7 +107,7 @@ testBitArray(uint64 maxLength) {
 
 void
 testWordArray(uint64 wordSize) {
-  wordArray  *wa = new wordArray(wordSize, 8 * 64, false);
+  wordArray  *wa = new wordArray(wordSize, 8 * 64);
 
   for (uint32 ii=0; ii<1000; ii++)
     wa->set(ii, 0xffffffff);
@@ -138,9 +118,7 @@ testWordArray(uint64 wordSize) {
   wa->show();
 
   for (uint32 ii=0; ii<1000; ii++)
-    assert(wa->get(ii) == (ii & buildLowBitMask<uint64>(wordSize)));
-
-  fprintf(stderr, "Passed!\n");
+    assert(wa->get(ii) == (ii & uint64MASK(wordSize)));
 
   delete wa;
 }
@@ -294,7 +272,7 @@ testPrefixFree(uint32 type) {
     length     += width[ii];
     histo[width[ii]]++;
 
-    random[ii]  =  mt.mtRandom64() & buildLowBitMask<uint64>(width[ii]);
+    random[ii]  =  mt.mtRandom64() & uint64MASK(width[ii]);
 
     if (random[ii] == 0)
       ii--;
@@ -451,10 +429,6 @@ main(int argc, char **argv) {
       err++;
     }
 
-    else if (strcmp(argv[arg], "-masks") == 0) {
-      testMasks();
-    }
-
     else if (strcmp(argv[arg], "-logbasetwo") == 0) {
       testLogBaseTwo();
     }
@@ -464,24 +438,19 @@ main(int argc, char **argv) {
     }
 
     else if (strcmp(argv[arg], "-bitarray") == 0) {
-      if (++arg >= argc)
-        fprintf(stderr, "ERROR: -bitarray needs word-size argument.\n"), exit(1);
+      uint64  maxLength = strtouint64(argv[++arg]);
 
-      testBitArray(strtouint64(argv[arg]));
+      testBitArray(maxLength);
     }
 
     else if (strcmp(argv[arg], "-wordarray") == 0) {
-      if (++arg >= argc)
-        fprintf(stderr, "ERROR: -wordarray needs word-size argument.\n"), exit(1);
+      uint64  wordSize = strtouint64(argv[++arg]);
 
-      testWordArray(strtouint64(argv[arg]));
+      testWordArray(wordSize);
     }
 
     else if (strcmp(argv[arg], "-unary") == 0) {
-      if (++arg >= argc)
-        fprintf(stderr, "ERROR: -unary needs max-size argument.\n"), exit(1);
-
-      uint32  maxSize = strtouint32(argv[arg]);
+      uint32  maxSize = strtouint32(argv[++arg]);
 
 #pragma omp parallel for
       for (uint32 xx=1; xx<=maxSize; xx++) {
diff --git a/ext/meryl/src/utility/src/tests/bitsTest.mk b/ext/meryl/src/utility/src/tests/bitsTest.mk
index 6d7a430..a29822a 100644
--- a/ext/meryl/src/utility/src/tests/bitsTest.mk
+++ b/ext/meryl/src/utility/src/tests/bitsTest.mk
@@ -1,8 +1,20 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := bitsTest
 SOURCES  := bitsTest.C
 
 SRC_INCDIRS := .. ../utility
 
 TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/utility/src/tests/filesTest.mk b/ext/meryl/src/utility/src/tests/filesTest.mk
index db88fa6..469a199 100644
--- a/ext/meryl/src/utility/src/tests/filesTest.mk
+++ b/ext/meryl/src/utility/src/tests/filesTest.mk
@@ -1,8 +1,20 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := filesTest
 SOURCES  := filesTest.C
 
 SRC_INCDIRS := .. ../utility
 
 TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/utility/src/tests/intervalListTest.mk b/ext/meryl/src/utility/src/tests/intervalListTest.mk
index 2a785cc..4ecfd65 100644
--- a/ext/meryl/src/utility/src/tests/intervalListTest.mk
+++ b/ext/meryl/src/utility/src/tests/intervalListTest.mk
@@ -1,8 +1,20 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := intervalListTest
 SOURCES  := intervalListTest.C
 
 SRC_INCDIRS := .. ../utility
 
 TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/utility/src/tests/intervalsTest.C b/ext/meryl/src/utility/src/tests/intervalsTest.C
deleted file mode 100644
index 66501bb..0000000
--- a/ext/meryl/src/utility/src/tests/intervalsTest.C
+++ /dev/null
@@ -1,229 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "runtime.H"
-#include "intervals.H"
-
-#include "mt19937ar.H"
-
-void
-boringTest(void) {
-  bool              errors = false;
-  intervals<int32>  t1;
-
-  t1.add_span(11, -4);
-  t1.add_position(0, 10);
-  t1.add_span(8, 12);
-
-  errors |= ((t1.size() != 3) ||
-             (t1.bgn(0) != 7) || (t1.end(0) != 11) ||
-             (t1.bgn(1) != 0) || (t1.end(1) != 10) ||
-             (t1.bgn(2) != 8) || (t1.end(2) != 20));
-
-  if (errors) {
-    fprintf(stderr, "BEFORE:\n");
-    for (uint32 ii=0; ii<t1.size(); ii++)
-      fprintf(stderr, "%2d %3d-%3d\n", ii, t1.bgn(ii), t1.end(ii));
-  }
-
-  t1.squash(-1);
-
-  errors |= ((t1.size() != 1) ||
-             (t1.bgn(0) != 0) || (t1.end(0) != 20));
-
-  if (errors) {
-    fprintf(stderr, "AFTER:\n");
-    for (uint32 ii=0; ii<t1.size(); ii++)
-      fprintf(stderr, "%2d %3d-%3d\n", ii, t1.bgn(ii), t1.end(ii));
-  }
-
-  if (errors)
-    fprintf(stderr, "FAIL.\n");
-  else
-    fprintf(stderr, "Success!\n");
-}
-
-
-
-void
-invertTest(void) {
-  bool              errors = false;
-  intervals<int32>  t1;
-  intervals<int32>  t2;
-
-  t1.add_position(-30, -10);
-  t1.add_position( -5,  5);
-  t1.add_position( 10,  30);
-
-  t2.setToInversion(-20, 20, t1);
-
-  errors |= ((t2.size() != 4) ||
-             (t2.bgn(0) != -10) || (t2.end(0) !=  20) ||
-             (t2.bgn(1) != -20) || (t2.end(1) != -5) ||
-             (t2.bgn(2) !=   5) || (t2.end(2) !=  20) ||
-             (t2.bgn(3) != -20) || (t2.end(3) !=  10));
-
-  if (errors) {
-    fprintf(stderr, "BEFORE:\n");
-    for (uint32 ii=0; ii<t1.size(); ii++)
-      fprintf(stderr, "%2d %3d-%3d\n", ii, t1.bgn(ii), t1.end(ii));
-
-    fprintf(stderr, "AFTER:\n");
-    for (uint32 ii=0; ii<t2.size(); ii++)
-      fprintf(stderr, "%2d %3d-%3d\n", ii, t2.bgn(ii), t2.end(ii));
-  }
-
-  if (errors)
-    fprintf(stderr, "FAIL.\n");
-  else
-    fprintf(stderr, "Success!\n");
-}
-
-
-
-void
-expensiveTest(uint32 seed) {
-  mtRandom  mt(seed);
-
-  //  About 6.5 minutes per million, so this should be about an hour.
-  uint32  iterMax = 935000;
-
-  for (uint32 iter=0; iter<iterMax; iter++) {
-    uint32  numIntervals =     mt.mtRandom32() % 5000;
-    uint32  maxLen       = 1 + mt.mtRandom32() % 1000;
-    uint32  maxBgn       = 1 + mt.mtRandom32() % 50000;
-    uint32 *depth        = new uint32 [maxBgn + maxLen];
-
-    memset(depth, 0, sizeof(uint32) * (maxBgn + maxLen));
-
-    if (iter % 1000 == 0)
-      fprintf(stderr, "%10u/%10u: %3u intervals, each up to %4u long, coords up to %4u\n",
-              iter, iterMax,
-              numIntervals, maxLen, maxBgn);
-
-    intervals<uint32>  il;
-
-    //  Add intervals to the list.
-    //  Sum depths explicitly.
-    for (uint32 ii=0; ii<numIntervals; ii++) {
-      uint32  bgn = mt.mtRandom32() % maxBgn;   //  bgn between 0 and maxBgn
-      uint32  len = mt.mtRandom32() % maxLen;   //  len between 0 and maxLen
-      uint32  end = bgn + len;
-
-      if (mt.mtRandom32() < uint32max / 2)
-        il.add_span(bgn, len);
-      else
-        il.add_position(bgn, end);
-
-      for (uint32 xx=bgn; xx<end; xx++)
-        depth[xx]++;
-
-      //fprintf(stderr, "IL %u - %u\n", bgn, bgn+len);
-    }
-
-    //  Convert intervals to depths.
-
-    intervalsDepth<uint32>  de(il);
-
-    //  Over all the depth regions, subtract the computed depth from
-    //  the explicit depth.
-    for (uint32 xx=0; xx<de.size(); xx++) {
-      uint32  bgn = de.bgn(xx);
-      uint32  end = de.end(xx);
-      uint32  dpt = de.depth(xx);
-
-      //fprintf(stderr, "ID %u - %u depth %u\n", bgn, end, dpt);
-
-      for (uint32 cc=bgn; cc<end; cc++) {
-        //if (cc < 30)
-        //  fprintf(stderr, "depth[%u] = %u -> %u\n", cc, depth[cc], depth[cc] - dpt);
-        depth[cc] -= dpt;
-      }
-    }
-
-    //  Every explicit depth should now be zero, even the ones
-    //  not covered.
-    for (uint32 cc=0; cc<maxBgn + maxLen; cc++) {
-      if (depth[cc] != 0)
-        fprintf(stderr, "ERROR: depth[%u] = %u in iter %u\n", cc, depth[cc], iter);
-      assert(depth[cc] == 0);
-    }
-
-
-    delete [] depth;
-  }
-
-  fprintf(stderr, "Success!\n");
-}
-
-
-
-int
-main(int argc, char **argv) {
-  bool    doBoring      = false;
-  bool    doInvert      = false;
-  bool    doExpensive   = false;
-  uint32  expensiveSeed = 9;
-
-  AS_configure(argc, argv);
-
-  int arg=1;
-  int err=0;
-  while (arg < argc) {
-    if      (strcmp(argv[arg], "-boring") == 0) {
-      doBoring = true;
-    }
-
-    else if (strcmp(argv[arg], "-invert") == 0) {
-      doInvert = true;
-    }
-
-    else if (strcmp(argv[arg], "-expensive") == 0) {
-      doExpensive = true;
-
-      if (++arg < argc)
-        expensiveSeed = strtouint32(argv[arg]);
-    }
-
-    else {
-      err++;
-    }
-
-    arg++;
-  }
-
-  if ((doBoring    == false) &&
-      (doInvert    == false) &&
-      (doExpensive == false))
-    err++;
-
-  if (err) {
-    fprintf(stderr, "usage: %s [-boring] [-expensive seed]\n", argv[0]);
-    fprintf(stderr, "  -boring\n");
-    fprintf(stderr, "  -invert\n");
-    fprintf(stderr, "  -expensive [seed]\n");
-  }
-
-
-  if (doBoring)     boringTest();
-  if (doInvert)     invertTest();
-  if (doExpensive)  expensiveTest(expensiveSeed);
-
-  exit(0);
-}
diff --git a/ext/meryl/src/utility/src/tests/intervalsTest.mk b/ext/meryl/src/utility/src/tests/intervalsTest.mk
deleted file mode 100644
index b873410..0000000
--- a/ext/meryl/src/utility/src/tests/intervalsTest.mk
+++ /dev/null
@@ -1,8 +0,0 @@
-TARGET   := intervalsTest
-SOURCES  := intervalsTest.C
-
-SRC_INCDIRS := .. ../utility
-
-TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
diff --git a/ext/meryl/src/utility/src/tests/loggingTest.mk b/ext/meryl/src/utility/src/tests/loggingTest.mk
index b4c734b..9a095b6 100644
--- a/ext/meryl/src/utility/src/tests/loggingTest.mk
+++ b/ext/meryl/src/utility/src/tests/loggingTest.mk
@@ -1,8 +1,20 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := loggingTest
 SOURCES  := loggingTest.C
 
 SRC_INCDIRS := .. ../utility
 
 TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/utility/src/tests/magicNumber.C b/ext/meryl/src/utility/src/tests/magicNumber.C
deleted file mode 100644
index 6980562..0000000
--- a/ext/meryl/src/utility/src/tests/magicNumber.C
+++ /dev/null
@@ -1,84 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "types.H"
-
-int
-main(int argc, char **argv) {
-
-  if (argc != 2) {
-    fprintf(stderr, "usage: %s text-file\n", argv[0]);
-    fprintf(stderr, "  Converts the input text-file into 64-bit and 32-bit integer\n");
-    fprintf(stderr, "  constants, for use as magic numbers in data files.  If you then\n");
-    fprintf(stderr, "  write this integer constant to a file, it'll appear as readable\n");
-    fprintf(stderr, "  text in the file.  The input file is limited to 4 KB.\n");
-    return(1);
-  }
-
-  uint32  ccLen = 0;
-  uint32  ccMax = 4096;
-  char   *cc    = new char [ccMax];
-  FILE   *F;
-
-  memset(cc, 0, ccMax);
-
-  F = fopen(argv[1], "r");
-  ccLen = fread(cc, sizeof(char), 4096, F);
-  fclose(F);
-
-  F = fopen(argv[1], "r");
-  for (uint32 ii=0, nn=0; ii<ccLen; ii += 8, nn++) {
-    uint64 u64;
-
-    fread(&u64, sizeof(uint64), 1, F);       //  You can get away with only the char array,
-    assert(u64 == *((uint64 *)(cc + ii)));   //  but I'm not sure what will happen on big-endian.
-
-    fprintf(stdout, "uint64 u64_%02u = 0x%016lxllu;  //  %c%c%c%c%c%c%c%c\n",
-            nn, u64, 
-            integerToLetter(cc[ii+0]),
-            integerToLetter(cc[ii+1]),
-            integerToLetter(cc[ii+2]),
-            integerToLetter(cc[ii+3]),
-            integerToLetter(cc[ii+4]),
-            integerToLetter(cc[ii+5]),
-            integerToLetter(cc[ii+6]),
-            integerToLetter(cc[ii+7]));
-  }
-  fclose(F);
-
-  F = fopen(argv[1], "r");
-  for (uint32 ii=0, nn=0; ii<ccLen; ii += 4, nn++) {
-    uint32 u32;
-
-    fread(&u32, sizeof(uint32), 1, F);
-    assert(u32 == *((uint32 *)(cc + ii)));
-
-    fprintf(stdout, "uint32 u32_%02u = 0x%08xlu;  //  %c%c%c%c\n",
-            nn, u32,
-            integerToLetter(cc[ii+0]),
-            integerToLetter(cc[ii+1]),
-            integerToLetter(cc[ii+2]),
-            integerToLetter(cc[ii+3]));
-  }
-  fclose(F);
-
-  delete [] cc;
-
-  return(0);
-}
diff --git a/ext/meryl/src/utility/src/tests/magicNumber.mk b/ext/meryl/src/utility/src/tests/magicNumber.mk
deleted file mode 100644
index 826611a..0000000
--- a/ext/meryl/src/utility/src/tests/magicNumber.mk
+++ /dev/null
@@ -1,8 +0,0 @@
-TARGET   := magicNumber
-SOURCES  := magicNumber.C
-
-SRC_INCDIRS := .. ../utility
-
-TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
diff --git a/ext/meryl/src/utility/src/tests/parasailTest.C b/ext/meryl/src/utility/src/tests/parasailTest.C
index e6ba5e1..c760090 100644
--- a/ext/meryl/src/utility/src/tests/parasailTest.C
+++ b/ext/meryl/src/utility/src/tests/parasailTest.C
@@ -104,7 +104,6 @@ explicitCallParasail(char const *seqA, uint32 lenA,
   printCigar(result, matrix, seqA, lenA, seqB, lenB);
   parasail_result_free(result);
 
-#if 0
   fprintf(stderr, "\n--\n");
   fprintf(stderr, "vector\n");
   bgn = getTime();
@@ -136,7 +135,7 @@ explicitCallParasail(char const *seqA, uint32 lenA,
   fprintf(stderr, "%.3f seconds  score %d\n", getTime() - bgn, parasail_result_get_score(result));
   printCigar(result, matrix, seqA, lenA, seqB, lenB);
   parasail_result_free(result);
-#endif
+
 
   parasail_matrix_free(matrix);
 }
diff --git a/ext/meryl/src/utility/src/tests/readLines.mk b/ext/meryl/src/utility/src/tests/readLines.mk
deleted file mode 100644
index f6e35f8..0000000
--- a/ext/meryl/src/utility/src/tests/readLines.mk
+++ /dev/null
@@ -1,8 +0,0 @@
-TARGET   := readLines
-SOURCES  := readLines.C
-
-SRC_INCDIRS := .. ../utility
-
-TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
diff --git a/ext/meryl/src/utility/src/tests/sequenceTest.C b/ext/meryl/src/utility/src/tests/sequenceTest.C
deleted file mode 100644
index efcc88f..0000000
--- a/ext/meryl/src/utility/src/tests/sequenceTest.C
+++ /dev/null
@@ -1,66 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "sequence.H"
-
-int
-main(int argc, char **argv) {
-  FILE *O;
-
-  O = AS_UTL_openOutputFile("sequenceTest.data.fasta");
-  fprintf(O, ">name\n");
-  fprintf(O, "ACGT\n");
-  fprintf(O, ">  name   \n");
-  fprintf(O, "ACGT\n");
-  fprintf(O, ">  name   flags\n");
-  fprintf(O, "ACGT\n");
-  fprintf(O, ">  name   f l a g s    \n");
-  fprintf(O, "ACGT\n");
-  AS_UTL_closeFile(O);
-
-  dnaSeqFile  F("sequenceTest.data.fasta");
-  dnaSeq      S;
-
-  F.loadSequence(S);
-  assert(strcmp(S.ident(), "name")      == 0);
-  assert(strcmp(S.flags(), "")          == 0);
-  assert(strcmp(S.bases(), "ACGT")      == 0);
-
-  F.loadSequence(S);
-  assert(strcmp(S.ident(), "name")      == 0);
-  assert(strcmp(S.flags(), "")          == 0);
-  assert(strcmp(S.bases(), "ACGT")      == 0);
-
-  F.loadSequence(S);
-  assert(strcmp(S.ident(), "name")      == 0);
-  assert(strcmp(S.flags(), "flags")     == 0);
-  assert(strcmp(S.bases(), "ACGT")      == 0);
-
-  F.loadSequence(S);
-  assert(strcmp(S.ident(), "name")      == 0);
-  assert(strcmp(S.flags(), "f l a g s") == 0);
-  assert(strcmp(S.bases(), "ACGT")      == 0);
-
-  AS_UTL_unlink("sequenceTest.data.fasta");
-
-  fprintf(stderr, "Success!\n");
-
-  return(0);
-}
-
diff --git a/ext/meryl/src/utility/src/tests/sequenceTest.mk b/ext/meryl/src/utility/src/tests/sequenceTest.mk
deleted file mode 100644
index ebd7f16..0000000
--- a/ext/meryl/src/utility/src/tests/sequenceTest.mk
+++ /dev/null
@@ -1,8 +0,0 @@
-TARGET   := sequenceTest
-SOURCES  := sequenceTest.C
-
-SRC_INCDIRS := .. ../utility
-
-TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
diff --git a/ext/meryl/src/utility/src/tests/stddevTest.C b/ext/meryl/src/utility/src/tests/stddevTest.C
index cda859e..184cfd6 100644
--- a/ext/meryl/src/utility/src/tests/stddevTest.C
+++ b/ext/meryl/src/utility/src/tests/stddevTest.C
@@ -108,6 +108,7 @@ testBig(uint32 nSamples) {
 
   fprintf(stderr, "\n");
   fprintf(stderr, "testBig for nSamples %u\n", nSamples);
+  fprintf(stderr, "\n");
 
   for (uint32 ii=0; ii<nSamples; ii++) {
     uint32  val = (mt.mtRandom32() % 10);
@@ -120,79 +121,9 @@ testBig(uint32 nSamples) {
 
   hist.finalizeData();
 
-  fprintf(stderr, "  size:   %lu\n", hist.numberOfObjects());
-  fprintf(stderr, "  mean:   %f +- %f\n", hist.mean(), hist.stddev());
-  fprintf(stderr, "  median: %lu +- %lu\n", hist.median(), hist.mad());
-}
-
-
-
-//  This is testing for an odd bit of apparent numerical instability where
-//  the second to last remove() resulted in a negative variance.  d=0.0019
-//  was the original case, but many others failed too.
-void
-testStability(void) {
-  double sum = 0.0;
-
-  fprintf(stderr, "\n");
-  fprintf(stderr, "testStability (shouldn't crash)\n");
-
-  for (double d = 0.0000; d < 0.5000; d += 0.0001) {
-    stdDev<double>  sd;
-
-    sd.insert(0.000000);
-    sd.insert(d);
-    sd.insert(0.000000);
-
-    sd.remove(0.000000);
-    sd.remove(0.000000);  //  Fails here; the two if's in remove() resolve.
-
-    sum += sd.mean();     //  Add d.
-
-    assert(d - 0.00001 <= sd.mean());
-    assert(sd.mean() <= d + 0.00001);
-    assert(sd.variance() == 0.0);
-
-    sd.remove(d);
-
-    sum += sd.mean();     //  Add zero.
-
-    assert(sd.mean()     == 0.0);
-    assert(sd.variance() == 0.0);
-  }
-
-  fprintf(stderr, "  %18.16f\n", sum);
-}
-
-
-
-//  Same idea, but this one fails before we hit the
-//  reset for one item.  Grrrr!
-void
-testStability2(uint32 n) {
-  double sum = 0.0;
-  stdDev<double>  sd;
-
-  if (n == 1) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "testStability2 (values should be positive zero)\n");
-  }
-
-  for (uint32 ii=0; ii<n; ii++)
-    sd.insert(0.000000);
-
-  sd.insert(0.000190);
-  sd.remove(0.000190);
-  fprintf(stderr, "%2u  %26.24f\n", n, sd.variance());
-  assert(sd.variance() >= 0.0);
-
-  sd.insert(0.000220);
-  sd.remove(0.000220);
-  fprintf(stderr, "%2u  %26.24f\n", n, sd.variance());
-  assert(sd.variance() >= 0.0);
-
-  for (uint32 ii=0; ii<n; ii++)
-    sd.remove(0.000000);
+  fprintf(stderr, "size:   %lu\n", hist.numberOfObjects());
+  fprintf(stderr, "mean:   %f +- %f\n", hist.mean(), hist.stddev());
+  fprintf(stderr, "median: %lu +- %lu\n", hist.median(), hist.mad());
 }
 
 
@@ -212,15 +143,5 @@ main(int argc, char **argv) {
   testBig(100);
   testBig(1000);
 
-  testStability();
-
-  testStability2(1);
-  testStability2(2);
-  testStability2(3);
-  testStability2(4);
-
-  fprintf(stderr, "\n");
-  fprintf(stderr, "Success!\n");
-
   exit(0);
 }
diff --git a/ext/meryl/src/utility/src/tests/stddevTest.mk b/ext/meryl/src/utility/src/tests/stddevTest.mk
index df94ab2..5678fa4 100644
--- a/ext/meryl/src/utility/src/tests/stddevTest.mk
+++ b/ext/meryl/src/utility/src/tests/stddevTest.mk
@@ -1,8 +1,20 @@
+
+#  If 'make' isn't run from the root directory, we need to set these to
+#  point to the upper level build directory.
+ifeq "$(strip ${BUILD_DIR})" ""
+  BUILD_DIR    := ../$(OSTYPE)-$(MACHINETYPE)/obj
+endif
+ifeq "$(strip ${TARGET_DIR})" ""
+  TARGET_DIR   := ../$(OSTYPE)-$(MACHINETYPE)
+endif
+
 TARGET   := stddevTest
 SOURCES  := stddevTest.C
 
 SRC_INCDIRS := .. ../utility
 
 TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
+TGT_LDLIBS  := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/ext/meryl/src/utility/src/tests/typesTest.C b/ext/meryl/src/utility/src/tests/typesTest.C
deleted file mode 100644
index 572d7f9..0000000
--- a/ext/meryl/src/utility/src/tests/typesTest.C
+++ /dev/null
@@ -1,135 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "types.H"
-#include "strings.H"
-
-char const *minu128 = "0";
-char const *maxu128 = "340282366920938463463374607431768211455";
-
-char const *minu64 = "0";
-char const *maxu64 = "18446744073709551615";
-
-char const *minu32 = "0";
-char const *maxu32 = "4294967295";
-
-char const *minu16 = "0";
-char const *maxu16 = "65535";
-
-char const *minu8 = "0";
-char const *maxu8 = "255";
-
-
-char const *min128 = "-170141183460469231731687303715884105728";
-char const *max128 = "+170141183460469231731687303715884105727";
-
-char const *min64 = "-9223372036854775808";
-char const *max64 = "+9223372036854775807";
-
-char const *min32 = "-2147483648";
-char const *max32 =  "2147483647";
-
-char const *min16 = "-32768";
-char const *max16 =  "32767";
-
-char const *min8 = "-128";
-char const *max8 =  "127";
-
-
-
-
-bool
-test_strto(void) {
-
-  fprintf(stderr, "Testing conversion of string to unsigned integers.\n");
-
-  assert(strtouint128(minu128) == uint128min);
-  assert(strtouint128(maxu128) == uint128max);
-
-  assert(strtouint64(minu64) == uint64min);
-  assert(strtouint64(maxu64) == uint64max);
-
-  assert(strtouint32(minu32) == uint32min);
-  assert(strtouint32(maxu32) == uint32max);
-
-  assert(strtouint16(minu16) == uint16min);
-  assert(strtouint16(maxu16) == uint16max);
-
-  assert(strtouint8(minu8) == uint8min);
-  assert(strtouint8(maxu8) == uint8max);
-
-  fprintf(stderr, "Testing conversion of string to signed integers.\n");
-
-  assert(strtoint128(min128) == int128min);
-  assert(strtoint128(max128) == int128max);
-
-  assert(strtoint64(min64) == int64min);
-  assert(strtoint64(max64) == int64max);
-
-  assert(strtoint32(min32) == int32min);
-  assert(strtoint32(max32) == int32max);
-
-  assert(strtoint16(min16) == int16min);
-  assert(strtoint16(max16) == int16max);
-
-  assert(strtoint8(min8) == int8min);
-  assert(strtoint8(max8) == int8max);
-
-  fprintf(stderr, "Tests passed.\n");
-
-  return(true);
-}
-
-
-
-
-
-
-
-int
-main(int argc, char **argv) {
-  int32 arg=1;
-  int32 err=0;
-
-  omp_set_num_threads(1);
-
-  while (arg < argc) {
-    if      (strcmp(argv[arg], "-h") == 0) {
-      err++;
-    }
-
-    else if (strcmp(argv[arg], "-something") == 0) {
-      //testSomething();
-    }
-
-    else {
-      err++;
-    }
-
-    arg++;
-  }
-
-  if (err)
-    fprintf(stderr, "ERROR: didn't parse command line.\n"), exit(1);
-
-
-  test_strto();
-
-  exit(0);
-}
diff --git a/ext/meryl/src/utility/src/tests/typesTest.mk b/ext/meryl/src/utility/src/tests/typesTest.mk
deleted file mode 100644
index 2464b65..0000000
--- a/ext/meryl/src/utility/src/tests/typesTest.mk
+++ /dev/null
@@ -1,8 +0,0 @@
-TARGET   := typesTest
-SOURCES  := typesTest.C
-
-SRC_INCDIRS := .. ../utility
-
-TGT_LDFLAGS := -L${TARGET_DIR}/lib
-TGT_LDLIBS  := -l${MODULE}
-TGT_PREREQS := lib${MODULE}.a
diff --git a/ext/meryl/src/utility/src/utility/align-ksw2-driver.C b/ext/meryl/src/utility/src/utility/align-ksw2-driver.C
index 4ec0332..c9671a9 100644
--- a/ext/meryl/src/utility/src/utility/align-ksw2-driver.C
+++ b/ext/meryl/src/utility/src/utility/align-ksw2-driver.C
@@ -137,8 +137,8 @@ ksw2Lib::align(char const *seqA_, uint32 seqlenA_, int32 bgnA_, int32 endA_,
 
   //  Allocate space for at least lenA (lenB) things.
 
-  resizeArray(_intA, 0, _maxA, _lenA);
-  resizeArray(_intB, 0, _maxB, _lenB);
+  resizeArray(_intA, 0, _maxA, _lenA, resizeArray_doNothing);
+  resizeArray(_intB, 0, _maxB, _lenB, resizeArray_doNothing);
 
   //  Convert the input sequences into integers.
 
@@ -200,7 +200,7 @@ ksw2Lib::align(char const *seqA_, uint32 seqlenA_, int32 bgnA_, int32 endA_,
 
   //  Make space for the alignment, and copy it over.
 
-  resizeArrayPair(_cigarCode, _cigarValu, 0, _cigarMax, ez.n_cigar + 1);
+  resizeArrayPair(_cigarCode, _cigarValu, _cigarLen, _cigarMax, (uint32)(ez.n_cigar + 1), resizeArray_doNothing);
 
   for (int32 cc=0; cc<ez.n_cigar; ++cc) {
     _cigarCode[cc] = "MIDNSHP=X"[ez.cigar[cc] & 0xf];
@@ -260,13 +260,13 @@ ksw2Lib::analyzeAlignment(void) {
 
   //  Compute the same erate as overlapper does.
 
-  _erate = (double)(_aMis + _aGap) / std::min((_endA - _bgnA), (_endB - _bgnB));
+  _erate = (double)(_aMis + _aGap) / min((_endA - _bgnA), (_endB - _bgnB));
 
   //  Allocate stuff for building a map between the A and B sequences and the
   //  cigar string.
 
-  resizeArrayPair(_cigarMapBgn, _cigarMapEnd, 0, _cigarMapMax, _cigarLen);
-  resizeArray    (_aMap,                      0, _aMapMax,     _aLen);
+  resizeArrayPair(_cigarMapBgn, _cigarMapEnd, 0, _cigarMapMax, _cigarLen, resizeArray_doNothing);
+  resizeArray    (_aMap,                      0, _aMapMax,     _aLen,     resizeArray_doNothing);
 
   uint32       apos = _bgnA;
   uint32       bpos = _bgnB;
diff --git a/ext/meryl/src/utility/src/utility/align-parasail-driver.C b/ext/meryl/src/utility/src/utility/align-parasail-driver.C
index 7fdcb1b..4c7f041 100644
--- a/ext/meryl/src/utility/src/utility/align-parasail-driver.C
+++ b/ext/meryl/src/utility/src/utility/align-parasail-driver.C
@@ -143,7 +143,7 @@ parasailLib::align(char const *seqA_, uint32 seqlenA_, int32 bgnA_, int32 endA_,
   uint32  pcPos = 0;   //  Parasail Cigar position
   uint32  pcLen = cigar->len;
 
-  resizeArrayPair(_cigarCode, _cigarValu, 0, _cigarMax, cigar->len + 1);
+  resizeArrayPair(_cigarCode, _cigarValu, _cigarLen, _cigarMax, (uint32)(cigar->len + 1), resizeArray_doNothing);
 
   //  If the alignment begins with a gap, remove it and adjust the positions.
 
@@ -342,13 +342,13 @@ parasailLib::analyzeAlignment(void) {
 
   //  Compute the same erate as overlapper does.
 
-  _erate = (double)(_aMis + _aGap) / std::min((_endA - _bgnA), (_endB - _bgnB));
+  _erate = (double)(_aMis + _aGap) / min((_endA - _bgnA), (_endB - _bgnB));
 
   //  Allocate stuff for building a map between the A and B sequences and the
   //  cigar string.
 
-  resizeArrayPair(_cigarMapBgn, _cigarMapEnd, 0, _cigarMapMax, _cigarLen);
-  resizeArray    (_aMap,                      0, _aMapMax,     _aLen);
+  resizeArrayPair(_cigarMapBgn, _cigarMapEnd, 0, _cigarMapMax, _cigarLen, resizeArray_doNothing);
+  resizeArray    (_aMap,                      0, _aMapMax,     _aLen,     resizeArray_doNothing);
 
   uint32       apos = _bgnA;
   uint32       bpos = _bgnB;
diff --git a/ext/meryl/src/utility/src/utility/align-parasail-driver.H b/ext/meryl/src/utility/src/utility/align-parasail-driver.H
index a8e66a4..816c7d2 100644
--- a/ext/meryl/src/utility/src/utility/align-parasail-driver.H
+++ b/ext/meryl/src/utility/src/utility/align-parasail-driver.H
@@ -49,13 +49,13 @@ public:
   bool     alignDovetail(char const *seqA, uint32 lenA,
                          char const *seqB, uint32 lenB, bool verbose=false) {
     return(align(seqA, lenA, 0, lenA,
-                 seqB, lenB, 0, lenB, verbose, parasail_sg_qb_de_trace /*_striped_32*/));
+                 seqB, lenB, 0, lenB, verbose, parasail_sg_qb_de_trace_striped_32));
   };
 
   bool     alignDovetail(char const *seqA, uint32 lenA, int32 bgnA, int32 endA,
                          char const *seqB, uint32 lenB, int32 bgnB, int32 endB, bool verbose=false) {
     return(align(seqA, lenA, bgnA, endA,
-                 seqB, lenB, bgnB, endB, verbose, parasail_sg_qb_de_trace /*_striped_32*/));
+                 seqB, lenB, bgnB, endB, verbose, parasail_sg_qb_de_trace_striped_32));
   };
 
   //  Align with free gaps on either end of s2.
diff --git a/ext/meryl/src/utility/src/utility/align-ssw-driver.C b/ext/meryl/src/utility/src/utility/align-ssw-driver.C
index 9468c2d..2a554bc 100644
--- a/ext/meryl/src/utility/src/utility/align-ssw-driver.C
+++ b/ext/meryl/src/utility/src/utility/align-ssw-driver.C
@@ -135,8 +135,8 @@ sswLib::align(char const *seqA_, uint32 seqlenA_, int32 bgnA_, int32 endA_,
 
   //  Allocate space for at least lenA (lenB) things.
 
-  resizeArray(_intA, 0, _maxA, _lenA);
-  resizeArray(_intB, 0, _maxB, _lenB);
+  resizeArray(_intA, 0, _maxA, _lenA, resizeArray_doNothing);
+  resizeArray(_intB, 0, _maxB, _lenB, resizeArray_doNothing);
 
   //  Convert the input sequences into integers.
 
@@ -193,7 +193,7 @@ sswLib::align(char const *seqA_, uint32 seqlenA_, int32 bgnA_, int32 endA_,
 
   //  Make space for the alignment, and copy it over.
 
-  resizeArrayPair(_cigarCode, _cigarValu, 0, _cigarMax, result->cigarLen + 1);
+  resizeArrayPair(_cigarCode, _cigarValu, _cigarLen, _cigarMax, (uint32)(result->cigarLen + 1), resizeArray_doNothing);
 
   for (int32 cc=0; cc<result->cigarLen; ++cc) {
     _cigarCode[cc] = "MIDNSHP=X"[result->cigar[cc] & 0xf];
@@ -359,13 +359,13 @@ sswLib::analyzeAlignment(void) {
 
   //  Compute the same erate as overlapper does.
 
-  _erate = (double)(_aMis + _aGap) / std::min((_endA - _bgnA), (_endB - _bgnB));
+  _erate = (double)(_aMis + _aGap) / min((_endA - _bgnA), (_endB - _bgnB));
 
   //  Allocate stuff for building a map between the A and B sequences and the
   //  cigar string.
 
-  resizeArrayPair(_cigarMapBgn, _cigarMapEnd, 0, _cigarMapMax, _cigarLen);
-  resizeArray    (_aMap,                      0, _aMapMax,     _aLen);
+  resizeArrayPair(_cigarMapBgn, _cigarMapEnd, 0, _cigarMapMax, _cigarLen, resizeArray_doNothing);
+  resizeArray    (_aMap,                      0, _aMapMax,     _aLen,     resizeArray_doNothing);
 
   uint32       apos = _bgnA;
   uint32       bpos = _bgnB;
diff --git a/ext/meryl/src/utility/src/utility/arrays.H b/ext/meryl/src/utility/src/utility/arrays.H
index a34af0b..4e30b55 100644
--- a/ext/meryl/src/utility/src/utility/arrays.H
+++ b/ext/meryl/src/utility/src/utility/arrays.H
@@ -21,85 +21,29 @@
 #define ARRAYS_H
 
 #include "types.H"
-#include <algorithm>
-
-
-enum class _raAct {
-  doNothing        = 0x00,
-  copyData         = 0x01,
-  clearNew         = 0x02,
-  copyDataClearNew = 0x03,
-};
-
-
-inline   //  Combine two _raAct into one.
-_raAct
-operator|(_raAct a, _raAct b) {
-
-  if (a == _raAct::doNothing)  return(b);
-  if (b == _raAct::doNothing)  return(a);
-
-  if ((a == _raAct::copyData) && (b == _raAct::copyData))   return(_raAct::copyData);
-  if ((a == _raAct::copyData) && (b == _raAct::clearNew))   return(_raAct::copyDataClearNew);
-  if ((a == _raAct::clearNew) && (b == _raAct::copyData))   return(_raAct::copyDataClearNew);
-  if ((a == _raAct::clearNew) && (b == _raAct::clearNew))   return(_raAct::clearNew);
 
-  if (a == _raAct::copyDataClearNew)  return(_raAct::copyDataClearNew);
-  if (b == _raAct::copyDataClearNew)  return(_raAct::copyDataClearNew);
-
-  assert(0);
-  return(_raAct::doNothing);
-}
-
-
-inline   //  Return true if _raAct a has property b set.
-bool
-operator&(_raAct a, _raAct b) {
+#include <algorithm>
 
-  if ((a == _raAct::copyData)         && (b == _raAct::copyData))           return(true);
-  if ((a == _raAct::copyDataClearNew) && (b == _raAct::copyData))           return(true);
+using namespace std;
 
-  if ((a == _raAct::clearNew)         && (b == _raAct::clearNew))           return(true);
-  if ((a == _raAct::copyDataClearNew) && (b == _raAct::clearNew))           return(true);
 
-  if ((a == _raAct::copyDataClearNew) && (b == _raAct::copyDataClearNew))   return(true);
+const uint32  resizeArray_doNothing = 0x00;
+const uint32  resizeArray_copyData  = 0x01;
+const uint32  resizeArray_clearNew  = 0x02;
 
-  return(false);
-}
 
 
-//  Allocate an array of size 'allocSize', and set 'arrayMax' to that value.
-//  By default. clear the array.
 template<typename TT, typename LL>
 void
-allocateArray(TT*& array, LL &arrayMax, uint64 allocSize, _raAct op=_raAct::clearNew) {
+allocateArray(TT*& array, LL arrayMax, uint32 op=resizeArray_clearNew) {
 
   if (array != NULL)
     delete [] array;
 
-  arrayMax = allocSize;
-  array    = new TT [allocSize];
-
-  assert(arrayMax == allocSize);   //  Make sure we don't truncate the value!
+  array = new TT [arrayMax];
 
-  if (op == _raAct::clearNew)
-    memset(array, 0, sizeof(TT) * allocSize);
-}
-
-
-//  Allocate an array of size 'allocSize'.
-//  By default, clear the array.
-template<typename TT>
-void
-allocateArray(TT*& array, uint64 allocSize, _raAct op=_raAct::clearNew) {
-
-  if (array != NULL)
-    delete [] array;
-
-  array    = new TT [allocSize];
-
-  if (op == _raAct::clearNew)
-    memset(array, 0, sizeof(TT) * allocSize);
+  if (op == resizeArray_clearNew)
+    memset(array, 0, sizeof(TT) * arrayMax);
 }
 
 
@@ -142,78 +86,55 @@ duplicateArray(TT*& to, LL &toLen, LL &toMax, TT const *fr, LL frLen, LL frMax=0
 }
 
 
-//  Set the array size to 'newMax'.
-//  No guards, the array will ALWAYS be reallocated.
-//
+//  Set the array size to 'newMax'.  No guards, the array will ALWAYS be reallocated.
+
 template<typename TT, typename LL>
 void
-setArraySize(TT*& array, uint64 arrayLen, LL &arrayMax, uint64 newMax, _raAct op=_raAct::copyData) {
+setArraySize(TT*& array, uint64 arrayLen, LL &arrayMax, uint64 newMax, uint32 op=resizeArray_copyData) {
 
-  arrayMax =          newMax;
-  arrayLen = std::min(newMax, arrayLen);
+  arrayMax =     newMax;
+  arrayLen = min(newMax, arrayLen);
 
   TT *copy = new TT [arrayMax];
 
-  if ((array != nullptr) &&
-      (arrayLen > 0) &&
-      ((op == _raAct::copyData) ||
-       (op == _raAct::copyDataClearNew)))
-    for (uint32 ii=0; ii<arrayLen; ii++)
-      copy[ii] = array[ii];
+  if ((op & resizeArray_copyData) && (array != NULL) && (arrayLen > 0))
+    memcpy(copy, array, sizeof(TT) * arrayLen);
 
   delete [] array;
   array = copy;
 
-  if ((op == _raAct::clearNew) ||
-      (op == _raAct::copyDataClearNew))
-    for (uint32 ii=arrayLen; ii<arrayMax; ii++)
-      copy[ii] = TT();
+  if ((op & resizeArray_clearNew) && (arrayMax > arrayLen))
+    memset(array + arrayLen, 0, sizeof(TT) * (arrayMax - arrayLen));
 }
 
 
 
+
 //  Ensure that there is enough space to hold one more element in the array.
 //  Increase the array by 'moreSpace' if needed.
-//
-//  With the array used as a stack, a call of
-//    increaseArray(arr, arrLen, arrMax, 32)
-//  will allocate 32 more elements if arrLen == arrMax, and do nothing
-//  otherwise.  After the call, array element arr[arrLen] is guaranteed to
-//  exist.  If arrLen > arrMax, see below.
-//
-//  With the array used for random access, a call of
-//    increaseArray(arr, idx, arrMax, 32)
-//  will do nothing if idx < arrMax, and resize the array to have idx+32
-//  elements otherwise.
-//  
-//  In both cases, if 'moreSpace' is 0, it is reset to 1.
-//
-//  If the array is reallocated, the contents of the entire array are copied
-//  to the new space.  New elements are NOT cleared to zero; override op as
-//  desired.
-//
+
 template<typename TT, typename LL>
 void
-increaseArray(TT*& array, uint64 idx, LL &arrayMax, uint64 moreSpace, _raAct op=_raAct::copyData) {
-  uint64  newMax = idx + ((moreSpace == 0) ? 1 : moreSpace);
+increaseArray(TT*& array, uint64 arrayLen, LL &arrayMax, uint64 moreSpace) {
+  uint64  newMax = arrayMax + ((moreSpace == 0) ? 1 : moreSpace);
 
-  if (idx < arrayMax)
+  if (arrayLen < arrayMax)
     return;
 
-  setArraySize(array, arrayMax, arrayMax, newMax, op);
+  setArraySize(array, arrayLen, arrayMax, newMax, resizeArray_copyData);
 }
 
 
 template<typename T1, typename T2, typename LL>
 void
-increaseArrayPair(T1*& array1, T2*& array2, uint64 idx, LL &arrayMax, uint64 moreSpace, _raAct op=_raAct::copyData) {
-  uint64  newMax = idx + ((moreSpace == 0) ? 1 : moreSpace);
+increaseArrayPair(T1*& array1, T2*& array2, uint64 arrayLen, LL &arrayMax, uint64 moreSpace) {
+  uint64  newMax = arrayMax + ((moreSpace == 0) ? 1 : moreSpace);
 
-  if (idx < arrayMax)
+  if (arrayLen < arrayMax)
     return;
 
-  setArraySize(array1, arrayMax, arrayMax, newMax, op);
-  setArraySize(array2, arrayMax, arrayMax, newMax, op);
+  setArraySize(array1, arrayLen, arrayMax, newMax, resizeArray_copyData);
+  setArraySize(array2, arrayLen, arrayMax, newMax, resizeArray_copyData);
 }
 
 
@@ -222,7 +143,7 @@ increaseArrayPair(T1*& array1, T2*& array2, uint64 idx, LL &arrayMax, uint64 mor
 
 template<typename TT, typename LL>
 void
-resizeArray(TT*& array, uint64 arrayLen, LL &arrayMax, uint64 newMax, _raAct op=_raAct::copyData) {
+resizeArray(TT*& array, uint64 arrayLen, LL &arrayMax, uint64 newMax, uint32 op=resizeArray_copyData) {
 
   if (newMax <= arrayMax)
     return;
@@ -233,7 +154,7 @@ resizeArray(TT*& array, uint64 arrayLen, LL &arrayMax, uint64 newMax, _raAct op=
 
 template<typename T1, typename T2, typename LL>
 void
-resizeArrayPair(T1*& array1, T2*& array2, uint64 arrayLen, LL &arrayMax, uint64 newMax, _raAct op=_raAct::copyData) {
+resizeArrayPair(T1*& array1, T2*& array2, uint64 arrayLen, LL &arrayMax, LL newMax, uint32 op=resizeArray_copyData) {
 
   if (newMax <= arrayMax)
     return;
diff --git a/ext/meryl/src/utility/src/utility/bits-wordArray.C b/ext/meryl/src/utility/src/utility/bits-wordArray.C
deleted file mode 100644
index f4f79cc..0000000
--- a/ext/meryl/src/utility/src/utility/bits-wordArray.C
+++ /dev/null
@@ -1,153 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "bits.H"
-
-
-//
-//  At the default segmentSize of 64 KB = 524288 bits, we'll allocate 4096
-//  128-bit words per segment.  With _wordsPerLock = 64, we'll then have
-//  4096 / 64 = 64+1 locks per segment.
-//
-//  Note that 'values' refers to the user-supplied data of some small size,
-//  while 'words' are the 128-bit machine words used to store the data.
-//
-
-wordArray::wordArray(uint32 valueWidth, uint64 segmentSizeInBits, bool useLocks) {
-
-  _valueWidth       = valueWidth;          //  In bits.
-  _valueMask        = buildLowBitMask<uint128>(_valueWidth);
-  _segmentSize      = segmentSizeInBits;   //  In bits.
-
-  _valuesPerSegment = _segmentSize / _valueWidth;
-
-  _wordsPerSegment  = _segmentSize / 128;
-  _wordsPerLock     = (useLocks == false) ? (0) : (64);
-  _locksPerSegment  = (useLocks == false) ? (0) : (_segmentSize / 128 / _wordsPerLock + 1);
-
-  _numValues        = 0;
-  _numValuesLock.clear();
-
-  _segmentsLen      = 0;
-  _segmentsMax      = 16;
-  _segments         = new uint128 *          [_segmentsMax];
-  _segLocks         = new std::atomic_flag * [_segmentsMax];
-
-  for (uint32 ss=0; ss<_segmentsMax; ss++) {
-    _segments[ss] = nullptr;
-    _segLocks[ss] = nullptr;
-  }
-}
-
-
-
-wordArray::~wordArray() {
-  for (uint32 i=0; i<_segmentsLen; i++) {
-    delete [] _segments[i];
-    delete [] _segLocks[i];
-  }
-
-  delete [] _segments;
-  delete [] _segLocks;
-}
-
-
-
-void
-wordArray::clear(void) {
-  _numValues   = 0;
-  _segmentsLen = 0;
-}
-
-
-
-void
-wordArray::allocate(uint64 nElements) {
-  uint64 segmentsNeeded = nElements / _valuesPerSegment + 1;
-
-#pragma omp critical (wordArrayAllocate)
-  {
-
-  if (segmentsNeeded >= _segmentsMax)
-    resizeArrayPair(_segments,
-                    _segLocks,
-                    _segmentsLen, _segmentsMax, segmentsNeeded,
-                    _raAct::copyData | _raAct::clearNew);
-
-  for (uint32 seg=_segmentsLen; seg<segmentsNeeded; seg++) {
-    if (_segments[seg] != nullptr)
-      continue;
-
-    //  Allocate the segment and (optionally, for debug and test)
-    //  initialize to non-zero values.
-
-    _segments[seg] = new uint128 [ _wordsPerSegment ];
-
-    //memset(_segments[seg], 0xff, sizeof(uint128) * _segmentSize / 128);
-
-    //  Allocate any needed locks, and open them.
-
-    if (_locksPerSegment > 0) {
-      _segLocks[seg] = new std::atomic_flag [ _locksPerSegment ];
-
-      for (uint32 ll=0; ll<_locksPerSegment; ll++)
-        _segLocks[seg][ll].clear();
-    }
-  }
-
-  _segmentsLen = segmentsNeeded;
-
-  }  //  end critical
-}
-
-
-
-void
-wordArray::show(void) {
-  uint64  lastBit = _numValues * _valueWidth;
-
-  fprintf(stderr, "wordArray:\n");
-  fprintf(stderr, "  numValues        %10lu values\n", _numValues);
-  fprintf(stderr, "  valueWidth       %10lu bits\n",   _valueWidth);
-  fprintf(stderr, "  segmentSize      %10lu bits\n",   _segmentSize);
-  fprintf(stderr, "  valuesPerSegment %10lu values\n", _valuesPerSegment);
-  fprintf(stderr, "\n");
-
-  //  For each segment, dump full words, until we hit the end of data.
-
-  for (uint64 ss=0; ss<_segmentsLen; ss++) {
-    fprintf(stderr, "Segment %lu:\n", ss);
-
-    uint64 bitPos = ss * _valuesPerSegment * _valueWidth;
-
-    for (uint64 ww=0; (ww < _wordsPerSegment) && (bitPos < lastBit); ww += 4) {
-      fprintf(stderr, "%5lu: %s %s %s %s\n",
-              ww,
-              (bitPos + 128 * 0 < lastBit) ? toHex(_segments[ss][ww+0]) : "",
-              (bitPos + 128 * 1 < lastBit) ? toHex(_segments[ss][ww+1]) : "",
-              (bitPos + 128 * 2 < lastBit) ? toHex(_segments[ss][ww+2]) : "",
-              (bitPos + 128 * 3 < lastBit) ? toHex(_segments[ss][ww+3]) : "");
-
-      bitPos += 128 * 4;
-    }
-  }
-
-  fprintf(stderr, "\n");
-  fprintf(stderr, "\n");
-}
diff --git a/ext/meryl/src/utility/src/utility/bits-wordArray.H b/ext/meryl/src/utility/src/utility/bits-wordArray.H
deleted file mode 100644
index 18f3a66..0000000
--- a/ext/meryl/src/utility/src/utility/bits-wordArray.H
+++ /dev/null
@@ -1,194 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-//  To be included only by bits.H
-#ifndef BITS_IMPLEMENTATIONS
-#error Include bits.H instead of bits-wordArray.H
-#endif
-
-
-inline
-uint128
-wordArray::get(uint64 eIdx) {
-  uint64  seg =                eIdx / _valuesPerSegment;    //  Which segment are we in?
-  uint64  pos = _valueWidth * (eIdx % _valuesPerSegment);   //  Bit position of the start of the value.
-
-  uint64  wrd = pos / 128;   //  The word we start in.
-  uint64  bit = pos % 128;   //  Starting at this bit.
-
-  uint128 val = 0;
-
-  if (eIdx >= _numValues)
-    fprintf(stderr, "wordArray::get()-- eIdx %lu >= _numValues %lu\n", eIdx, _numValues);
-  assert(eIdx < _numValues);
-
-  //  If the value is all in one word, just shift that word to the right to
-  //  put the proper bits in the proper position.
-  //
-  //  Otherwise, the value spans two words.
-  //   - Shift the first word left to place the right-most bits at the left end of the return value.
-  //   - Shift the second word right so the left-most bits are at the right end of the return value.
-  //
-  //                       ssssssssssss <- second shift amount
-  //  [--first-word--][--second-word--]
-  //             [--value--]
-  //                   fffff <- first shift amount
-
-  if (bit + _valueWidth <= 128) {
-    val = _segments[seg][wrd] >> (128 - _valueWidth - bit);
-  }
-  else {
-    uint32  fShift = _valueWidth - (128 - bit);
-    uint32  sShift = 128 - fShift;
-
-    val  = _segments[seg][wrd+0] << fShift;
-    val |= _segments[seg][wrd+1] >> sShift;
-  }
-
-  //  Finally, mask off the stuff we don't care about.
-
-  val &= _valueMask;
-
-  return(val);
-}
-
-
-
-inline
-void
-wordArray::setLock(uint64 seg, uint64 lockW1, uint64 lockW2) {
-
-  if (lockW1 == lockW2) {
-    while (_segLocks[seg][lockW1].test_and_set(std::memory_order_relaxed) == true)
-      ;
-  }
-
-  else {
-    while (_segLocks[seg][lockW1].test_and_set(std::memory_order_relaxed) == true)
-      ;
-    while (_segLocks[seg][lockW2].test_and_set(std::memory_order_relaxed) == true)
-      ;
-  }
-}
-
-
-
-inline
-void
-wordArray::relLock(uint64 seg, uint64 lockW1, uint64 lockW2) {
-
-  if (lockW1 == lockW2) {
-    _segLocks[seg][lockW1].clear();
-  }
-  else {
-    _segLocks[seg][lockW2].clear();
-    _segLocks[seg][lockW1].clear();
-  }
-}
-
-
-
-inline
-void
-wordArray::setNval(uint32 eIdx) {
-
-  while (_numValuesLock.test_and_set(std::memory_order_relaxed) == true)
-    ;
-
-  if (eIdx >= _numValues)
-    _numValues = eIdx + 1;
-
-  _numValuesLock.clear();
-}
-
-
-inline
-void
-wordArray::set(uint64 eIdx, uint128 value) {
-  uint64 seg =                eIdx / _valuesPerSegment;     //  Which segment are we in?
-  uint64 pos = _valueWidth * (eIdx % _valuesPerSegment);    //  Which word in the segment?
-
-  uint64 wrd = pos / 128;         //  The word we start in.
-  uint64 bit = pos % 128;         //  Starting at this bit.
-
-  uint64 lockW1 = 0;   //  Address of locks, computed inline with the
-  uint64 lockW2 = 0;   //  setLock() function call below.
-
-  //  Allocate more segment pointers and any missing segments.
-
-  if (seg >= _segmentsLen)
-    allocate(eIdx);
-
-  //  Mask the value, just in case.
-
-  value &= _valueMask;
-
-  //  Grab the locks for the two words we're going to be accessing.
-
-  if (_wordsPerLock > 0)
-    setLock(seg,
-            lockW1 = (wrd + 0) / _wordsPerLock,
-            lockW2 = (wrd + 1) / _wordsPerLock);
-
-  //  Remember the largest element set.  Used for:
-  //   - failing if get() accesses something out of bounds....but doesn't
-  //     catch if we access something unset in the middle.
-  //   - debug usage in show()
-
-  if (_wordsPerLock > 0)
-    setNval(eIdx);
-  else if (eIdx >= _numValues)
-    _numValues = eIdx+1;
-
-  //  Set the value in one word....
-  //
-  //          [--------------------]
-  //                 [value]
-  //           lSave           rSave
-  //
-  //  Or split the value across two words.
-  //
-  //            --lSave--               --rSave--
-  //  [--word--][--first-word--][--second-word--][--word--]
-  //                     [----value---=]
-  //                      lSize  rSize
-
-  if (bit + _valueWidth <= 128) {
-    uint32   lSave = bit;
-    uint32   rSave = 128 - _valueWidth - bit;
-
-    _segments[seg][wrd] = (saveLeftBits(_segments[seg][wrd], lSave) |
-                           (value << rSave)                         |
-                           saveRightBits(_segments[seg][wrd], rSave));
-  }
-
-  else {
-    uint32   lSave =       bit,   rSave = 128 - _valueWidth - bit;
-    uint32   lSize = 128 - bit,   rSize = _valueWidth - (128 - bit);
-
-    _segments[seg][wrd+0] = saveLeftBits(_segments[seg][wrd+0], lSave) | (value >> rSize);
-    _segments[seg][wrd+1] = (value << rSave) | saveRightBits(_segments[seg][wrd+1], rSave);
-  }
-
-  //  Release the locks.
-
-  if (_wordsPerLock > 0)
-    relLock(seg, lockW1, lockW2);
-}
-
diff --git a/ext/meryl/src/utility/src/utility/bits.C b/ext/meryl/src/utility/src/utility/bits.C
index dbf59f3..1d3ca6d 100644
--- a/ext/meryl/src/utility/src/utility/bits.C
+++ b/ext/meryl/src/utility/src/utility/bits.C
@@ -21,7 +21,6 @@
 #include "files.H"
 
 
-
 stuffedBits::stuffedBits(uint64 nBits) {
 
   _dataBlockLenMaxB =             nBits;
@@ -257,7 +256,7 @@ stuffedBits::loadFromBuffer(readBuffer *B) {
     _dataBlockBgn  = new uint64 [inLen];
     _dataBlockLen  = new uint64 [inLen];
 
-    resizeArray(_dataBlocks, _dataBlocksLen, _dataBlocksMax, inLen, _raAct::copyData | _raAct::clearNew);
+    resizeArray(_dataBlocks, _dataBlocksLen, _dataBlocksMax, inLen, resizeArray_copyData | resizeArray_clearNew);
   }
 
   //  Update the parameters.
@@ -359,7 +358,7 @@ stuffedBits::loadFromFile(FILE *F) {
     _dataBlockBgn  = new uint64 [inLen];
     _dataBlockLen  = new uint64 [inLen];
 
-    resizeArray(_dataBlocks, _dataBlocksLen, _dataBlocksMax, inLen, _raAct::copyData | _raAct::clearNew);
+    resizeArray(_dataBlocks, _dataBlocksLen, _dataBlocksMax, inLen, resizeArray_copyData | resizeArray_clearNew);
   }
 
   //  Update the parameters.
diff --git a/ext/meryl/src/utility/src/utility/bits.H b/ext/meryl/src/utility/src/utility/bits.H
index b2f070b..4c2c372 100644
--- a/ext/meryl/src/utility/src/utility/bits.H
+++ b/ext/meryl/src/utility/src/utility/bits.H
@@ -25,7 +25,15 @@
 #include "files.H"
 
 #include <algorithm>
-#include <atomic>
+
+//  Define this to enable testing that the width of the data element is greater than zero.  The
+//  uint64MASK() macro (bri.h) does not generate a mask for 0.  Compiler warnings are issued,
+//  because you shouldn't use this in production code.
+//
+//  As it's expensive, emit a warning if it's enabled.
+//
+//#define CHECK_WIDTH
+//#warning CHECK_WIDTH is EXPENSIVE
 
 
 //  Writing in the middle of data is toublesome.
@@ -59,64 +67,22 @@ displayWord(uint64 word, char *buffer=NULL) {
 };
 
 
-
-//  Generate a bit mask on the low (0x000fff) or high bits (0xfff000).
-//
-//  Algorithm:
-//   - set the return value to all 1's
-//   - shift left or right to keep the desired numBits in the word
-//   - reset to all 0's if the numBits is zero
-//     (if     zero, 'r & -0' == 'r & 0000..000)
-//     (if not zero, 'r & -1' == 'r & 1111..111)
-//   - reset to all 1's if the numBits is larger than the number of bits in the word
-//
-template<typename uintType>
-uintType
-buildLowBitMask(uint32 numBits) {
-  uintType  r;
-
-  r   = ~((uintType)0);
-  r >>= 8 * sizeof(uintType) - numBits;
-  r  &= -(uintType)(numBits != 0);
-  r  |= -(uintType)(numBits  > 8 * sizeof(uintType));
-
-  return(r);
-}
-
-template<typename uintType>
-uintType
-buildHighBitMask(uint32 numBits) {
-  uintType  r;
-
-  r   = ~((uintType)0);
-  r <<= 8 * sizeof(uintType) - numBits;
-  r  &= -(uintType)(numBits != 0);
-  r  |= -(uintType)(numBits  > 8 * sizeof(uintType));
-
-  return(r);
-}
-
-
-
 //  Return bits in a word:
 //    Keeping the rightmost 64-n bits (mask out the leftmost  n bits)
 //    Keeping the leftmost  64-n bits (mask out the rigthmost n bits)
 //
-inline uint64   clearLeftBits  (uint64 v,  uint32 l)  {  if (l >=  64) return(0);  return(v & (uint64max  >>        l));   };
-inline uint64    saveLeftBits  (uint64 v,  uint32 l)  {  if (l ==   0) return(0);  return(v & (uint64max  << (64  - l)));  };
-inline uint64   clearRightBits (uint64 v,  uint32 r)  {  if (r >=  64) return(0);  return(v & (uint64max  <<        r));   };
-inline uint64    saveRightBits (uint64 v,  uint32 r)  {  if (r ==   0) return(0);  return(v & (uint64max  >> (64  - r)));  };
-
-inline uint64   clearMiddleBits(uint64  v, uint32 l, uint32 r)  { return( saveRightBits(v, r) |  saveLeftBits(v, l)); };
-inline uint64    saveMiddleBits(uint64  v, uint32 l, uint32 r)  { return(clearRightBits(v, r) & clearLeftBits(v, l)); };
+inline uint64   clearLeftBits  (uint64 v, uint32 l)  {  if (l >= 64) return(0);  return(v & (0xffffffffffffffffllu >>       l));   };
+inline uint64    saveLeftBits  (uint64 v, uint32 l)  {  if (l ==  0) return(0);  if (l >= 64) return(v);  return(v & (0xffffffffffffffffllu << (64 - l)));  };  //  l >= 64 keeps every bit; avoids shifting by a wrapped (64 - l).
+inline uint64   clearRightBits (uint64 v, uint32 r)  {  if (r >= 64) return(0);  return(v & (0xffffffffffffffffllu <<       r));   };
+inline uint64    saveRightBits (uint64 v, uint32 r)  {  if (r ==  0) return(0);  if (r >= 64) return(v);  return(v & (0xffffffffffffffffllu >> (64 - r)));  };  //  r >= 64 keeps every bit; avoids shifting by a wrapped (64 - r).
 
-inline uint128  clearLeftBits  (uint128 v, uint32 l)  {  if (l >= 128) return(0);  return(v & (uint128max >>        l));   };
-inline uint128   saveLeftBits  (uint128 v, uint32 l)  {  if (l ==   0) return(0);  return(v & (uint128max << (128 - l)));  };
-inline uint128  clearRightBits (uint128 v, uint32 r)  {  if (r >= 128) return(0);  return(v & (uint128max <<        r));   };
-inline uint128   saveRightBits (uint128 v, uint32 r)  {  if (r ==   0) return(0);  return(v & (uint128max >> (128 - r)));  };
+inline uint64   clearMiddleBits(uint64 v, uint32 l, uint32 r) {
+  return(saveRightBits(v, r) | saveLeftBits(v, l));
+};
 
-inline uint128  clearMiddleBits(uint128 v, uint32 l, uint32 r)  { return( saveRightBits(v, r) |  saveLeftBits(v, l)); };
-inline uint128   saveMiddleBits(uint128 v, uint32 l, uint32 r)  { return(clearRightBits(v, r) & clearLeftBits(v, l)); };
+inline uint64    saveMiddleBits(uint64 v, uint32 l, uint32 r) {
+  return(clearRightBits(v, r) & clearLeftBits(v, l));
+};
 
 
 
@@ -128,23 +94,23 @@ inline uint128   saveMiddleBits(uint128 v, uint32 l, uint32 r)  { return(clearRi
 inline
 uint64
 reverseBits64(uint64 x) {
-  x = ((x >>  1) & 0x5555555555555555llu) | ((x <<  1) & 0xaaaaaaaaaaaaaaaallu);
-  x = ((x >>  2) & 0x3333333333333333llu) | ((x <<  2) & 0xccccccccccccccccllu);
-  x = ((x >>  4) & 0x0f0f0f0f0f0f0f0fllu) | ((x <<  4) & 0xf0f0f0f0f0f0f0f0llu);
-  x = ((x >>  8) & 0x00ff00ff00ff00ffllu) | ((x <<  8) & 0xff00ff00ff00ff00llu);
-  x = ((x >> 16) & 0x0000ffff0000ffffllu) | ((x << 16) & 0xffff0000ffff0000llu);
-  x = ((x >> 32) & 0x00000000ffffffffllu) | ((x << 32) & 0xffffffff00000000llu);
+  x = ((x >>  1) & uint64NUMBER(0x5555555555555555)) | ((x <<  1) & uint64NUMBER(0xaaaaaaaaaaaaaaaa));
+  x = ((x >>  2) & uint64NUMBER(0x3333333333333333)) | ((x <<  2) & uint64NUMBER(0xcccccccccccccccc));
+  x = ((x >>  4) & uint64NUMBER(0x0f0f0f0f0f0f0f0f)) | ((x <<  4) & uint64NUMBER(0xf0f0f0f0f0f0f0f0));
+  x = ((x >>  8) & uint64NUMBER(0x00ff00ff00ff00ff)) | ((x <<  8) & uint64NUMBER(0xff00ff00ff00ff00));
+  x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) | ((x << 16) & uint64NUMBER(0xffff0000ffff0000));
+  x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) | ((x << 32) & uint64NUMBER(0xffffffff00000000));
   return(x);
 }
 
 inline
 uint32
 reverseBits32(uint32 x) {
-  x = ((x >>  1) & 0x55555555lu) | ((x <<  1) & 0xaaaaaaaalu);
-  x = ((x >>  2) & 0x33333333lu) | ((x <<  2) & 0xcccccccclu);
-  x = ((x >>  4) & 0x0f0f0f0flu) | ((x <<  4) & 0xf0f0f0f0lu);
-  x = ((x >>  8) & 0x00ff00fflu) | ((x <<  8) & 0xff00ff00lu);
-  x = ((x >> 16) & 0x0000fffflu) | ((x << 16) & 0xffff0000lu);
+  x = ((x >>  1) & uint32NUMBER(0x55555555)) | ((x <<  1) & uint32NUMBER(0xaaaaaaaa));
+  x = ((x >>  2) & uint32NUMBER(0x33333333)) | ((x <<  2) & uint32NUMBER(0xcccccccc));
+  x = ((x >>  4) & uint32NUMBER(0x0f0f0f0f)) | ((x <<  4) & uint32NUMBER(0xf0f0f0f0));
+  x = ((x >>  8) & uint32NUMBER(0x00ff00ff)) | ((x <<  8) & uint32NUMBER(0xff00ff00));
+  x = ((x >> 16) & uint32NUMBER(0x0000ffff)) | ((x << 16) & uint32NUMBER(0xffff0000));
   return(x);
 }
 
@@ -152,17 +118,17 @@ reverseBits32(uint32 x) {
 inline
 uint64
 uint64Swap(uint64 x) {
-  x = ((x >>  8) & 0x00ff00ff00ff00ffllu) | ((x <<  8) & 0xff00ff00ff00ff00llu);
-  x = ((x >> 16) & 0x0000ffff0000ffffllu) | ((x << 16) & 0xffff0000ffff0000llu);
-  x = ((x >> 32) & 0x00000000ffffffffllu) | ((x << 32) & 0xffffffff00000000llu);
+  x = ((x >>  8) & uint64NUMBER(0x00ff00ff00ff00ff)) | ((x <<  8) & uint64NUMBER(0xff00ff00ff00ff00));
+  x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) | ((x << 16) & uint64NUMBER(0xffff0000ffff0000));
+  x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) | ((x << 32) & uint64NUMBER(0xffffffff00000000));
   return(x);
 }
 
 inline
 uint32
 uint32Swap(uint32 x) {
-  x = ((x >>  8) & 0x00ff00fflu) | ((x <<  8) & 0xff00ff00lu);
-  x = ((x >> 16) & 0x0000fffflu) | ((x << 16) & 0xffff0000lu);
+  x = ((x >>  8) & uint32NUMBER(0x00ff00ff)) | ((x <<  8) & uint32NUMBER(0xff00ff00));
+  x = ((x >> 16) & uint32NUMBER(0x0000ffff)) | ((x << 16) & uint32NUMBER(0xffff0000));
   return(x);
 }
 
@@ -177,23 +143,23 @@ uint16Swap(uint16 x) {
 inline
 uint32
 countNumberOfSetBits32(uint32 x) {
-  x = ((x >>  1) & 0x55555555lu) + (x & 0x55555555lu);
-  x = ((x >>  2) & 0x33333333lu) + (x & 0x33333333lu);
-  x = ((x >>  4) & 0x0f0f0f0flu) + (x & 0x0f0f0f0flu);
-  x = ((x >>  8) & 0x00ff00fflu) + (x & 0x00ff00fflu);
-  x = ((x >> 16) & 0x0000fffflu) + (x & 0x0000fffflu);
+  x = ((x >>  1) & uint32NUMBER(0x55555555)) + (x & uint32NUMBER(0x55555555));
+  x = ((x >>  2) & uint32NUMBER(0x33333333)) + (x & uint32NUMBER(0x33333333));
+  x = ((x >>  4) & uint32NUMBER(0x0f0f0f0f)) + (x & uint32NUMBER(0x0f0f0f0f));
+  x = ((x >>  8) & uint32NUMBER(0x00ff00ff)) + (x & uint32NUMBER(0x00ff00ff));
+  x = ((x >> 16) & uint32NUMBER(0x0000ffff)) + (x & uint32NUMBER(0x0000ffff));
   return(x);
 }
 
 inline
 uint64
 countNumberOfSetBits64(uint64 x) {
-  x = ((x >>  1) & 0x5555555555555555llu) + (x & 0x5555555555555555llu);
-  x = ((x >>  2) & 0x3333333333333333llu) + (x & 0x3333333333333333llu);
-  x = ((x >>  4) & 0x0f0f0f0f0f0f0f0fllu) + (x & 0x0f0f0f0f0f0f0f0fllu);
-  x = ((x >>  8) & 0x00ff00ff00ff00ffllu) + (x & 0x00ff00ff00ff00ffllu);
-  x = ((x >> 16) & 0x0000ffff0000ffffllu) + (x & 0x0000ffff0000ffffllu);
-  x = ((x >> 32) & 0x00000000ffffffffllu) + (x & 0x00000000ffffffffllu);
+  x = ((x >>  1) & uint64NUMBER(0x5555555555555555)) + (x & uint64NUMBER(0x5555555555555555));
+  x = ((x >>  2) & uint64NUMBER(0x3333333333333333)) + (x & uint64NUMBER(0x3333333333333333));
+  x = ((x >>  4) & uint64NUMBER(0x0f0f0f0f0f0f0f0f)) + (x & uint64NUMBER(0x0f0f0f0f0f0f0f0f));
+  x = ((x >>  8) & uint64NUMBER(0x00ff00ff00ff00ff)) + (x & uint64NUMBER(0x00ff00ff00ff00ff));
+  x = ((x >> 16) & uint64NUMBER(0x0000ffff0000ffff)) + (x & uint64NUMBER(0x0000ffff0000ffff));
+  x = ((x >> 32) & uint64NUMBER(0x00000000ffffffff)) + (x & uint64NUMBER(0x00000000ffffffff));
   return(x);
 }
 
@@ -422,62 +388,208 @@ private:
 
 ////////////////////////////////////////
 //
-//  wordArray - An array that efficiently stores non-machine-word size
-//  integer words by packing the bits into machine-size words.  The array is
-//  variable length but not sparse - accessing element 1,000,000 will
-//  allocate elements 0 through 999,999.
+//  wordArray
+//
+//  An array that efficiently stores non-machine-word size integer words by
+//  packing the bits into machine-size words.
 //
-//  The size, in bits, of each element is set at construction time.  All
-//  elements must be the same size.
+//  The array is variable length, but not sparse.  Accessing element
+//  1,000,000 will allocate elements 0 through 999,999.
 //
-//  The elements are stored in a set of fixed-size blocks.  The block size
-//  can also be set at construction time.  Note that this is specified IN
-//  BITS.  The default size is 64 KB per block.  Decrease this if you know
-//  you only need a few KB to store all values, or if you are storing several
-//  GB of data.  There is no real performance loss/gain; it just adjusts the
-//  number of blocks allocated.  There might be a slight degradation in
-//  performance of the memory management system if millions of blocks are
-//  allocated.
+//  No array operator can be provided since we cannot return a reference to
+//  values across machine words, let alone a reference to a value inside a
+//  machine word.
+//
+//  The constructor needs to know the size of the words being stored,
+//  and how many bits to store per allocation.
 //
 class wordArray {
 public:
-  wordArray(uint32 valueWidth, uint64 segmentsSizeInBits, bool useLocks);
-  ~wordArray();
+  wordArray(uint32 wordWidth, uint32 segmentSize = 65536 * 8) {
+    _valueWidth       = wordWidth;
+    _segmentSize      = segmentSize;
+    _valuesPerSegment = (uint64)_segmentSize / (uint64)_valueWidth;
 
-  void     clear(void);                   //  Reset the array to zero, doesn't deallocate space.
+    _nextElement      = 0;
 
-  void     allocate(uint64 nElements);    //  Pre-allocate space for nElements.
+    _segmentsLen      = 0;
+    _segmentsMax      = 16;
+    _segments         = new uint64 * [_segmentsMax];
 
-  uint128  get(uint64 eIdx);              //  Get the value of element eIdx.
-  void     set(uint64 eIdx, uint128 v);   //  Set the value of element eIdx to v.
+    for (uint32 ss=0; ss<_segmentsMax; ss++)
+      _segments[ss] = NULL;
+  }
 
-public:
-  void     show(void);                    //  Dump the wordArray to the screen; debugging.
+  ~wordArray() {
+    for (uint32 i=0; i<_segmentsMax; i++)   //  Not _segmentsLen: clear() zeroes that but keeps the segments allocated.
+      delete [] _segments[i];               //  Unused slots are NULL (constructor, resizeArray_clearNew); delete [] NULL is a no-op.
 
-private:
-  void     setLock(uint64 seg, uint64 lockW1, uint64 lockW2);
-  void     relLock(uint64 seg, uint64 lockW1, uint64 lockW2);
-  void     setNval(uint32 eIdx);
+    delete [] _segments;
+  };
 
-private:
-  uint64              _valueWidth       = 0;         //  Width of the values stored.
-  uint64              _valueMask        = 0;         //  Mask the low _valueWidth bits
-  uint64              _segmentSize      = 0;         //  Size, in bits, of each block of data.
+  void     clear(void) {
+    _nextElement = 0;
+    _segmentsLen = 0;
+  };
+
+  void     allocate(uint64 nElements) {
+    uint64 nSegs = nElements / _valuesPerSegment + 1;
+
+    //fprintf(stderr, "wordArray::allocate()-- allocating space for " F_U64 " elements, in " F_U64 " segments.\n",
+    //        nElements, nSegs);
+
+    assert(_segmentsLen == 0);
+
+    resizeArray(_segments, _segmentsLen, _segmentsMax, nSegs, resizeArray_copyData | resizeArray_clearNew);
+
+    for (uint32 seg=0; seg<nSegs; seg++) {
+      if (_segments[seg] == NULL)
+        _segments[seg] = new uint64 [_segmentSize / 64];
+
+      memset(_segments[seg], 0xff, sizeof(uint64) * _segmentSize / 64);
+    }
+
+    _segmentsLen = nSegs;
+  };
+
+  uint64   get(uint64 element) {
+    uint64 seg =                element / _valuesPerSegment;     //  Which segment are we in?
+    uint64 pos = _valueWidth * (element % _valuesPerSegment);    //  Bit position of the start of the value.
+
+    uint64 wrd = pos / 64;   //  The word we start in.
+    uint64 bit = pos % 64;   //  Starting at this bit.
+
+    uint64 val = 0;
 
-  uint64              _valuesPerSegment = 0;         //  Number of values in each block.
+    assert(element < _nextElement);
 
-  uint64              _wordsPerSegment  = 0;         //  Number of 128-bit words in each segment
-  uint64              _wordsPerLock     = 0;         //  How many words are covered by each lock.
-  uint64              _locksPerSegment  = 0;         //  Number of locks per segment
+    //  If the value is all in one word, just shift that word to the right to
+    //  put the proper bits in the proper position.  We'll clean up the extra
+    //  bits in just a moment.
 
-  uint64              _numValues        = 0;         //  Number of values stored in the array.
-  std::atomic_flag    _numValuesLock;                //  Lock on the above.
+    if (bit + _valueWidth <= 64) {
+      val   = _segments[seg][wrd] >> (64 - _valueWidth - bit);
+    }
+
+    //  Otherwise, the value spans two words.  Shift the first word left so
+    //  its right-most bits form the left end of the value, then shift the
+    //  second word right so its left-most bits form the right end.
+    //
+    //                                 ssssssssssssssssssssss <- second shift
+    //  [--word--][--first-word--][--second-word--][--word--]
+    //                      [--value--]
+    //                            fffff <- first shift
+    //
+    else {
+      uint32  fShift = _valueWidth - (64 - bit);
+      uint32  sShift = 64 - fShift;
+
+      val   = _segments[seg][wrd+0] << fShift;
+      val  |= _segments[seg][wrd+1] >> sShift;
+    }
+
+    //  Finally, mask off the stuff we don't care about.
+
+    val  &= uint64MASK(_valueWidth);
+
+    return(val);
+  };
+
+  void     set(uint64 element, uint64 value) {   //  Store the low _valueWidth bits of 'value' at index 'element', growing the array as needed.
+    uint64 seg =                element / _valuesPerSegment;     //  Which segment are we in?
+    uint64 pos = _valueWidth * (element % _valuesPerSegment);    //  Bit position of the start of the value.
+
+    uint64 wrd = pos / 64;   //  The word we start in.
+    uint64 bit = pos % 64;   //  Starting at this bit.
+
+    if (element >= _nextElement)
+      _nextElement = element+1;
+
+    if (seg >= _segmentsMax)
+      resizeArray(_segments, _segmentsLen, _segmentsMax, seg + 16, resizeArray_copyData | resizeArray_clearNew);
+
+    while (_segmentsLen <= seg) {             //  Reuse segments kept by clear(); allocate only the missing ones.
+      if (_segments[_segmentsLen] == NULL)
+        _segments[_segmentsLen] = new uint64 [_segmentSize / 64];
+
+      memset(_segments[_segmentsLen], 0xff, sizeof(uint64) * _segmentSize / 64);
+      _segmentsLen++;
+    }
+
+    //  Mask the value, just in case.
+
+    value &= uint64MASK(_valueWidth);
+
+    //  Set the value in the segment.
+
+    //          [--------------------]
+    //                 [value]
+    //           lSave           rSave
+    //
+    if (bit + _valueWidth <= 64) {
+      uint32   lSave = bit;
+      uint32   rSave = 64 - _valueWidth - bit;
+
+      _segments[seg][wrd] = (saveLeftBits(_segments[seg][wrd], lSave) |
+                             (value << rSave)                         |
+                             saveRightBits(_segments[seg][wrd], rSave));
+    }
+
+    //            --lSave--               --rSave--
+    //  [--word--][--first-word--][--second-word--][--word--]
+    //                     [----value----]
+    //                      lSize  rSize
+    //
+    else {
+      uint32   lSave =      bit,   rSave = 128 - _valueWidth - bit;
+      uint32   lSize = 64 - bit,   rSize = _valueWidth - (64 - bit);
+
+      _segments[seg][wrd+0] = saveLeftBits(_segments[seg][wrd+0], lSave) | (value >> rSize);
+      _segments[seg][wrd+1] = (value << rSave) | saveRightBits(_segments[seg][wrd+1], rSave);
+    }
+  };
+
+  void     show(void) {
+    fprintf(stderr, "wordArray:   valueWidth  %2" F_U32P "\n", _valueWidth);
+    fprintf(stderr, "wordArray:   segmentSize %8" F_U64P "   valuesPerSegment %8" F_U64P "\n", _segmentSize, _valuesPerSegment);
+    fprintf(stderr, "\n");
+
+    uint32  bit  = 64;
+    uint32  word = 0;
+    char    bits[65];
+
+    for (uint32 ss=0; ss<_segmentsLen; ss++) {
+      fprintf(stderr, "Segment %u:\n", ss);
+
+      for(uint32 wrd=0, bit=0; bit<_valuesPerSegment * _valueWidth; bit++) {
+        if ((bit % 64) == 0) {
+          displayWord(_segments[ss][wrd++], bits);
+        }
+
+        if ((bit % _valueWidth) == 0)
+          fprintf(stderr, "word %2u: ", wrd);
+
+        fprintf(stderr, "%c", bits[bit % 64]);
 
-  uint64              _segmentsLen      = 0;         //  Number of blocks in use.
-  uint64              _segmentsMax      = 0;         //  Number of block pointers allocated.
-  uint128           **_segments         = nullptr;   //  List of blocks allocated.
+        if ((bit % _valueWidth) == _valueWidth - 1)
+          fprintf(stderr, "\n");
+      }
+    }
+
+    fprintf(stderr, "\n");
+    fprintf(stderr, "\n");
+  }
+
+private:
+  uint32   _valueWidth;
+  uint64   _segmentSize;
+  uint64   _valuesPerSegment;
 
-  std::atomic_flag  **_segLocks         = nullptr;   //  Locks on pieces of the segments.
+  uint64   _nextElement;  //  the first invalid element
+
+  uint64   _segmentsLen;
+  uint64   _segmentsMax;
+  uint64 **_segments;
 };
 
 
@@ -753,13 +865,5 @@ private:
 };
 
 
-//  Implementations.
-
-#define BITS_IMPLEMENTATIONS
-
-#include "bits-wordArray.H"
-
-#undef BITS_IMPLEMENTATIONS
-
 
 #endif  //  LIBBITS_H
diff --git a/ext/meryl/src/utility/src/utility/edlib.C b/ext/meryl/src/utility/src/utility/edlib.C
index 8120c73..9658e74 100644
--- a/ext/meryl/src/utility/src/utility/edlib.C
+++ b/ext/meryl/src/utility/src/utility/edlib.C
@@ -437,7 +437,7 @@ void edlibAlignmentToStrings(const unsigned char* alignment, int alignmentLength
 }
 
 void
-edlibAlignmentToStrings(EdlibAlignResult const &result,
+edlibAlignmentToStrings(EdlibAlignResult result,
                         const char *qry, const int qryLength,
                         const char *tgt, const int tgtLength,
                         char *qryAln,
diff --git a/ext/meryl/src/utility/src/utility/edlib.H b/ext/meryl/src/utility/src/utility/edlib.H
index 2b10740..07050a0 100644
--- a/ext/meryl/src/utility/src/utility/edlib.H
+++ b/ext/meryl/src/utility/src/utility/edlib.H
@@ -270,7 +270,7 @@ void edlibAlignmentToStrings(const unsigned char* alignment, int alignmentLength
                              char *tgt_aln_str,
                              char *qry_aln_str);
 
-void edlibAlignmentToStrings(EdlibAlignResult const &result,
+void edlibAlignmentToStrings(EdlibAlignResult result,
                              const char *qry, const int qryLength,
                              const char *tgt, const int tgtLength,
                              char *qryAln,
diff --git a/ext/meryl/src/utility/src/utility/files-buffered.C b/ext/meryl/src/utility/src/utility/files-buffered.C
index 9d96fe8..5c5a568 100644
--- a/ext/meryl/src/utility/src/utility/files-buffered.C
+++ b/ext/meryl/src/utility/src/utility/files-buffered.C
@@ -71,9 +71,10 @@ readBuffer::initialize(const char *filename, uint64 bufferMax) {
   _ignoreCR    = true;
 
   _bufferBgn   = 0;
+  _bufferLen   = 0;
 
   _bufferPos   = 0;
-  _bufferLen   = 0;
+
   _bufferMax   = (bufferMax == 0) ? 32 * 1024 : bufferMax;
   _buffer      = new char [_bufferMax + 1];
 
@@ -110,9 +111,10 @@ readBuffer::readBuffer(FILE *file, uint64 bufferMax) {
   _ignoreCR    = true;
 
   _bufferBgn   = 0;
+  _bufferLen   = 0;
 
   _bufferPos   = 0;
-  _bufferLen   = 0;
+
   _bufferMax   = (bufferMax == 0) ? 32 * 1024 : bufferMax;
   _buffer      = new char [_bufferMax + 1];
 
@@ -142,17 +144,17 @@ readBuffer::~readBuffer() {
 
 
 void
-readBuffer::fillBuffer(void) {
+readBuffer::fillBuffer(uint64 extra) {
 
   //  If there is still stuff in the buffer, no need to fill.
 
-  if (_bufferPos < _bufferLen)
+  if (_bufferPos + extra < _bufferLen)
     return;
 
   _bufferBgn += _bufferLen;
+  _bufferLen  = 0;
 
   _bufferPos  = 0;
-  _bufferLen  = 0;
 
   assert(_filePos == _bufferBgn);
 
@@ -392,7 +394,7 @@ readBuffer::readIFFchunk(char*name, uint8 *&data, uint32 &dataLen, uint32 &dataM
 
   //  Allocate space for the data.
 
-  resizeArray(data, 0, dataMax, dataLen);
+  resizeArray(data, 0, dataMax, dataLen, resizeArray_doNothing);
 
   //  Copy the data to 'data'.
 
diff --git a/ext/meryl/src/utility/src/utility/files-buffered.H b/ext/meryl/src/utility/src/utility/files-buffered.H
index 345590c..034352a 100644
--- a/ext/meryl/src/utility/src/utility/files-buffered.H
+++ b/ext/meryl/src/utility/src/utility/files-buffered.H
@@ -87,7 +87,7 @@ public:
   const char          *filename(void) { return(_filename); };
 
 private:
-  void                 fillBuffer(void);
+  void                 fillBuffer(uint64 extra=0);
   void                 init(int fileptr, const char *filename, uint64 bufferMax);
 
   char                _filename[FILENAME_MAX+1];
@@ -102,9 +102,10 @@ private:
   bool                _ignoreCR;    //  Ignore blasted DOS CR letters in read() and readuntil().
 
   uint64              _bufferBgn;   //  File position where this buffer is from.
+  uint64              _bufferLen;   //  Length of the valid data in the buffer.
 
   uint64              _bufferPos;   //  Position in the buffer we're at.
-  uint64              _bufferLen;   //  Length of the valid data in the buffer.
+
   uint64              _bufferMax;   //  Size of _buffer allocation.
   char               *_buffer;      //  Data!
 };
diff --git a/ext/meryl/src/utility/src/utility/files-compressed.C b/ext/meryl/src/utility/src/utility/files-compressed.C
index 5b4f9a5..6588a17 100644
--- a/ext/meryl/src/utility/src/utility/files-compressed.C
+++ b/ext/meryl/src/utility/src/utility/files-compressed.C
@@ -44,86 +44,25 @@ compressedFileType(char const *filename) {
 
 
 
-static
-bool
-pigzAvailable(void) {
-  FILE *F = popen("pigz -h > /dev/null 2>&1", "r");
-
-  if (F == nullptr)
-    return(false);
-
-  int32 e = pclose(F);
-
-  return(e == 0);   //  If no error, then 'pigz' was able to run.
-}
-
-
-
 compressedFileReader::compressedFileReader(const char *filename) {
+  char    cmd[FILENAME_MAX];
+  int32   len = 0;
 
   _file     = NULL;
   _filename = duplicateString(filename);
-
-  _type     = compressedFileType(_filename);
-
   _pipe     = false;
   _stdi     = false;
 
-  reopen();
-}
-
-
-
-compressedFileReader::~compressedFileReader() {
-
-  if (_file == NULL)
-    return;
-
-  if (_stdi)
-    return;
-
-  if (_pipe)
-    pclose(_file);
-  else
-    AS_UTL_closeFile(_file);
-
-  delete [] _filename;
-}
-
-
-
-void
-compressedFileReader::reopen(void) {
-  char   cmd[FILENAME_MAX];
-
-  int32  nThreads = omp_get_max_threads();
-  bool   pigz     = false;
-
-  //  If input from stdin, do nothing.  reopen() on this makes no sense,
-  //  and doing nothing is _possibly_ more correct than failing.
-  if (_stdi)
-    return;
-
-  //  Close any existing file.
-  if ((_file) && (_pipe ==  true))   pclose(_file);
-  if ((_file) && (_pipe == false))   AS_UTL_closeFile(_file);
-
-  //  Blow up if the file doesn't exist.
-  if ((_type != cftSTDIN) && (fileExists(_filename) == false))
-    fprintf(stderr, "ERROR:  Failed to open input file '%s': %s\n", _filename, strerror(ENOENT)), exit(1);
+  cftType   ft = compressedFileType(_filename);
 
-  if (_type == cftGZ)
-    pigz = pigzAvailable();
+  if ((ft != cftSTDIN) && (fileExists(_filename) == false))
+    fprintf(stderr, "ERROR:  Failed to open input file '%s': %s\n", _filename, strerror(errno)), exit(1);
 
-  //  Open the file!
   errno = 0;
 
-  switch (_type) {
+  switch (ft) {
     case cftGZ:
-      if (pigz)
-        snprintf(cmd, FILENAME_MAX, "pigz -dc -p %d '%s'", nThreads, _filename);
-      else
-        snprintf(cmd, FILENAME_MAX, "gzip -dc '%s'", _filename);
+      snprintf(cmd, FILENAME_MAX, "gzip -dc '%s'", _filename);
       _file = popen(cmd, "r");
       _pipe = true;
       break;
@@ -138,6 +77,11 @@ compressedFileReader::reopen(void) {
       snprintf(cmd, FILENAME_MAX, "xz -dc '%s'", _filename);
       _file = popen(cmd, "r");
       _pipe = true;
+
+      if (_file == NULL)    //  popen() returns NULL on error.  It does not reliably set errno.
+        fprintf(stderr, "ERROR:  Failed to open input file '%s': popen() returned NULL\n", _filename), exit(1);
+
+      errno = 0;
       break;
 
     case cftSTDIN:
@@ -151,18 +95,26 @@ compressedFileReader::reopen(void) {
       break;
   }
 
-  //  Catch errors.
-  //   - popen() does not set errno, so all we can do is fail.
-  //   - otherwise, we can say something intelligent.
+  if (errno)
+    fprintf(stderr, "ERROR:  Failed to open input file '%s': %s\n", _filename, strerror(errno)), exit(1);
+}
 
-  if (_file == nullptr) {
-    if (_pipe)
-      fprintf(stderr, "ERROR:  Failed to open file with command '%s'\n", cmd);
-    else
-      fprintf(stderr, "ERROR:  Failed to open input file '%s': %s\n", _filename, strerror(errno));
 
-    exit(1);
-  }
+
+compressedFileReader::~compressedFileReader() {
+
+  //  Close what we opened: never stdin, and nothing if no file is open.
+  if ((_file != NULL) && (_stdi == false)) {
+    if (_pipe)
+      pclose(_file);
+    else
+      AS_UTL_closeFile(_file);
+  }
+
+  //  The constructor always allocates the name, so release it on every
+  //  path; returning early for stdin or a NULL file would leak it.
+
+  delete [] _filename;
 }
 
 
@@ -170,8 +122,8 @@ compressedFileReader::reopen(void) {
 compressedFileWriter::compressedFileWriter(const char *filename, int32 level) {
   char   cmd[FILENAME_MAX];
 
-  int32  nThreads = omp_get_max_threads();
-  bool   pigz     = false;
+  int32  nThreads      = omp_get_max_threads();
+  bool   pigzAvailable = false;
 
   _file     = NULL;
   _filename = duplicateString(filename);
@@ -182,8 +134,22 @@ compressedFileWriter::compressedFileWriter(const char *filename, int32 level) {
 
   //  Decide if we have pigz or gzip available.
 
-  if (ft == cftGZ)
-    pigz = pigzAvailable();
+  if (ft == cftGZ) {
+    snprintf(cmd, FILENAME_MAX, "pigz -h > /dev/null 2>&1");
+
+    //  popen() returns NULL on failure and pclose(NULL) is undefined, so test F first.
+    FILE *F = popen(cmd, "r");
+
+    if ((F != NULL) && (pclose(F) == 0))   //  Exit status 0: 'pigz' was able to run.
+      pigzAvailable = true;
+  }
+
+#if 0
+  if (pigzAvailable)
+    fprintf(stderr, "Using pigz for compression.\n");
+  else
+    fprintf(stderr, "Using gzip for compression.\n");
+#endif
 
   //  Open the output processor for input.
 
@@ -191,7 +157,7 @@ compressedFileWriter::compressedFileWriter(const char *filename, int32 level) {
 
   switch (ft) {
     case cftGZ:
-      if (pigz)
+      if (pigzAvailable)
         snprintf(cmd, FILENAME_MAX, "pigz -%dc -p %d > '%s'", level, nThreads, _filename);
       else
         snprintf(cmd, FILENAME_MAX, "gzip -%dc > '%s'", level, _filename);
diff --git a/ext/meryl/src/utility/src/utility/files-compressed.H b/ext/meryl/src/utility/src/utility/files-compressed.H
index e716d8c..f05cb64 100644
--- a/ext/meryl/src/utility/src/utility/files-compressed.H
+++ b/ext/meryl/src/utility/src/utility/files-compressed.H
@@ -39,8 +39,6 @@ public:
   compressedFileReader(char const *filename);
   ~compressedFileReader();
 
-  void  reopen(void);
-
   FILE *operator*(void)     {  return(_file);              };
   FILE *file(void)          {  return(_file);              };
 
@@ -51,13 +49,10 @@ public:
                                       (_stdi == false));   };
 
 private:
-  FILE     *_file;
-  char     *_filename;
-
-  cftType   _type;
-
-  bool      _pipe;
-  bool      _stdi;
+  FILE  *_file;
+  char  *_filename;
+  bool   _pipe;
+  bool   _stdi;
 };
 
 
diff --git a/ext/meryl/src/utility/src/utility/files.C b/ext/meryl/src/utility/src/utility/files.C
index b94e0d0..36a663d 100644
--- a/ext/meryl/src/utility/src/utility/files.C
+++ b/ext/meryl/src/utility/src/utility/files.C
@@ -69,7 +69,7 @@ writeToFile(void const  *objects,
   //  writing 16 GB of data at once; it seems to truncate to 32-bit somewhere.
 
   while (nWritten < nObjects) {
-    uint64  toWrite = std::min(blockSize, nObjects - nWritten);
+    uint64  toWrite = min(blockSize, nObjects - nWritten);
 
     errno = 0;
     uint64 written = fwrite(((char *)objects) + nWritten * objectSize, objectSize, toWrite, file);
@@ -102,7 +102,7 @@ loadFromFile(void        *objects,
   //  we still read in 32 MB chunks.
 
   while (nLoaded < nObjects) {
-    uint64  toLoad = std::min(blockSize, nObjects - nLoaded);
+    uint64  toLoad = min(blockSize, nObjects - nLoaded);
 
     errno = 0;
     uint64 loaded = fread(((char *)objects) + nLoaded * objectSize, objectSize, toLoad, file);
@@ -156,7 +156,7 @@ readLine(char *&L, uint32 &Llen, uint32 &Lmax, FILE *F) {
     return(false);
 
   if ((L == NULL) || (Lmax == 0))
-    allocateArray(L, Lmax, 4, resizeArray_clearNew);
+    allocateArray(L, Lmax = 4, resizeArray_clearNew);
 
   L[Lmax-2] = 0;
   L[Lmax-1] = 0;
@@ -188,7 +188,7 @@ readLine(char *&L, uint32 &Llen, uint32 &Lmax, FILE *F) {
 
   //  Trim trailing whitespace.
 
-  while ((Llen > 0) && (isWhiteSpace(L[Llen-1])))
+  while ((Llen > 0) && (isspace(L[Llen-1])))
     L[--Llen] = 0;
 
   return(true);
@@ -205,7 +205,7 @@ AS_UTL_readLine(char *&L, uint32 &Llen, uint32 &Lmax, FILE *F) {
     return(false);
 
   if ((L == NULL) || (Lmax == 0))
-    allocateArray(L, Lmax, 1024);
+    allocateArray(L, Lmax = 1024, resizeArray_clearNew);
 
   Llen = 0;
 
@@ -217,7 +217,7 @@ AS_UTL_readLine(char *&L, uint32 &Llen, uint32 &Lmax, FILE *F) {
 
   while ((feof(F) == false) && (ch != '\n')) {
     if (Llen + 1 >= Lmax)
-      resizeArray(L, Llen, Lmax, Lmax + growth, _raAct::copyData | _raAct::clearNew);  //  Grow the array.
+      resizeArray(L, Llen, Lmax, Lmax + growth, resizeArray_copyData | resizeArray_clearNew);  //  Grow the array.
 
     L[Llen++] = ch;
 
@@ -230,7 +230,7 @@ AS_UTL_readLine(char *&L, uint32 &Llen, uint32 &Lmax, FILE *F) {
 
   //  Trim trailing whitespace.
 
-  while ((Llen > 0) && (isWhiteSpace(L[Llen-1])))
+  while ((Llen > 0) && (isspace(L[Llen-1])))
     L[--Llen] = 0;
 
   return(true);
@@ -344,27 +344,6 @@ AS_UTL_rename(char const *oldname, char const *newname) {
 
 
 
-void
-AS_UTL_rename(char const *oldprefix, char oldseparator, char const *oldsuffix,
-              char const *newprefix, char newseparator, char const *newsuffix) {
-  char   oldpath[FILENAME_MAX+1] = {0};
-  char   newpath[FILENAME_MAX+1] = {0};
-
-  snprintf(oldpath, FILENAME_MAX, "%s%c%s", oldprefix, oldseparator, oldsuffix);
-  snprintf(newpath, FILENAME_MAX, "%s%c%s", newprefix, newseparator, newsuffix);
-
-  if (pathExists(oldpath) == false)
-    return;
-
-  errno = 0;
-  rename(oldpath, newpath);
-  if (errno)
-    fprintf(stderr, "AS_UTL_renane()--  Failed to rename file '%s' to '%s': %s\n",
-            oldpath, newpath, strerror(errno)), exit(1);
-}
-
-
-
 //  Remove ALL write bits from a given path.
 bool
 AS_UTL_makeReadOnly(char const *prefix, char separator, char const *suffix) {
@@ -542,41 +521,6 @@ AS_UTL_sizeOfFile(FILE *file) {
 
 
 
-uint64
-AS_UTL_timeOfFile(char const *path) {
-  struct stat  s;
-
-  errno = 0;
-  if (stat(path, &s) == -1)
-    fprintf(stderr, "Failed to stat() file '%s': %s\n", path, strerror(errno)), exit(1);
-
-#ifdef __APPLE__
-  return(s.st_mtimespec.tv_sec);
-#else
-  return(s.st_mtim.tv_sec);
-#endif
-}
-
-
-
-uint64
-AS_UTL_timeOfFile(FILE *file) {
-  struct stat  s;
-  off_t        size = 0;
-
-  errno = 0;
-  if (fstat(fileno(file), &s) == -1)
-    fprintf(stderr, "Failed to stat() FILE*: %s\n", strerror(errno)), exit(1);
-
-#ifdef __APPLE__
-  return(s.st_mtimespec.tv_sec);
-#else
-  return(s.st_mtim.tv_sec);
-#endif
-}
-
-
-
 off_t
 AS_UTL_ftell(FILE *stream) {
 
@@ -715,7 +659,7 @@ findSharedFile(char const *relpath, char const *filename) {
 
 
 void
-AS_UTL_loadFileList(char const *fileName, std::vector<char const *> &fileList) {
+AS_UTL_loadFileList(char const *fileName, vector<char const *> &fileList) {
 
   FILE *F = AS_UTL_openInputFile(fileName);
 
@@ -869,10 +813,9 @@ AS_UTL_writeFastA(FILE *f,
                   char const *s, int sl, int bl,
                   char const *h, ...) {
   va_list ap;
-  int     olen = sl + ((bl == 0) ? (1) : (sl / bl)) + 2;
-  char   *o    = new char [olen];
-  int     si   = 0;
-  int     oi   = 0;
+  char   *o  = new char [sl + ((bl == 0) ? 1 : sl / bl) + 2];  //  Bases, line breaks, final newline, NUL; no divide by zero when sl == 0.
+  int     si = 0;                                              //  Next base to read from s.
+  int     oi = 0;                                              //  Next position to write in o.
 
   while (si < sl) {
     o[oi++] = s[si++];
@@ -880,10 +823,8 @@ AS_UTL_writeFastA(FILE *f,
     if (bl != 0 && (si % bl) == 0)
       o[oi++] = '\n';
   }
-
-  if ((oi == 0) || (o[oi-1] != '\n'))
+  if ((oi == 0) || (o[oi-1] != '\n'))   //  Empty sequence: nothing written yet, so do not read o[-1].
     o[oi++] = '\n';
-
   o[oi] = 0;
 
   va_start(ap, h);
@@ -953,46 +894,3 @@ AS_UTL_writeFastQ(FILE *f,
 
 
 
-//  A rather complicated output function.
-//    if seq is FASTQ and not wanting FASTA output -> FASTQ
-//    if seq is FASTA and     wanting FASTQ output -> FASTQ with fixed QV
-//    else                                         -> FASTA
-//
-//  The else cases are
-//    seq is FASTQ and     want FASTA output
-//    seq is FASTA and     want FASTA output
-//    seq is FASTA and not want FASTQ output
-//
-void
-outputSequence(FILE        *OUT,
-               char  const *outputName,
-               char  const *outputBases,
-               uint8 const *outputQuals,  uint32  outputBasesLen,
-               bool         isFASTA,
-               bool         isFASTQ,
-               bool         outputFASTA,
-               bool         outputFASTQ,
-               uint8        QV) {
-
-  if      ((isFASTQ == true) && (outputFASTA == false))
-    AS_UTL_writeFastQ(OUT,
-                      outputBases, outputBasesLen,
-                      outputQuals, outputBasesLen, "@%s\n", outputName);
-
-  else if ((isFASTA == true) && (outputFASTQ == true)) {
-    uint8 *qvs = new uint8 [outputBasesLen];
-
-    for (uint32 ii=0; ii<outputBasesLen; ii++)
-      qvs[ii] = QV;
-
-    AS_UTL_writeFastQ(OUT,
-                      outputBases, outputBasesLen,
-                      qvs,         outputBasesLen, "@%s\n", outputName);
-
-    delete [] qvs;
-  }
-
-  else
-    AS_UTL_writeFastA(OUT,
-                      outputBases, outputBasesLen, 0, ">%s\n", outputName);
-}
diff --git a/ext/meryl/src/utility/src/utility/files.H b/ext/meryl/src/utility/src/utility/files.H
index a4f26e2..01c70f8 100644
--- a/ext/meryl/src/utility/src/utility/files.H
+++ b/ext/meryl/src/utility/src/utility/files.H
@@ -21,8 +21,11 @@
 #define FILES_H
 
 #include "types.H"
+
 #include <vector>
 
+using namespace std;
+
 
 //  Provides a safe and reliable mechanism for reading / writing
 //  binary data.
@@ -48,8 +51,6 @@ void    AS_UTL_symlink(char const *pathToFile, char const *pathToLink);
 void    AS_UTL_unlink(char const *prefix, char separator='.', char const *suffix=NULL);
 
 void    AS_UTL_rename(char const *oldname, char const *newname);
-void    AS_UTL_rename(char const *oldprefix, char oldseparator, char const *oldsuffix,
-                      char const *newprefix, char newseparator, char const *newsuffix);
 
 bool    AS_UTL_makeReadOnly(char const *prefix, char separator='.', char const *suffix=NULL);
 bool    AS_UTL_makeWritable(char const *prefix, char separator='.', char const *suffix=NULL);
@@ -62,9 +63,6 @@ bool    directoryExists(char const *prefix, char separator='.', char const *suff
 off_t   AS_UTL_sizeOfFile(char const *path);
 off_t   AS_UTL_sizeOfFile(FILE *file);
 
-uint64  AS_UTL_timeOfFile(char const *path);
-uint64  AS_UTL_timeOfFile(FILE *file);
-
 off_t   AS_UTL_ftell(FILE *stream);
 void    AS_UTL_fseek(FILE *stream, off_t offset, int whence);
 
@@ -72,7 +70,7 @@ void    AS_UTL_fseek(FILE *stream, off_t offset, int whence);
 char const *findSharedFile(char const *relpath, char const *filename);
 
 //  Read a file-of-files into a vector
-void    AS_UTL_loadFileList(char const *fileName, std::vector<char const *> &FILE);
+void    AS_UTL_loadFileList(char const *fileName, vector<char const *> &FILE);
 
 FILE   *AS_UTL_openInputFile (char const *prefix, char separator='.', char const *suffix=NULL, bool doOpen=true);
 FILE   *AS_UTL_openOutputFile(char const *prefix, char separator='.', char const *suffix=NULL, bool doOpen=true);
@@ -224,19 +222,6 @@ AS_UTL_writeFastQ(FILE *f,
                   uint8 const *q, int ql,   //  As Sanger QV, from integer values
                   char  const *h, ...);
 
-//  Writes FASTA or FASTQ, depending on what data is present and what format
-//  is explicitly desired.
-void
-outputSequence(FILE        *OUT,
-               char  const *outputName,
-               char  const *outputBases,
-               uint8 const *outputQuals,  uint32  outputBasesLen,
-               bool         isFASTA,
-               bool         isFASTQ,
-               bool         outputFASTA,
-               bool         outputFASTQ,
-               uint8        QV);
-
 
 #include "files-compressed.H"
 #include "files-buffered.H"
diff --git a/ext/meryl/src/utility/src/utility/intervalList.H b/ext/meryl/src/utility/src/utility/intervalList.H
index cef8700..45d6efe 100644
--- a/ext/meryl/src/utility/src/utility/intervalList.H
+++ b/ext/meryl/src/utility/src/utility/intervalList.H
@@ -158,8 +158,8 @@ intervalList<iNum>::merge(iNum minOverlap) {
 
     if ((_list[thisI]._end >= _list[nextI]._end) ||
         (_list[thisI]._end >= _list[nextI]._bgn + minOverlap)) {
-      _list[thisI]._end  = std::max(_list[nextI]._end, _list[thisI]._end);
-      _list[thisI]._cnt +=          _list[nextI]._cnt;
+      _list[thisI]._end  = max(_list[nextI]._end, _list[thisI]._end);
+      _list[thisI]._cnt +=     _list[nextI]._cnt;
       nextI++;
     }
 
@@ -237,8 +237,8 @@ intervalList<iNum>::invert(iNum invlo, iNum invhi) {
       inv[invLen++] = { invlo, _list[0]._bgn, 1 };
 
     for (uint32 i=1; i<_listLen; i++) {
-      iNum  bgn = std::max(invlo,         _list[i-1]._end);
-      iNum  end = std::min(_list[i]._bgn, invhi);
+      iNum  bgn = max(invlo,         _list[i-1]._end);
+      iNum  end = min(_list[i]._bgn, invhi);
 
       if (bgn < end)
         inv[invLen++] = { bgn, end, 1 };
diff --git a/ext/meryl/src/utility/src/utility/intervals-implementation.H b/ext/meryl/src/utility/src/utility/intervals-implementation.H
deleted file mode 100644
index c711704..0000000
--- a/ext/meryl/src/utility/src/utility/intervals-implementation.H
+++ /dev/null
@@ -1,423 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of meryl-utility, a collection of miscellaneous code
- *  used by Meryl, Canu and others.
- *
- *  This software is based on:
- *    'Canu' v2.0              (https://github.com/marbl/canu)
- *  which is based on:
- *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
- *
- *  Except as indicated otherwise, this is a 'United States Government Work',
- *  and is released in the public domain.
- *
- *  File 'README.licenses' in the root directory of this distribution
- *  contains full conditions and disclaimers.
- */
-
-#include "arrays.H"
-#include <algorithm>
-
-
-#ifndef INTERVALS_IMPLEMENTATION
-#error Include intervals.H instead of intervals-implementation.H
-#else
-
-
-template <class iNum>
-void
-intervals<iNum>::add_position(iNum bgn, iNum end) {
-
-  if (bgn > end)
-    fprintf(stderr, "intervals<iNum>::add_position()-- ERROR: bgn=%u > end=%u\n", bgn, end);
-  assert(bgn <= end);
-
-  if (_listMax == 0)
-    allocateArray(_list, _listMax, 32);
-
-  increaseArray(_list, _listLen, _listMax, _listMax / 4);
-
-  _list[_listLen]._bgn = bgn;
-  _list[_listLen]._end = end;
-  _list[_listLen]._num = 1;
-
-  _listLen++;
-
-  _isSorted   = false;
-  _isSquashed = false;
-}
-
-
-
-template <class iNum>
-void
-intervals<iNum>::add(intervals<iNum> const &that) {
-
-  resizeArray(_list, _listLen, _listMax, _listLen + that._listLen);
-
-  for (uint32 ii=0; ii<that._listLen; ii++, _listLen++)
-    _list[_listLen] = that._list[ii];
-
-  _isSorted   = false;
-  _isSquashed = false;
-}
-
-
-
-template <class iNum>
-void
-intervals<iNum>::remove(uint32 idx) {
-
-  assert(idx < _listLen);
-
-  for (uint32 ii=idx; ii+1<_listLen; ii++)
-    _list[ii] = _list[ii+1];
-
-  _listLen--;
-}
-
-
-
-template <class iNum>
-void
-intervals<iNum>::sort(void) {
-
-  if ((_isSorted == true) ||
-      (_listLen < 2))
-    return;
-
-  auto increasing = [](_ir const &a,
-                       _ir const &b) {
-                      return(((a._bgn  < b._bgn)) ||
-                             ((a._bgn == b._bgn) && (a._end  < b._end)));
-                    };
-
-  std::sort(_list, _list + _listLen, increasing);
-
-  _isSorted = true;
-}
-
-
-
-template <class iNum>
-void
-intervals<iNum>::squash(iNum minOverlap) {
-  uint32  intoI = 0;  //  Interval we're merging into.
-  uint32  fromI = 1;  //  Interval we're merging from.
-
-  if (_isSquashed == true)
-    return;
-
-  sort();
-
-  while (fromI < _listLen) {
-    assert(_list[intoI]._bgn <  _list[intoI]._end);   //  Basic checks.  Both intervals
-    assert(_list[fromI]._bgn <  _list[fromI]._end);   //  cannot be empty, and intoI
-    assert(_list[intoI]._bgn <= _list[fromI]._bgn);   //  must be before fromI.
-
-    //  If the fromI intersects with intoI -- either contained in intoI, or
-    //  has a thick overlap to intoI -- merge it in.  We're guaranteed that
-    //  this._bgn is before next._bgn, so all we need to do is extend
-    //  this._end to cover the next interval.
-
-    if ((_list[intoI]._end >= _list[fromI]._end) ||
-        (_list[intoI]._end >= _list[fromI]._bgn + minOverlap)) {
-      _list[intoI]._end  = std::max(_list[fromI]._end, _list[intoI]._end);
-      _list[intoI]._num +=          _list[fromI]._num;
-    }
-
-    //  Otherwise, move to the next intoI, copy the current fromI to it, and
-    //  then move to the next fromI.  We should, to be pedantic, check that
-    //  intoI != fromI before the copy, but no harm if we don't.
-
-    else {
-      _list[++intoI] = _list[fromI];
-    }
-
-    fromI++;
-  }
-
-  _listLen    = intoI + 1;   //  Update the length of the list,
-  _isSquashed = true;        //  and note that it's now merged.
-}
-
-
-
-template <class iNum>
-void
-intervals<iNum>::filter(iNum minLength, iNum maxLength) {
-  uint32  intoI = 0;
-  uint32  fromI = 0;
-
-  //  Over every interval, if it is long enough, copy it
-  //  into the 'new' list.
-
-  while (fromI < _listLen) {
-    iNum  length = _list[fromI]._end - _list[fromI]._bgn;
-
-    if ((minLength <= length) &&
-        (length <= maxLength))
-      _list[intoI++] = _list[fromI];
-
-    fromI++;
-  }
-
-  _listLen = intoI;
-}
-
-
-
-
-#if 0
-template <class iNum>
-void
-setToUnion(intervals<iNum> const &A,
-           intervals<iNum> const &B) {
-}
-
-
-template <class iNum>
-void
-setToIntersection(intervals<iNum> const &A,
-                  intervals<iNum> const &B) {
-}
-
-
-template <class iNum>
-void
-setToContained(intervals<iNum> const &A,
-               intervals<iNum> const &B) {
-}
-
-
-template <class iNum>
-void
-setToUnion(iNum bgn, iNum end,
-           intervals<iNum> const &A) {
-}
-
-
-template <class iNum>
-void
-setToIntersection(iNum bgn, iNum end,
-                  intervals<iNum> const &A) {
-}
-
-
-template <class iNum>
-void
-setToContained(iNum bgn, iNum end,
-               intervals<iNum> const &A) {
-}
-#endif
-
-
-//  Helper function to invert a squashed intervals list.
-template <class iNum>
-void
-intervals<iNum>::setToInversion1(iNum bgn, iNum end,
-                                      intervals<iNum> const &A) {
-
-  delete [] _list;
-
-  _listLen = 0;                    //  Create a new list to store the
-  _listMax = A._listLen + 1;       //  inversion.  We need at most one
-  _list    = new _ir [_listMax];   //  more interval than the original.
-
-  //  If no existing list, just add a single interval covering the universe.
-  //
-  //  If the inversion range falls entirely inside a gap in the original list
-  //  (which would also result in the inverted list having one interval
-  //  covering the whole range) we'll catch it in the loop below.
-
-  if (A._listLen == 0) {
-    _list[_listLen++] = { bgn, end, 1 };
-  }
-
-  //  For an existing list:
-  //    1) Add an interval for the first gap, if it's inside the inersion
-  //       range.
-  //    2) Add intervals covering the middle gaps.  Threshold each endpoint
-  //       by the inversion range, and only add a new interval if it is of
-  //       positive length.
-  //    3) Add an interval for the last gap, if it's inside the inversion
-  //       range.
-
-  else {
-    if (bgn < A._list[0]._bgn)
-      _list[_listLen++] = { bgn, A._list[0]._bgn, 1 };
-
-    for (uint32 ii=1; ii<A._listLen; ii++) {
-      iNum  nb = std::max(bgn, A._list[ii-1]._end);
-      iNum  ne = std::min(end, A._list[ii  ]._bgn);
-
-      if (nb < ne)
-        _list[_listLen++] = { nb, ne, 1 };
-    }
-
-    if (A._list[A._listLen-1]._end < end)
-      _list[_listLen++] = { A._list[A._listLen-1]._end, end, 1 };
-  }
-
-  //  Check that we didn't blow up.
-
-  assert(_listLen <= _listMax);
-}
-
-
-//  Helper function to invert a non-squashed intervals list.
-template <class iNum>
-void
-intervals<iNum>::setToInversion2(iNum bgn, iNum end,
-                                      intervals<iNum> const &A) {
-
-  delete [] _list;
-
-  _listLen = 0;                    //  Create a new list to store the
-  _listMax = A._listLen * 2;       //  inversion.  We need at most twice
-  _list    = new _ir [_listMax];   //  the original size.
-
-  //  If no existing list, just add a single interval covering the universe.
-
-  if (A._listLen == 0) {
-    _list[_listLen++] = { bgn, end, 1 };
-  }
-
-  //  For an existing list:
-  //    Add two intervals for each existing interval, one on each end of the
-  //    interval.  The new intervals are thresholded aginst the inversion
-  //    range, and only added if they are of positive length.
-  //
-  //  Note the symmetrically-opposite comparisons; these prevent us from
-  //  adding length=0 intervals.
-
-  else {
-    iNum   nb, ne;
-
-    for (uint32 ii=0; ii<A._listLen; ii++) {
-      if ((bgn < A._list[ii]._bgn) && (A._list[ii]._bgn <= end))
-        _list[_listLen++] = { bgn, A._list[ii]._bgn, 1 };
-
-      if ((bgn <= A._list[ii]._end) && (A._list[ii]._end < end))
-        _list[_listLen++] = { A._list[ii]._end, end, 1 };
-    }
-  }
-
-  //  Check that we didn't blow up.
-
-  assert(_listLen <= _listMax);
-}
-
-
-template <class iNum>
-void
-intervals<iNum>::setToInversion(iNum bgn, iNum end,
-                                     intervals<iNum> const &A) {
-  if (A._isSquashed)
-    setToInversion1(bgn, end, A);
-  else
-    setToInversion2(bgn, end, A);
-}
-
-
-
-template <class iNum>
-void
-intervalsDepth<iNum>::computeDepth(intervals<iNum> const &IL) {
-  uint32    idplen = IL.size() * 2;
-  _idp     *idp    = new _idp [idplen];
-
-  for (uint32 ii=0; ii<IL.size(); ii++) {
-    idp[2*ii  ]._pos = IL.bgn(ii);     //  Enter into an inteval, change
-    idp[2*ii  ]._dlt = 1;              //  depth by +1.
-
-    idp[2*ii+1]._pos = IL.end(ii);     //  Leave an interval, change
-    idp[2*ii+1]._dlt = -1;             //  depth by -1.
-  }
-
-  delete [] _list;
-
-  _listLen  = 0;
-  _list     = nullptr;
-
-  if (idplen > 0)
-    computeDepth(idplen, idp);
-
-  delete [] idp;
-}
-
-
-
-template <class iNum>
-void
-intervalsDepth<iNum>::computeDepth(uint32 idplen, _idp *idp) {
-
-  //  Sort regions so that earlier positions are first, and so that depth
-  //  increases (+1) are before decreases (-1).
-
-  auto increasing = [](_idp const &a,
-                       _idp const &b) {
-                      return(((a._pos  < b._pos)) ||
-                             ((a._pos == b._pos) && (a._dlt > b._dlt)));
-                    };
-
-  std::sort(idp, idp + idplen, increasing);
-
-  //  The first thing must be an 'open' event.  If not, someone supplied a
-  //  negative length to the original intervalList.  Or, possibly, two
-  //  zero-length intervals.
-
-  if (idp[0]._dlt == -1)
-    for (uint32 ii=0; ii<idplen; ii++)
-      fprintf(stderr, "idp[%u] pos %d dlt %d\n", ii, idp[ii]._pos, idp[ii]._dlt);
-
-  assert(idp[0]._dlt == 1);
-
-  //  Init first interval.
-
-  _listLen  = 0;
-  _list     = new _idr [idplen + 1];
-
-  _list[_listLen]._bgn = idp[0]._pos;
-  _list[_listLen]._end = idp[0]._pos;
-  _list[_listLen]._dpt = 1;
-
-  for (uint32 i=1; i<idplen; i++) {
-
-    //  Update the end of the current interval to this position.
-
-    _list[_listLen]._end = idp[i]._pos;
-
-    //  If this position is different than the last position, make
-    //  a new depth interval.
-
-    if (idp[i-1]._pos != idp[i]._pos) {
-      _listLen++;
-
-      _list[_listLen]._bgn = idp[i]._pos;
-      _list[_listLen]._end = idp[i]._pos;
-      _list[_listLen]._dpt = _list[_listLen-1]._dpt;
-    }
-
-    //  Process any depth change associated with this position.
-
-    _list[_listLen]._dpt += idp[i]._dlt;
-
-    //  Is it safe to blindly change the depth of this region?  Yes.  If the
-    //  position is different than the last, we've already made a new depth
-    //  region.  And if it wasn't different, one of the previous positions
-    //  must have made a new depth region.  In any case, the depth region
-    //  we're currently at must always be length 0 -- we're never able to
-    //  change the depth of a region when we're at the end coordinate.
-
-    assert(_list[_listLen]._bgn == _list[_listLen]._end);
-  }
-
-  assert(_listLen < idplen + 1);
-  assert(_list[_listLen]._bgn == _list[_listLen]._end);
-  assert(_list[_listLen]._dpt == 0);
-}
-
-#endif   //  INTERVALS_IMPLEMENTATION
diff --git a/ext/meryl/src/utility/src/utility/intervals.H b/ext/meryl/src/utility/src/utility/intervals.H
deleted file mode 100644
index 8e498a9..0000000
--- a/ext/meryl/src/utility/src/utility/intervals.H
+++ /dev/null
@@ -1,221 +0,0 @@
-
-/******************************************************************************
- *
- *  This file is part of canu, a software program that assembles whole-genome
- *  sequencing reads into contigs.
- *
- *  This software is based on:
- *    'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- *    the 'kmer package' (http://kmer.sourceforge.net)
- *  both originally distributed by Applera Corporation under the GNU General
- *  Public License, version 2.
- *
- *  Canu branched from Celera Assembler at its revision 4587.
- *  Canu branched from the kmer project at its revision 1994.
- *
- *  Modifications by:
- *
- *    Brian P. Walenz beginning on 2018-JUL-20
- *      are a 'United States Government Work', and
- *      are released in the public domain
- *
- *  File 'README.licenses' in the root directory of this distribution contains
- *  full conditions and disclaimers for each license.
- */
-
-#ifndef INTERVALS_H
-#define INTERVALS_H
-
-#include "types.H"
-
-//  The interval coordinates use the usual C semantics of [bgn, end) -
-//  'x=bgn' is inside the interval, but 'x=end' is not.
-
-template <class iNum>
-class intervals {
-private:
-  struct _ir {
-    iNum      _bgn;
-    iNum      _end;
-    uint32    _num;
-  };
-
-public:
-  intervals()    {                  };
-  ~intervals()   { delete [] _list; };
-
-  void      clear(void) {
-    _isSorted   = true;
-    _isSquashed = true;
-    _listLen    = 0;
-  };
-
-  //  Accessors.
-
-  uint32    size(void) const         { return(_listLen); };
-
-  iNum      bgn (uint32 idx) const   { return(_list[idx]._bgn); };
-  iNum      end (uint32 idx) const   { return(_list[idx]._end); };
-  iNum      span(uint32 idx) const   { return(_list[idx]._end - _list[idx]._bgn); };
-
-  uint32    count(uint32 idx) const  { return(_list[idx]._num); };
-
-  //  Modifiers.
-
-  iNum     &bgn (uint32 idx)         { return(_list[idx]._bgn); };
-  iNum     &end (uint32 idx)         { return(_list[idx]._end); };
-
-  uint32   &count(uint32 idx)        { return(_list[idx]._num); };
-
-  void      clear(uint32 idx) {
-    _list[idx]._bgn = iNum();
-    _list[idx]._end = iNum();
-    _list[idx]._num = 0;
-  }
-
-  //  Creation.
-  //
-  //  Add a single interval to the list of intervals specified by either
-  //   - the position of the end points
-  //   - the position of the start and the length of the span
-  //
-  //  Add(intervals) will copy all the intervals from B into this object,
-  //  no further processing (sorting, squashing or filtering) is performed.
-  //
-  //  Remove the interval at position 'idx' in our list.  Doing so will
-  //  greatly screw up interation over the intervals, and it is suggested
-  //  to instead change the span of the interval to zero and then filter
-  //  them out after iteration is complete.
-
-  void      add_position(iNum bgn, iNum end);
-  void      add_span    (iNum bgn, iNum len) {
-    if (len < 0)
-      add_position(bgn+len, bgn);
-    else
-      add_position(bgn, bgn+len);
-  };
-
-  void      add(intervals<iNum> const &B);
-
-  void      remove(uint32 idx);
-
-  //  Sort intervals by increasing coordinate, breaking ties with the end
-  //  coordinate.
-  //
-  //  Combine intervals that overlap by at least 'minOverlap' into one item.
-  //
-  //  Discard intervals that are smaller than minLength or larger than
-  //  maxLength.
-
-  void      sort(void);
-  void      squash(iNum minOverlap=0);
-  void      filter(iNum minLength, iNum maxLength);
-
-  //  setToUnion - populate this intervals object with all the intervals in A
-  //  and B.  If both A and B are squashed, this intervals object will also
-  //  be squashed.
-  //
-  //  setToIntersection - each interval in A (B) is intersected with all
-  //  intervals in B (A), and the resulting interval is added to this object.
-  //
-  //  setToContained - each interval in A that is contained fully in some
-  //  interval in B is added to this intervals object.
-#if 0
-  void      setToUnion       (intervals<iNum> const &A, intervals<iNum> const &B);
-  void      setToIntersection(intervals<iNum> const &A, intervals<iNum> const &B);
-  void      setToContained   (intervals<iNum> const &A, intervals<iNum> const &B);
-#endif
-  //  setToUnion - copy the intervals in A that oveerlap with the interval
-  //  bgn-end.
-  //
-  //  setToIntersection - copy the intervals in A that intersect with the
-  //  interval bgn-end, and trim them to that range.
-  //
-  //  setToContained - copy the intervals in A that are contained within the
-  //  interval bgn-end.
-  //
-  //  setToInversion
-  //   - if A is squashed, intervals that fill the 'holes' in A, bounded by
-  //     bgn and end) are added to this object.
-  //   - if A is not squashed, each interval in A will contribute 0, 1 or 2
-  //     new intervals to this object, representing the holes, bounded by bgn and end,
-  //     created by only that single interval in A.
-  //
-  //                   bgn[               ]end
-  //                --------  ---------     ----  A
-  //                --------  ---------           union
-  //                      --  ---------           intersection
-  //                          ---------           contained
-  //                        --         ----       inversion
-#if 0
-  void      setToUnion       (iNum bgn, iNum end, intervals<iNum> const &A);
-  void      setToIntersection(iNum bgn, iNum end, intervals<iNum> const &A);
-  void      setToContained   (iNum bgn, iNum end, intervals<iNum> const &A);
-#endif
-  void      setToInversion   (iNum bgn, iNum end, intervals<iNum> const &A);
-
-  //  Helper functions.
-private:
-  void      setToInversion1(iNum bgn, iNum end, intervals<iNum> const &A);
-  void      setToInversion2(iNum bgn, iNum end, intervals<iNum> const &A);
-
-private:
-  bool     _isSorted   = true;
-  bool     _isSquashed = true;
-
-  uint32   _listMax    = 0;
-  uint32   _listLen    = 0;
-  _ir     *_list       = nullptr;
-};
-
-
-
-template <class iNum>
-class intervalsDepth {
-private:
-  struct _idp {     //  An intervalDepthPosition stores the position
-    iNum   _pos;    //  of a change in depth, and the delta of that
-    int32  _dlt;    //  change (which is either +1 or -1).
-  };
-
-  struct _idr {     //  An intervalDepthRegion has the coordinates
-    iNum   _bgn;    //  of the region and the depth.
-    iNum   _end;
-    uint32 _dpt;
-  };
-
-public:
-  intervalsDepth() {
-  };
-  intervalsDepth(intervals<iNum> const &IL) {
-    computeDepth(IL);
-  };
-  ~intervalsDepth() {
-    delete [] _list;
-  };
-
-  uint32    size(void)          { return(_listLen); };
-
-  iNum      bgn (uint32 idx)    { return(_list[idx]._bgn); };
-  iNum      end (uint32 idx)    { return(_list[idx]._end); };
-  iNum      span(uint32 idx)    { return(_list[idx]._end - _list[idx]._bgn); };
-
-  uint32    depth(uint32 idx)   { return(_list[idx]._dpt); };
-
-  void      computeDepth(intervals<iNum> const &IL);
-
-private:
-  void      computeDepth(uint32 idplen, _idp *idp);
-
-  uint32   _listLen = 0;
-  _idr    *_list    = nullptr;
-};
-
-
-
-#define INTERVALS_IMPLEMENTATION
-#include "intervals-implementation.H"
-#undef  INTERVALS_IMPLEMENTATION
-
-
-#endif  //  INTERVALS_H
diff --git a/ext/meryl/src/utility/src/utility/kmers-exact.C b/ext/meryl/src/utility/src/utility/kmers-exact.C
index bb342da..0fbb82a 100644
--- a/ext/meryl/src/utility/src/utility/kmers-exact.C
+++ b/ext/meryl/src/utility/src/utility/kmers-exact.C
@@ -22,22 +22,43 @@
 #include <vector>
 #include <algorithm>
 
+using namespace std;
 
-//  Set some basic boring stuff.
+
+//  If set, allocate another (large) array to verify that there are no holes in the
+//  data array.  Holes would lead to false positives.
 //
-void
-merylExactLookup::initialize(merylFileReader *input_, kmvalu minValue_, kmvalu maxValue_) {
+#undef  VERIFY_SUFFIX_END
+
+
+
+
+
+double
+bitsToGB(uint64 bits) {
+  return(bits / 8 / 1024.0 / 1024.0 / 1024.0);
+}
+
+double
+bitsToMB(uint64 bits) {
+  return(bits / 8 / 1024.0 / 1024.0);
+}
 
-  //  Save a pointer to the input data.
 
-  _input = input_;
+
+
+//  Set some basic boring stuff.
+//
+void
+merylExactLookup::initialize(uint64               minValue_,
+                             uint64               maxValue_) {
 
   //  Silently make minValue and maxValue be valid values.
 
   if (minValue_ == 0)
     minValue_ = 1;
 
-  if (maxValue_ == kmvalumax) {
+  if (maxValue_ == UINT64_MAX) {
     uint32  nV = _input->stats()->histogramLength();
 
     maxValue_ = _input->stats()->histogramValue(nV - 1);
@@ -65,6 +86,7 @@ merylExactLookup::initialize(merylFileReader *input_, kmvalu minValue_, kmvalu m
     _valueBits = countNumberOfBits64(_maxValue + 1 - _minValue);
 
   _suffixMask     = 0;
+  _dataMask       = 0;
 
   _nPrefix        = 0;                               //  Number of entries in pointer table.
   _nSuffix        = 0;                               //  Number of entries in suffix dable.
@@ -72,7 +94,7 @@ merylExactLookup::initialize(merylFileReader *input_, kmvalu minValue_, kmvalu m
   //  Scan the histogram to count the number of kmers in range.
 
   for (uint32 ii=0; ii<_input->stats()->histogramLength(); ii++) {
-    kmvalu  v = _input->stats()->histogramValue(ii);
+    uint64  v = _input->stats()->histogramValue(ii);
 
     if ((_minValue <= v) &&
         (v <= _maxValue))
@@ -82,11 +104,10 @@ merylExactLookup::initialize(merylFileReader *input_, kmvalu minValue_, kmvalu m
   _prePtrBits     = countNumberOfBits64(_nSuffix);   //  Width of an entry in the prefix table.
   _prePtrBits     = 64;
 
-  _suffixBgn      = nullptr;
-  _suffixLen      = nullptr;
-  _suffixEnd      = nullptr;
-  _sufData        = nullptr;
-  _valData        = nullptr;
+  _suffixBgn      = NULL;
+  _suffixEnd      = NULL;
+  _sufData        = NULL;
+  _valData        = NULL;
 }
 
 
@@ -96,51 +117,31 @@ merylExactLookup::initialize(merylFileReader *input_, kmvalu minValue_, kmvalu m
 //  use for indexing (prefixSize), and how many bits of data we need
 //  to store explicitly (suffixBits and valueBits).
 //
-void
-merylExactLookup::configure(double  memInGB,
-                            double &memInGBmin,
-                            double &memInGBmax,
-                            bool    useMinimalMemory,
-                            bool    useOptimalMemory,
-                            bool    reportMemory,
-                            bool    reportSizes) {
-
-  //  Convert the memory in GB to memory in BITS.  If no memory
-  //  size is supplied, as the OS how big we can get.
-
-  if (memInGB == 0.0)
-    _maxMemory = getMaxMemoryAllowed() * 8;
-  else
-    _maxMemory = (uint64)(memInGB * 1024.0 * 1024.0 * 1024.0 * 8);
-
-  //  Find the prefixBits that results in the smallest allocated memory size.
-  //  Due to threading over the files, we cannot use a prefix smaller than 6
-  //  bits.
+bool
+merylExactLookup::configure(void) {
+
+  //  First, find the prefixBits that results in the smallest allocated memory size.
+  //  Due to threading over the files, we cannot use a prefix smaller than 6 bits.
   //
   //  While it's nice to find the smallest memory size possible, that's also
   //  about the slowest possible.  Instead, empirically determined on a small
-  //  test, allow a very sparse table of 16 to 32 prefixes per kmer (if
-  //  possible).
+  //  test, allow a very sparse table of 16 to 32 prefixes per kmer (if possible).
 
-  uint64  minSpace   = uint64max;
-  uint64  optSpace   = uint64max;
-  uint64  usdSpace   = uint64max;
+  uint64  minSpace   = UINT64_MAX;
+  uint64  optSpace   = UINT64_MAX;
 
   //  _nSuffix here is just the number of distinct kmers in the input.  We'll
   //  search for prefix sizes up to that size plus a bit more to show that
   //  what we pick really is the best size.
-  //
-  //  We save the smallest size, and the 'optimal' size, defined as something
-  //  at least as big as the smallest, but not more than 8 times larger.
 
   uint32  pbMin      = 0;
   uint32  pbOpt      = 0;
-  uint32  pbMax      = countNumberOfBits64(_nSuffix) + 1;
+  uint32  pbMax      = countNumberOfBits64(_nSuffix) + 4;
 
   if (pbMax > kmer::merSize() * 2)
     pbMax = kmer::merSize() * 2;
 
-  for (uint32 pb=0; pb<pbMax; pb++) {
+  for (uint32 pb=1; pb<pbMax; pb++) {
     uint64  nprefix = (uint64)1 << pb;
     uint64  space   = nprefix * _prePtrBits + _nSuffix * (_Kbits - pb) + _nSuffix * _valueBits;
 
@@ -149,41 +150,23 @@ merylExactLookup::configure(double  memInGB,
       minSpace     = space;
     }
 
-    if ((space < _maxMemory) && (pb < pbMin + 4)) {
+    if (space < _maxMemory) {
       pbOpt        = pb;
       optSpace     = space;
-    }
-  }
-
-  //  Set parameters.  For logging, we need these set even if
-  //  useMinimalMemory and useOptimalMemory are false -- this happens when
-  //  we're called from estimateMemoryUsage.
-
-  if (useMinimalMemory == true) {
-    usdSpace = minSpace;
 
-    _prefixBits  =          pbMin;
-    _suffixBits  = _Kbits - pbMin;
+      _prefixBits  =          pb;
+      _suffixBits  = _Kbits - pb;
 
-    _suffixMask  = buildLowBitMask<kmdata>(_suffixBits);
+      _suffixMask  = uint64MASK(_suffixBits);
+      _dataMask    = uint64MASK(_valueBits);
 
-    _nPrefix     = (uint64)1 << pbMin;
-  }
-
-  if (useOptimalMemory == true) {
-    usdSpace = optSpace;
-
-    _prefixBits  =          pbOpt;
-    _suffixBits  = _Kbits - pbOpt;
-
-    _suffixMask  = buildLowBitMask<kmdata>(_suffixBits);
-
-    _nPrefix     = (uint64)1 << pbOpt;
+      _nPrefix     = nprefix;
+    }
   }
 
   //  And do it all again to keep the users entertained.
 
-  if (reportMemory) {
+  if (_verbose) {
     fprintf(stderr, "\n");
     fprintf(stderr, " p       prefixes             bits gigabytes (allowed: %lu GB)\n", _maxMemory >> 33);
     fprintf(stderr, "-- -------------- ---------------- ---------\n");
@@ -198,36 +181,38 @@ merylExactLookup::configure(double  memInGB,
       uint64  nprefix = (uint64)1 << pb;
       uint64  space   = nprefix * _prePtrBits + _nSuffix * (_Kbits - pb) + _nSuffix * _valueBits;
 
-      if     ((pb == pbMin) &&
-              (pb == pbOpt))
-        fprintf(stderr, "%2u %14lu %16lu %9.3f (smallest)\n", pb, nprefix, space, bitsToGB(space));
-      else if (pb == pbMin)
+      if      (pb == pbMin)
         fprintf(stderr, "%2u %14lu %16lu %9.3f (smallest)\n", pb, nprefix, space, bitsToGB(space));
+
       else if (pb == pbOpt)
-        fprintf(stderr, "%2u %14lu %16lu %9.3f (faster)\n",   pb, nprefix, space, bitsToGB(space));
+        fprintf(stderr, "%2u %14lu %16lu %9.3f (used)\n",     pb, nprefix, space, bitsToGB(space));
+
       else
         fprintf(stderr, "%2u %14lu %16lu %9.3f\n",            pb, nprefix, space, bitsToGB(space));
     }
 
     fprintf(stderr, "-- -------------- ---------------- ---------\n");
-    fprintf(stderr, "   %14lu total kmers\n", _nSuffix);
     fprintf(stderr, "\n");
-  }
 
-  if (reportSizes) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "For %lu distinct %u-mers (with %u bits used for indexing and %u bits for tags):\n", _nSuffix, _Kbits / 2, _prefixBits, _suffixBits);
-    fprintf(stderr, "  %7.3f GB memory for kmer indices - %12lu elements %2u bits wide)\n", bitsToGB(_nPrefix * _prePtrBits), _nPrefix, _prePtrBits);
-    fprintf(stderr, "  %7.3f GB memory for kmer tags    - %12lu elements %2u bits wide)\n", bitsToGB(_nSuffix * _suffixBits), _nSuffix, _suffixBits);
-    fprintf(stderr, "  %7.3f GB memory for kmer values  - %12lu elements %2u bits wide)\n", bitsToGB(_nSuffix * _valueBits),  _nSuffix, _valueBits);
-    fprintf(stderr, "  %7.3f GB memory\n",                                                  bitsToGB(usdSpace));
-    fprintf(stderr, "\n");
+    if (_prefixBits == 0) {
+      fprintf(stderr, "Not enough memory to load %lu distinct %u-kmers.\n", _nSuffix, _Kbits / 2);
+      fprintf(stderr, "Need at least %.3f GB memory.\n", bitsToGB(minSpace));
+    }
+
+    else {
+      fprintf(stderr, "For %lu distinct %u-mers (with %u bits used for indexing and %u bits for tags):\n", _nSuffix, _Kbits / 2, _prefixBits, _suffixBits);
+      fprintf(stderr, "  %7.3f GB memory\n",                                       bitsToGB(optSpace));
+      fprintf(stderr, "  %7.3f GB memory for index (%lu elements %u bits wide)\n", bitsToGB(_nPrefix * _prePtrBits), _nPrefix, _prePtrBits);
+      fprintf(stderr, "  %7.3f GB memory for tags  (%lu elements %u bits wide)\n", bitsToGB(_nSuffix * _suffixBits), _nSuffix, _suffixBits);
+      fprintf(stderr, "  %7.3f GB memory for data  (%lu elements %u bits wide)\n", bitsToGB(_nSuffix * _valueBits),  _nSuffix, _valueBits);
+      fprintf(stderr, "\n");
+    }
   }
 
-  //  Copy the min and optimal memory sizes to the output variables.
+  if (_prefixBits == 0)
+    return(false);
 
-  memInGBmin = bitsToGB(minSpace);
-  memInGBmax = bitsToGB(optSpace);
+  return(true);
 }
 
 
@@ -240,28 +225,15 @@ merylExactLookup::configure(double  memInGB,
 void
 merylExactLookup::count(void) {
 
-  _suffixBgn = new uint64 [_nPrefix];
-  _suffixLen = new uint64 [_nPrefix];
-  _suffixEnd = new uint64 [_nPrefix];
+  _suffixBgn = new uint64 [_nPrefix + 1];
 
-  for (uint64 ii=0; ii<_nPrefix; ii++)
-    _suffixBgn[ii] = _suffixLen[ii] = _suffixEnd[ii] = uint64zero;
+  memset(_suffixBgn, 0, sizeof(uint64) * (_nPrefix + 1));
 
   //  Scan all kmer files, counting the number of kmers per prefix.
   //  This is thread safe when _prefixBits is more than 6 (the number of files).
 
   uint32   nf = _input->numFiles();
 
-  assert(nf == 64);
-
-  uint64   minp[nf];
-  uint64   maxp[nf];
-
-  for (uint32 ii=0; ii<nf; ii++) {
-    minp[ii] = uint64max;
-    maxp[ii] = uint64min;
-  }
-
 #pragma omp parallel for schedule(dynamic, 1)
   for (uint32 ff=0; ff<nf; ff++) {
     FILE                  *blockFile = _input->blockFile(ff);
@@ -279,9 +251,9 @@ merylExactLookup::count(void) {
       block->decodeBlock();
 
       for (uint32 ss=0; ss<block->nKmers(); ss++) {
-        kmdata   kbits  = 0;
-        kmdata   prefix = 0;
-        kmvalu   value  = block->values()[ss];
+        uint64   sdata  = 0;
+        uint64   prefix = 0;
+        uint64   value  = block->values()[ss];
 
         if (value < _minValue) {
           tooLow++;
@@ -295,18 +267,15 @@ merylExactLookup::count(void) {
 
         loaded++;
 
-        kbits   = block->prefix();         //  Combine the file prefix and
-        kbits <<= _input->suffixSize();    //  suffix data to reconstruct
-        kbits  |= block->suffixes()[ss];   //  the kmer bits.
-
-        prefix = kbits >> _suffixBits;     //  Then extract the prefix
+        sdata   = block->prefix();         //  Reconstruct the kmer into sdata.  This is just
+        sdata <<= _input->suffixSize();    //  kmerTiny::setPrefixSuffix().  From the kmer,
+        sdata  |= block->suffixes()[ss];   //  generate the prefix we want to save it as.
 
-        minp[ff] = std::min(minp[ff], (uint64)prefix);
-        maxp[ff] = std::max(maxp[ff], (uint64)prefix);
+        prefix  = sdata >> _suffixBits;
 
         assert(prefix < _nPrefix);
 
-        _suffixLen[prefix]++;              //  Count the number of kmers per prefix.
+        _suffixBgn[prefix]++;              //  Count the number of kmers per prefix.
       }
     }
 
@@ -322,39 +291,28 @@ merylExactLookup::count(void) {
     AS_UTL_closeFile(blockFile);
   }
 
-  //  If the min/max intersect, we've got a problem somewhere.  Each 'prefix'
-  //  will map to exactly one file, and they're supposed to map
-  //  consecutively.  Good luck figuring out what broke if this triggers.
-
-  for (uint32 ii=1; ii<nf; ii++)
-    assert(maxp[ii-1] < minp[ii]);
-
-  //  Now that we know the length of each block, we can set _suffixBgn to the
-  //  address of the first element.  _suffixEnd is set to that too; we'll use
-  //  it to load data into the table.
-  //
-  //  To allow threads without locks, we need to pad the end of each block so
-  //  that two blocks don't share a wordArray word.  Instead, we just pad the
-  //  last block that each thread (one thread per file) will access.  A
-  //  little bit harder to figure out, but less memory used.
-  //
-  //  For a prefix of [ffffffppp..pppp] a single thread will process all
-  //  kmers [ffffff......].  Thus, when the prefix ends in 1111...111, we can
-  //  just bump up 'bgn' a bit, just enough to get to the next 128-bit word.
-  //  But since this index is used both in storing suffixes and values,
-  //  that's impossible and we just add 256 bits.
+  //  Convert the kmers per prefix into begin coordinate for each prefix.
+  //  The loading loop uses _suffixBgn[] as the position to add the next
+  //  data.
 
-  uint64 mask = (_nPrefix - 1) >> 6;
+  uint64  bgn = 0;
+  uint64  nxt = 0;
 
-  for (uint64 bgn=0, ii=0; ii<_nPrefix; ii++) {
+  for (uint64 ii=0; ii<_nPrefix; ii++) {
+    nxt            = _suffixBgn[ii];
     _suffixBgn[ii] = bgn;
-    _suffixEnd[ii] = bgn;
+    bgn           += nxt;
+  }
 
-    bgn += _suffixLen[ii];
+  assert(bgn == _nKmersLoaded);
+  _suffixBgn[_nPrefix] = bgn;
 
-    if ((ii & mask) == mask)
-      bgn += 256;
-  }
+#ifdef VERIFY_SUFFIX_END
+  _suffixEnd = new uint64 [_nPrefix];
+
+  for (uint64 ii=0; ii<_nPrefix; ii++)
+    _suffixEnd[ii] = _suffixBgn[ii];
+#endif
 
   //  Log.
 
@@ -372,45 +330,33 @@ merylExactLookup::count(void) {
 //  prevent the need for any locking or coordination when filling out the
 //  array.
 //
-double
+void
 merylExactLookup::allocate(void) {
-  uint64  arraySize;
-  uint64  arrayBlockMin;
-  double  memInGBused = 0.0;
-
-  uint64  ns = _suffixEnd[_nPrefix-1];   //  The largest word we access in wordArray.
+  uint64  arraySize, arrayBlockMin;
 
   if (_suffixBits > 0) {
-    arraySize      = ns * _suffixBits;
-    arrayBlockMin  = std::max(arraySize / 1024llu, 268435456llu);   //  In bits, so 32MB per block.
-    memInGBused   += bitsToGB(arraySize);
+    arraySize     = _nSuffix * _suffixBits;
+    arrayBlockMin = max(arraySize / 1024llu, 268435456llu);   //  In bits, so 32MB per block.
 
     if (_verbose)
       fprintf(stderr, "Allocating space for %lu suffixes of %u bits each -> %lu bits (%.3f GB) in blocks of %.3f MB\n",
-              ns, _suffixBits, arraySize, bitsToGB(arraySize), bitsToMB(arrayBlockMin));
-
-    assert(_suffixBits <= 128);
+              _nSuffix, _suffixBits, arraySize, bitsToGB(arraySize), bitsToMB(arrayBlockMin));
 
-    _sufData = new wordArray(_suffixBits, arrayBlockMin, false);
-    _sufData->allocate(ns);
+    _sufData = new wordArray(_suffixBits, arrayBlockMin);
+    _sufData->allocate(_nSuffix);
   }
 
   if (_valueBits > 0) {
-    arraySize     = ns * _valueBits;
-    arrayBlockMin = std::max(arraySize / 1024llu, 268435456llu);   //  In bits, so 32MB per block.
-    memInGBused   += bitsToGB(arraySize);
+    arraySize     = _nSuffix * _valueBits;
+    arrayBlockMin = max(arraySize / 1024llu, 268435456llu);   //  In bits, so 32MB per block.
 
     if (_verbose)
       fprintf(stderr, "                     %lu values   of %u bits each -> %lu bits (%.3f GB) in blocks of %.3f MB\n",
-              ns, _valueBits,  arraySize, bitsToGB(arraySize), bitsToMB(arrayBlockMin));
+              _nSuffix, _valueBits,  arraySize, bitsToGB(arraySize), bitsToMB(arrayBlockMin));
 
-    assert(_valueBits <= 64);
-
-    _valData = new wordArray(_valueBits, arrayBlockMin, false);
-    _valData->allocate(ns);
+    _valData = new wordArray(_valueBits, arrayBlockMin);
+    _valData->allocate(_nSuffix);
   }
-
-  return(memInGBused);
 }
 
 
@@ -421,9 +367,11 @@ merylExactLookup::allocate(void) {
 //  In this case, we overallocate, but cannot cleanup at the end.
 void
 merylExactLookup::load(void) {
-  uint32   nf      = _input->numFiles();
-  uint64   sufMask = buildLowBitMask<kmdata>(_suffixBits);
-  uint64   valMask = buildLowBitMask<kmvalu>(_valueBits);
+
+  count();
+  allocate();
+
+  uint32   nf = _input->numFiles();
 
 #pragma omp parallel for schedule(dynamic, 1)
   for (uint32 ff=0; ff<nf; ff++) {
@@ -436,23 +384,24 @@ merylExactLookup::load(void) {
       block->decodeBlock();
 
       for (uint32 ss=0; ss<block->nKmers(); ss++) {
-        kmdata   kbits  = 0;
-        kmdata   prefix = 0;
-        kmdata   suffix = 0;
-        kmvalu   value  = block->values()[ss];
+        uint64   prefix = 0;
+        uint64   suffix = 0;
+        uint64   value  = block->values()[ss];
 
         if ((value < _minValue) ||         //  Sanity checking and counting done
             (_maxValue < value))           //  in count() above.
           continue;
 
-        kbits   = block->prefix();         //  Combine the file prefix and
-        kbits <<= _input->suffixSize();    //  suffix data to reconstruct
-        kbits  |= block->suffixes()[ss];   //  the kmer bits.
+        //  Compute and store the prefix.
 
-        suffix = kbits  & sufMask;         //  Then extract the prefix
-        prefix = kbits >> _suffixBits;     //  and suffix to use in the table
+        prefix   = block->prefix();         //  Reconstruct the kmer into prefix.  This is just
+        prefix <<= _input->suffixSize();    //  kmerTiny::setPrefixSuffix().  From the kmer,
+        prefix  |= block->suffixes()[ss];   //  generate the prefix we want to save it as.
 
-        _sufData->set(_suffixEnd[prefix], suffix);
+        suffix   = prefix & uint64MASK(_suffixBits);
+        prefix >>= _suffixBits;
+
+        _sufData->set(_suffixBgn[prefix], suffix);
 
         //  Compute and store the value, if requested.
 
@@ -460,16 +409,20 @@ merylExactLookup::load(void) {
           value -= _valueOffset;
 
           if (value > _maxValue + 1 - _minValue)
-            fprintf(stderr, "minValue " F_U32 " maxValue " F_U32 " value " F_U32 " bits " F_U32 "\n",
+            fprintf(stderr, "minValue " F_U64 " maxValue " F_U64 " value " F_U64 " bits " F_U32 "\n",
                     _minValue, _maxValue, value, _valueBits);
-          assert(value <= valMask);
+          assert(value <= uint64MASK(_valueBits));
 
-          _valData->set(_suffixEnd[prefix], value);
+          _valData->set(_suffixBgn[prefix], value);
         }
 
         //  Move to the next item.
 
+        _suffixBgn[prefix]++;
+
+#ifdef VERIFY_SUFFIX_END
         _suffixEnd[prefix]++;
+#endif
       }
     }
 
@@ -478,92 +431,48 @@ merylExactLookup::load(void) {
     AS_UTL_closeFile(blockFile);
   }
 
-  //  Check that we loaded the expected number of kmers into each space
+  //  suffixBgn[i] is now the start of [i+1]; shift the array by one to
+  //  restore the proper meaning of suffixBgn.
 
-  for (uint64 ii=0; ii<_nPrefix; ii++)
-    assert(_suffixBgn[ii] + _suffixLen[ii] == _suffixEnd[ii]);
-  
-  //  Now just log.
+  for (uint64 ii=_nPrefix; ii>0; ii--)
+    _suffixBgn[ii] = _suffixBgn[ii-1];
 
-  if (_verbose)
-    fprintf(stderr, "Loaded " F_U64 " kmers.  Skipped " F_U64 " (too low) and " F_U64 " (too high) kmers.\n",
-            _nKmersLoaded, _nKmersTooLow, _nKmersTooHigh);
-}
+  _suffixBgn[0] = 0;
 
+  //  Optionally verify that bgn[i] == end[i-1].
 
+#ifdef VERIFY_SUFFIX_END
+  for (uint64 ii=1; ii<_nPrefix; ii++)
+    assert(_suffixBgn[ii] == _suffixEnd[ii-1]);
 
-void
-merylExactLookup::estimateMemoryUsage(merylFileReader *input_,
-                                      double           maxMemInGB_,
-                                      double          &minMemInGB_,
-                                      double          &optMemInGB_,
-                                      kmvalu           minValue_,
-                                      kmvalu           maxValue_) {
-  initialize(input_, minValue_, maxValue_);
-  configure(maxMemInGB_, minMemInGB_, optMemInGB_, false, false, true, false);
-}
-
+  delete [] _suffixEnd;
+  _suffixEnd = NULL;
+#endif
 
+  //  Now just log.
 
-double
-merylExactLookup::load(merylFileReader *input_,
-                       double           maxMemInGB_,
-                       bool             useMinimalMemory,
-                       bool             useOptimalMemory,
-                       kmvalu           minValue_,
-                       kmvalu           maxValue_) {
-  double  minMem  = 0.0;
-  double  maxMem  = 0.0;
-  double  memInGBused = 0.0;
-
-  initialize(input_, minValue_, maxValue_);            //  Initialize ourself.
-
-  configure(maxMemInGB_,                               //  Find parameters.
-            minMem,
-            maxMem,
-            useMinimalMemory,
-            useOptimalMemory,
-            false,
-            true);
-
-  if (_prefixBits == 0)                                //  Fail if needed.
-    return(0.0);
-
-  count();                                             //  Count kmers/prefix.
-  memInGBused = allocate();                            //  Allocate space.
-  load();                                              //  Load data.
-
-  return(memInGBused);
+  if (_verbose)
+    fprintf(stderr, "Loaded " F_U64 " kmers.  Skipped " F_U64 " (too low) and " F_U64 " (too high) kmers.\n",
+            _nKmersLoaded, _nKmersTooLow, _nKmersTooHigh);
 }
 
 
 
-
-
-
 bool
 merylExactLookup::exists_test(kmer k) {
-  char    kmerString[65];
-  kmdata  kmer   = (kmdata)k;
-  kmdata  prefix = kmer >> _suffixBits;
-  kmdata  suffix = kmer  & _suffixMask;
 
-  fprintf(stderr, "\n");
-  fprintf(stderr, "kmer        %s  %s\n", toHex(kmer, 2 * k.merSize()), k.toString(kmerString));
-  fprintf(stderr, "suffixBits  %s  %3u bits\n", toHex(_suffixMask, _suffixBits), _suffixBits);
-  fprintf(stderr, "prefix      %s  %3u bits\n", toHex(prefix, 2 * k.merSize() - _suffixBits), 2 * k.merSize() - _suffixBits);
-  fprintf(stderr, "suffix      %s\n", toHex(suffix, _suffixBits));
+  uint64  kmer   = (uint64)k;
+  uint64  prefix = kmer >> _suffixBits;
+  uint64  suffix = kmer  & _suffixMask;
 
   uint64  bgn = _suffixBgn[prefix];
   uint64  mid;
-  uint64  end = _suffixEnd[prefix];
+  uint64  end = _suffixBgn[prefix + 1];
 
-  kmdata  tag;
+  uint64  tag;
 
   //  Binary search for the matching tag.
 
-  fprintf(stderr, "BINARY SEARCH the bucket %lu-%lu for suffix %s.\n", bgn, end, toHex(suffix));
-
   while (bgn + 8 < end) {
     mid = bgn + (end - bgn) / 2;
 
@@ -589,26 +498,23 @@ merylExactLookup::exists_test(kmer k) {
   }
 
   fprintf(stderr, "\n");
-  fprintf(stderr, "FAILED kmer   0x%s\n", toHex(kmer));
-  fprintf(stderr, "FAILED prefix 0x%s\n", toHex(prefix));
-  fprintf(stderr, "FAILED suffix 0x%s\n", toHex(suffix));
+  fprintf(stderr, "FAILED kmer   0x%016lx\n", kmer);
+  fprintf(stderr, "FAILED prefix 0x%016lx\n", prefix);
+  fprintf(stderr, "FAILED suffix 0x%016lx\n", suffix);
   fprintf(stderr, "\n");
-  fprintf(stderr, "original  %9lu %9lu\n", _suffixBgn[prefix], _suffixEnd[prefix]);
+  fprintf(stderr, "original  %9lu %9lu\n", _suffixBgn[prefix], _suffixBgn[prefix + 1]);
   fprintf(stderr, "final     %9lu %9lu\n", bgn, end);
   fprintf(stderr, "\n");
 
   bgn = _suffixBgn[prefix];
-  end = _suffixEnd[prefix];
-
-  fprintf(stderr, "BINARY SEARCH the bucket %lu-%lu for suffix %s.\n", bgn, end, toHex(suffix));
+  end = _suffixBgn[prefix + 1];
 
   while (bgn + 8 < end) {
     mid = bgn + (end - bgn) / 2;
 
     tag = _sufData->get(mid);
 
-    fprintf(stderr, "TEST bgn %8lu %8lu %8lu end -- dat %s =?= %s suffix\n",
-            bgn, mid, end, toHex(tag), toHex(suffix));
+    fprintf(stderr, "TEST bgn %8lu %8lu %8lu end -- dat %lu =?= %lu suffix\n", bgn, mid, end, tag, suffix);
 
     if (tag == suffix)
       return(true);
@@ -620,35 +526,10 @@ merylExactLookup::exists_test(kmer k) {
       bgn = mid + 1;
   }
 
-  //  Exhaustively search the bucket.
-
-  fprintf(stderr, "LINEAR SEARCH the bucket %lu-%lu for suffix %s.\n", bgn, end, toHex(suffix));
-
-  for (mid=bgn; mid < end; mid++) {
-    tag = _sufData->get(mid);
-
-    fprintf(stderr, "ITER bgn %8lu %8lu %8lu end -- dat %s\n",
-            bgn, mid, end, toHex(tag));
-
-    if (tag == suffix)
-      return(true);
-  }
-
-  //  Exhaustively search all buckets.
-  //
-  //  THIS IS WRONG - it needs to skip the empty buckets in the middle, so needs to
-  //  iterate over each suffixBgn/suffixEnd pair individually.
-
-  bgn = _suffixBgn[0];
-  end = _suffixEnd[_nPrefix - 1];
-
-  fprintf(stderr, "LINEAR SEARCH the entire table %lu-%lu for suffix %s.\n", bgn, end, toHex(suffix));
-
   for (mid=bgn; mid < end; mid++) {
     tag = _sufData->get(mid);
 
-    fprintf(stderr, "ITER bgn %8lu %8lu %8lu end -- dat %s\n",
-            bgn, mid, end, toHex(tag));
+    fprintf(stderr, "ITER bgn %8lu %8lu %8lu end -- dat %lu =?= %lu suffix\n", bgn, mid, end, tag, suffix);
 
     if (tag == suffix)
       return(true);
diff --git a/ext/meryl/src/utility/src/utility/kmers-files.C b/ext/meryl/src/utility/src/utility/kmers-files.C
index 8814a59..627fef5 100644
--- a/ext/meryl/src/utility/src/utility/kmers-files.C
+++ b/ext/meryl/src/utility/src/utility/kmers-files.C
@@ -119,7 +119,10 @@ merylFileBlockReader::decodeBlock(void) {
   if (_data == NULL)
     return;
 
-  resizeArrayPair(_suffixes, _values, 0, _nKmersMax, _nKmers, _raAct::doNothing);
+  //fprintf(stderr, "decodeBlock() nKmersMax %lu nKmers %lu\n", _nKmersMax, _nKmers);
+
+  resizeArrayPair(_suffixes, _values, 0, _nKmersMax, _nKmers, resizeArray_doNothing);
+
   decodeBlock(_suffixes, _values);
 }
 
diff --git a/ext/meryl/src/utility/src/utility/kmers-histogram.C b/ext/meryl/src/utility/src/utility/kmers-histogram.C
index 232807e..8e4b267 100644
--- a/ext/meryl/src/utility/src/utility/kmers-histogram.C
+++ b/ext/meryl/src/utility/src/utility/kmers-histogram.C
@@ -95,7 +95,7 @@ merylHistogram::dump(stuffedBits *bits) {
     }
   }
 
-  for (auto it=_histBig.begin(); it != _histBig.end(); it++) {
+  for (map<uint64,uint64>::iterator it=_histBig.begin(); it != _histBig.end(); it++) {
     bits->setBinary(64, it->first);      //  Value
     bits->setBinary(64, it->second);     //  Number of occurrences
   }
diff --git a/ext/meryl/src/utility/src/utility/kmers-histogram.H b/ext/meryl/src/utility/src/utility/kmers-histogram.H
index 921528a..4816f7c 100644
--- a/ext/meryl/src/utility/src/utility/kmers-histogram.H
+++ b/ext/meryl/src/utility/src/utility/kmers-histogram.H
@@ -24,8 +24,11 @@
 #error "include kmers.H, not this."
 #endif
 
+
 #include <map>
 
+using namespace std;
+
 
 //  Stores a histogram of kmer count values.
 
@@ -71,17 +74,17 @@ public:
   uint64    histogramOccurrences(uint32 i)        { return(_histOs[i]);   };
 
 private:
-  uint64                   _numUnique;
-  uint64                   _numDistinct;
-  uint64                   _numTotal;
+  uint64              _numUnique;
+  uint64              _numDistinct;
+  uint64              _numTotal;
 
-  uint32                   _histMax;    //  Max value that can be stored in _hist.
-  uint64                  *_hist;
-  std::map<uint64, uint64> _histBig;    //  Values bigger than _histMax; <value,occurrances>
+  uint32              _histMax;    //  Max value that can be stored in _hist.
+  uint64             *_hist;
+  map<uint64, uint64> _histBig;    //  Values bigger than _histMax; <value,occurrences>
 
-  uint64                   _histLen;    //  If loaded from disk, this is the unpacked histogram.
-  uint64                  *_histVs;     //  The value this histogram entry is counting.
-  uint64                  *_histOs;     //  The number of occurrences of that value.
+  uint64              _histLen;    //  If loaded from disk, this is the unpacked histogram.
+  uint64             *_histVs;     //  The value this histogram entry is counting.
+  uint64             *_histOs;     //  The number of occurrences of that value.
 };
 
 
diff --git a/ext/meryl/src/utility/src/utility/kmers-iterator.H b/ext/meryl/src/utility/src/utility/kmers-iterator.H
index 27ba8a1..bb02f8d 100644
--- a/ext/meryl/src/utility/src/utility/kmers-iterator.H
+++ b/ext/meryl/src/utility/src/utility/kmers-iterator.H
@@ -36,7 +36,7 @@ public:
     addSequence(NULL, 0);
   };
   kmerIterator(FILE *input);
-  kmerIterator(char const *buffer, uint64 bufferLen) {
+  kmerIterator(char *buffer, uint64 bufferLen) {
     assert(kmer::merSize() > 0);
     reset();
     addSequence(buffer, bufferLen);
@@ -48,7 +48,7 @@ public:
     _kmerValid  = _fmer.merSize() - 1;
   };
 
-  void       addSequence(char const *buffer, uint64 bufferLen) {
+  void       addSequence(char *buffer, uint64 bufferLen) {
     _buffer    = buffer;
     _bufferLen = bufferLen;
     _bufferPos = 0;
@@ -163,16 +163,16 @@ public:
   uint64     endPosition(void)  { return(_bufferPos);                };
 
 private:
-  uint32       _kmerSize;
-  uint32       _kmerLoad;
-  uint32       _kmerValid;
+  uint32    _kmerSize;
+  uint32    _kmerLoad;
+  uint32    _kmerValid;
 
-  char const  *_buffer;
-  uint64       _bufferLen;
-  uint64       _bufferPos;
+  char     *_buffer;
+  uint64    _bufferLen;
+  uint64    _bufferPos;
 
-  kmerTiny     _fmer;
-  kmerTiny     _rmer;
+  kmerTiny  _fmer;
+  kmerTiny  _rmer;
 };
 
 
diff --git a/ext/meryl/src/utility/src/utility/kmers-lookup.H b/ext/meryl/src/utility/src/utility/kmers-lookup.H
index 3feb1f0..4ec9d76 100644
--- a/ext/meryl/src/utility/src/utility/kmers-lookup.H
+++ b/ext/meryl/src/utility/src/utility/kmers-lookup.H
@@ -24,295 +24,247 @@
 #error "include kmers.H, not this."
 #endif
 
+
 class merylExactLookup {
 public:
-  merylExactLookup() {
+  merylExactLookup(merylFileReader *input_,
+                   uint32               maxMemory_ = 0,
+                   uint64               minValue_  = 0,
+                   uint64               maxValue_  = UINT64_MAX) {
+
+    _input     = input_;
+    _maxMemory = maxMemory_;   //  maxMemory_ is in GB; _maxMemory should be in BITS!
+    _verbose   = true;
+
+    if (_maxMemory == 0)
+      _maxMemory   = getPhysicalMemorySize() * 8;
+    else
+      _maxMemory <<= 33;
+
+    initialize(minValue_, maxValue_);  //  Do NOT use minValue_ or maxValue_ from now on!
   };
+
   ~merylExactLookup() {
     delete [] _suffixBgn;
-    delete [] _suffixLen;
     delete [] _suffixEnd;
     delete    _sufData;
     delete    _valData;
   };
 
-public:
-  //  Optional.  Quickly analyze the input kmers and compute the minimum and
-  //  'optimal' memory needed for the lookup tables.
-  //
-  //  maxMemInGB is used as an upper limit on minMem and optMem.
+  //  To use this object:
+  //    lookup = new merylExactLookup(input, 0, 0, UINT32_MAX);
+  //    if (lookup->configure() == true)
+  //      lookup->load()
   //
-  void     estimateMemoryUsage(merylFileReader *input_,
-                               double           maxMemInGB_,
-                               double          &minMemInGB_,
-                               double          &optMemInGB_,
-                               kmvalu           minValue_ = 0,
-                               kmvalu           maxValue_ = kmvalumax);
-
-public:
-  //  Load a new meryl database into the lookup table.
-  //
-  //  maxMemInGB is used as an upper limit on the size of the lookup table.
-  //  The actual size used is determined from useMinimalMemory or
-  //  useOptimalMemory, as returned from estinmateMemoryUsage().
-  //
-  //  The difference between 'minimal' and 'optimal' is one of speed; lookups
-  //  with 'minimal' memory will be slower than with 'optimal' memory;
-  //  however, it isn't known how significant this is.
-  //
-  //  The return value is the actual memory used, in GB, or 0.0 if loading
-  //  failed.  (I think)
-  //
-  double   load(merylFileReader *input_,
-                double           maxMemInGB_,
-                bool             useMinimalMemory,
-                bool             useOptimalMemory,
-                kmvalu           minValue_      = 0,
-                kmvalu           maxValue_      = kmvalumax);
 
+private:
+  void     initialize(uint64 minValue_, uint64 maxValue_);
 public:
-  //  For describing what we've loaded.
-  //
-  uint64   nKmers(void)  {  return(_nKmersLoaded);  };
-
-  //  The accessors.
-  //
-  //  Return true/false if the kmer exists/does not.
-  //  Return true/false if the kmer exists/does not, and populate 'value' with the value.
-  //  Return the value of the kmer, or zero if it doesn't exist.
-  //
-  bool     exists(kmer k);
-  bool     exists(kmer k, kmvalu &value);
-  kmvalu   value(kmer k);
-
-  //  For testing the implementation.
-  //
-  bool     exists_test(kmer k);
-
+  bool     configure(void);
 private:
-  //  Used internally for construction.  As tempting is it seems to call
-  //  initialize() or configure() directly, you can't.
-  //
-  void     initialize(merylFileReader *input_, kmvalu minValue_, kmvalu maxValue_);
-  void     configure(double  memInGB,
-                     double &memInGBmin,
-                     double &memInGBmax,
-                     bool    useMinimalMemory,
-                     bool    useOptimalMemory,
-                     bool    reportMemory,
-                     bool    reportSizes);
   void     count(void);
-  double   allocate(void);
+  void     allocate(void);
+public:
   void     load(void);
 
-  kmvalu   value_value(kmvalu value);
-
 private:
-  merylFileReader  *_input         = nullptr;
+  uint64           value_value(uint64 value) {
+    if (_valueBits == 0)               //  Return 'true' if no value
+      return(1);                       //  is stored.
 
-  uint64            _maxMemory     = 0;
-  bool              _verbose       = true;
+    value &= uint64MASK(_valueBits);
 
-  kmvalu            _minValue      = 0;    //  Minimum value stored in the table -| both of these filter the
-  kmvalu            _maxValue      = 0;    //  Maximum value stored in the table -| input kmers.
-  kmvalu            _valueOffset   = 0;    //  Offset of values stored in the table.
+    //if (value == 0)                    //  Return zero if the value
+    //  return(0);                       //  is actually zero.
 
-  uint64            _nKmersLoaded  = 0;
-  uint64            _nKmersTooLow  = 0;
-  uint64            _nKmersTooHigh = 0;
+    return(value + _valueOffset);      //  Otherwise, return the value.
+  };
 
-  uint32            _Kbits;
+public:
+  uint64           nKmers(void)  {  return(_nKmersLoaded);  };
 
-  uint32            _prefixBits    = 0;    //  How many high-end bits of the kmer is an index into _suffixBgn.
-  uint32            _suffixBits    = 0;    //  How many bits of the kmer are in the suffix table.
-  uint32            _valueBits     = 0;    //  How many bits of the suffix entry are data.
 
-  kmdata            _suffixMask    = 0;
+  //  Return true/false if the kmer exists/does not.
+  bool             exists(kmer k) {
+    kmdata  kmer   = (kmdata)k;
+    uint64  prefix = kmer >> _suffixBits;
+    kmdata  suffix = kmer  & _suffixMask;
 
-  uint64            _nPrefix       = 0;    //  How many entries in _suffixBgn  == 2 ^ _prefixBits.
-  uint64            _nSuffix       = 0;    //  How many entries in _suffixData == nDistinct in the input database.
+    uint64  bgn = _suffixBgn[prefix];
+    uint64  mid;
+    uint64  end = _suffixBgn[prefix + 1];
 
-  uint32            _prePtrBits    = 0;    //  How many bits wide is _suffixBgn (used only if _suffixBgn is a wordArray).
+    kmdata  tag;
 
-  uint64           *_suffixBgn = nullptr;  //  The start of a block of data in suffix Data.
-  uint64           *_suffixLen = nullptr;  //  The number of kmers to load in each block.
-  uint64           *_suffixEnd = nullptr;  //  The end of a block.  (NOTE: bgn + len != end)
-  wordArray        *_sufData   = nullptr;  //  Finally, kmer suffix data!
-  wordArray        *_valData   = nullptr;  //  Finally, value data!
-};
+    //  Binary search for the matching tag.
 
+    while (bgn + 8 < end) {
+      mid = bgn + (end - bgn) / 2;
 
+      tag = _sufData->get(mid);
 
+      if (tag == suffix)
+        return(true);
 
+      if (suffix < tag)
+        end = mid;
 
+      else
+        bgn = mid + 1;
+    }
 
-inline
-kmvalu
-merylExactLookup::value_value(kmvalu value) {
-  if (_valueBits == 0)               //  Return 'true' if no value
-    return(1);                       //  is stored.
+    //  Switch to linear search when we're down to just a few candidates.
 
-  value &= buildLowBitMask<kmvalu>(_valueBits);
+    for (mid=bgn; mid < end; mid++) {
+      tag = _sufData->get(mid);
 
-  //if (value == 0)                    //  Return zero if the value
-  //  return(0);                       //  is actually zero.
+      if (tag == suffix)
+        return(true);
+    }
 
-  return(value + _valueOffset);      //  Otherwise, return the value.
-};
+    return(false);
+  }
 
 
+  //  Return true/false if the kmer exists/does not.
+  //  And populate 'value' with the value of the kmer.
+  bool             exists(kmer k, uint64 &value) {
+    kmdata  kmer   = (kmdata)k;
+    uint64  prefix = kmer >> _suffixBits;
+    kmdata  suffix = kmer  & _suffixMask;
 
-//  Return true/false if the kmer exists/does not.
-inline
-bool
-merylExactLookup::exists(kmer k) {
-  kmdata  kmer   = (kmdata)k;
-  uint64  prefix = kmer >> _suffixBits;
-  kmdata  suffix = kmer  & _suffixMask;
+    uint64  bgn = _suffixBgn[prefix];
+    uint64  mid;
+    uint64  end = _suffixBgn[prefix + 1];
 
-  uint64  bgn = _suffixBgn[prefix];
-  uint64  mid;
-  uint64  end = _suffixEnd[prefix];
+    kmdata  tag;
 
-  kmdata  tag;
+    //  Binary search for the matching tag.
 
-  //  Binary search for the matching tag.
+    while (bgn + 8 < end) {
+      mid = bgn + (end - bgn) / 2;
 
-  while (bgn + 8 < end) {
-    mid = bgn + (end - bgn) / 2;
+      tag = _sufData->get(mid);
 
-    tag = _sufData->get(mid);
+      if (tag == suffix) {
+        if (_valueBits == 0)
+          value = 1;
+        else
+          value = _valData->get(mid);
+        return(true);
+      }
 
-    if (tag == suffix)
-      return(true);
+      if (suffix < tag)
+        end = mid;
 
-    if (suffix < tag)
-      end = mid;
+      else
+        bgn = mid + 1;
+    }
 
-    else
-      bgn = mid + 1;
-  }
+    //  Switch to linear search when we're down to just a few candidates.
 
-  //  Switch to linear search when we're down to just a few candidates.
+    for (mid=bgn; mid < end; mid++) {
+      tag = _sufData->get(mid);
 
-  for (mid=bgn; mid < end; mid++) {
-    tag = _sufData->get(mid);
+      if (tag == suffix) {
+        if (_valueBits == 0)
+          value = 1;
+        else
+          value = _valData->get(mid);
+        return(true);
+      }
+    }
 
-    if (tag == suffix)
-      return(true);
+    value = 0;
+    return(false);
   }
 
-  return(false);
-}
 
+  //  Returns the value of the kmer, '0' if it doesn't exist.
+  uint64           value(kmer k) {
+    kmdata  kmer   = (kmdata)k;
+    uint64  prefix = kmer >> _suffixBits;
+    kmdata  suffix = kmer  & _suffixMask;
 
+    uint64  bgn = _suffixBgn[prefix];
+    uint64  mid;
+    uint64  end = _suffixBgn[prefix + 1];
 
-//  Return true/false if the kmer exists/does not.
-//  And populate 'value' with the value of the kmer.
-inline
-bool
-merylExactLookup::exists(kmer k, kmvalu &value) {
-  kmdata  kmer   = (kmdata)k;
-  kmdata  prefix = kmer >> _suffixBits;
-  kmdata  suffix = kmer  & _suffixMask;
+    kmdata  tag;
 
-  uint64  bgn = _suffixBgn[prefix];
-  uint64  mid;
-  uint64  end = _suffixEnd[prefix];
+    //  Binary search for the matching tag.
 
-  kmdata  tag;
+    while (bgn + 8 < end) {
+      mid = bgn + (end - bgn) / 2;
 
-  //  Binary search for the matching tag.
+      tag = _sufData->get(mid);
 
-  while (bgn + 8 < end) {
-    mid = bgn + (end - bgn) / 2;
+      if (tag == suffix) {
+        if (_valueBits == 0)
+          return(1);
+        else
+          return(_valData->get(mid));
+      }
 
-    tag = _sufData->get(mid);
+      if (suffix < tag)
+        end = mid;
 
-    if (tag == suffix) {
-      if (_valueBits == 0)
-        value = 1;
       else
-        value = _valData->get(mid);
-      return(true);
+        bgn = mid + 1;
     }
 
-    if (suffix < tag)
-      end = mid;
-
-    else
-      bgn = mid + 1;
-  }
+    //  Switch to linear search when we're down to just a few candidates.
 
-  //  Switch to linear search when we're down to just a few candidates.
+    for (mid=bgn; mid < end; mid++) {
+      tag = _sufData->get(mid);
 
-  for (mid=bgn; mid < end; mid++) {
-    tag = _sufData->get(mid);
-
-    if (tag == suffix) {
-      if (_valueBits == 0)
-        value = 1;
-      else
-        value = _valData->get(mid);
-      return(true);
+      if (tag == suffix) {
+        if (_valueBits == 0)
+          return(1);
+        else
+          return(_valData->get(mid));
+      }
     }
-  }
-
-  value = 0;
-  return(false);
-}
 
+    return(0);
+  };
 
-//  Returns the value of the kmer, '0' if it doesn't exist.
-inline
-kmvalu
-merylExactLookup::value(kmer k) {
-  kmdata  kmer   = (kmdata)k;
-  kmdata  prefix = kmer >> _suffixBits;
-  kmdata  suffix = kmer  & _suffixMask;
 
-  uint64  bgn = _suffixBgn[prefix];
-  uint64  mid;
-  uint64  end = _suffixEnd[prefix];
+  bool             exists_test(kmer k);
 
-  kmdata  tag;
 
-  //  Binary search for the matching tag.
+private:
+  merylFileReader  *_input;
 
-  while (bgn + 8 < end) {
-    mid = bgn + (end - bgn) / 2;
+  uint64                _maxMemory;
+  bool                  _verbose;
 
-    tag = _sufData->get(mid);
+  uint64                _minValue;    //  Minimum value stored in the table -| both of these filter the
+  uint64                _maxValue;    //  Maximum value stored in the table -| input kmers.
+  uint64                _valueOffset; //  Offset of values stored in the table.
 
-    if (tag == suffix) {
-      if (_valueBits == 0)
-        return(1);
-      else
-        return(_valData->get(mid));
-    }
+  uint64                _nKmersLoaded;
+  uint64                _nKmersTooLow;
+  uint64                _nKmersTooHigh;
 
-    if (suffix < tag)
-      end = mid;
+  uint32                _Kbits;
 
-    else
-      bgn = mid + 1;
-  }
+  uint32                _prefixBits;  //  How many high-end bits of the kmer is an index into _suffixBgn.
+  uint32                _suffixBits;  //  How many bits of the kmer are in the suffix table.
+  uint32                _valueBits;   //  How many bits of the suffix entry are data.
 
-  //  Switch to linear search when we're down to just a few candidates.
+  kmdata                _suffixMask;
+  uint64                _dataMask;
 
-  for (mid=bgn; mid < end; mid++) {
-    tag = _sufData->get(mid);
+  uint64                _nPrefix;     //  How many entries in _suffixBgn  == 2 ^ _prefixBits.
+  uint64                _nSuffix;     //  How many entries in _suffixData == nDistinct in the input database.
 
-    if (tag == suffix) {
-      if (_valueBits == 0)
-        return(1);
-      else
-        return(_valData->get(mid));
-    }
-  }
+  uint32                _prePtrBits;  //  How many bits wide is _suffixBgn (used only if _suffixBgn is a wordArray).
 
-  return(0);
+  uint64               *_suffixBgn;   //  The start of a block of data in suffix Data.  The end is the next start.
+  uint64               *_suffixEnd;   //  The end.  Temporary.
+  wordArray            *_sufData;     //  Finally, kmer suffix data!
+  wordArray            *_valData;     //  Finally, value data!
 };
 
-
 #endif  //  MERYL_UTIL_KMER_LOOKUP_H
diff --git a/ext/meryl/src/utility/src/utility/kmers-reader.C b/ext/meryl/src/utility/src/utility/kmers-reader.C
index d20b017..3aa4239 100644
--- a/ext/meryl/src/utility/src/utility/kmers-reader.C
+++ b/ext/meryl/src/utility/src/utility/kmers-reader.C
@@ -495,7 +495,7 @@ merylFileReader::nextMer(void) {
 
   //  Make sure we have space for the decoded data
 
-  resizeArrayPair(_suffixes, _values, 0, _nKmersMax, _nKmers, _raAct::doNothing);
+  resizeArrayPair(_suffixes, _values, 0, _nKmersMax, _nKmers, resizeArray_doNothing);
 
   //  Decode the block into _OUR_ space.
   //
diff --git a/ext/meryl/src/utility/src/utility/kmers-tiny.H b/ext/meryl/src/utility/src/utility/kmers-tiny.H
index 9e2b5c2..c8d9373 100644
--- a/ext/meryl/src/utility/src/utility/kmers-tiny.H
+++ b/ext/meryl/src/utility/src/utility/kmers-tiny.H
@@ -28,12 +28,9 @@
 
 typedef uint128    kmdata;   //  128 bits of kmer data
 typedef uint32     kmpref;   //   32 bits of kmer prefix == 6 bits file prefix, 6 (default) suffix prefix
-typedef uint32     kmvalu;   //   32 bits of kmer value
+typedef uint32     kmvalu;   //   32 bits of kmer count
 typedef uint64     kmcolo;   //   64 bits of kmer color
 
-constexpr kmvalu   kmvalumax = uint32max;
-constexpr kmcolo   kmcolomax = uint64max;
-
 
 class  kmerTiny {
 public:
@@ -70,15 +67,6 @@ public:
   //  to make space for the new base.  Unlike the 'standard' two-bit encoding,
   //  these encode bases as A=00, C=01, G=11, T=10.
   //
-  //       +---------+-- upper/lower case bit
-  //       |         |
-  //    A 1000001 a 1100001 == 00
-  //    C 1000011 c 1100011 == 01
-  //    G 1000111 g 1100111 == 11
-  //    T 1010100 t 1110100 == 10
-  //                    ||
-  //                    ++-- bits used for 2-bit encoding
-  //
   void        addR(kmdata base)       { _mer  = (((_mer << 2) & _fullMask) | (((base >> 1) & 0x03llu)          )              );  };
   void        addL(kmdata base)       { _mer  = (((_mer >> 2) & _leftMask) | (((base >> 1) & 0x03llu) ^ 0x02llu) << _leftShift);  };
 
@@ -90,16 +78,16 @@ public:
 
     //  Complement the bases
 
-    mer ^= build_uint128(0xaaaaaaaaaaaaaaaallu, 0xaaaaaaaaaaaaaaaallu);
+    mer ^= uint128NUMBER(0xaaaaaaaaaaaaaaaallu, 0xaaaaaaaaaaaaaaaallu);
 
     //  Reverse the mer
 
-    mer = ((mer >>  2) & build_uint128(0x3333333333333333llu, 0x3333333333333333llu)) | ((mer <<  2) & build_uint128(0xccccccccccccccccllu, 0xccccccccccccccccllu));
-    mer = ((mer >>  4) & build_uint128(0x0f0f0f0f0f0f0f0fllu, 0x0f0f0f0f0f0f0f0fllu)) | ((mer <<  4) & build_uint128(0xf0f0f0f0f0f0f0f0llu, 0xf0f0f0f0f0f0f0f0llu));
-    mer = ((mer >>  8) & build_uint128(0x00ff00ff00ff00ffllu, 0x00ff00ff00ff00ffllu)) | ((mer <<  8) & build_uint128(0xff00ff00ff00ff00llu, 0xff00ff00ff00ff00llu));
-    mer = ((mer >> 16) & build_uint128(0x0000ffff0000ffffllu, 0x0000ffff0000ffffllu)) | ((mer << 16) & build_uint128(0xffff0000ffff0000llu, 0xffff0000ffff0000llu));
-    mer = ((mer >> 32) & build_uint128(0x00000000ffffffffllu, 0x00000000ffffffffllu)) | ((mer << 32) & build_uint128(0xffffffff00000000llu, 0xffffffff00000000llu));
-    mer = ((mer >> 64) & build_uint128(0x0000000000000000llu, 0xffffffffffffffffllu)) | ((mer << 64) & build_uint128(0xffffffffffffffffllu, 0x0000000000000000llu));
+    mer = ((mer >>  2) & uint128NUMBER(0x3333333333333333llu, 0x3333333333333333llu)) | ((mer <<  2) & uint128NUMBER(0xccccccccccccccccllu, 0xccccccccccccccccllu));
+    mer = ((mer >>  4) & uint128NUMBER(0x0f0f0f0f0f0f0f0fllu, 0x0f0f0f0f0f0f0f0fllu)) | ((mer <<  4) & uint128NUMBER(0xf0f0f0f0f0f0f0f0llu, 0xf0f0f0f0f0f0f0f0llu));
+    mer = ((mer >>  8) & uint128NUMBER(0x00ff00ff00ff00ffllu, 0x00ff00ff00ff00ffllu)) | ((mer <<  8) & uint128NUMBER(0xff00ff00ff00ff00llu, 0xff00ff00ff00ff00llu));
+    mer = ((mer >> 16) & uint128NUMBER(0x0000ffff0000ffffllu, 0x0000ffff0000ffffllu)) | ((mer << 16) & uint128NUMBER(0xffff0000ffff0000llu, 0xffff0000ffff0000llu));
+    mer = ((mer >> 32) & uint128NUMBER(0x00000000ffffffffllu, 0x00000000ffffffffllu)) | ((mer << 32) & uint128NUMBER(0xffffffff00000000llu, 0xffffffff00000000llu));
+    mer = ((mer >> 64) & uint128NUMBER(0x0000000000000000llu, 0xffffffffffffffffllu)) | ((mer << 64) & uint128NUMBER(0xffffffffffffffffllu, 0x0000000000000000llu));
 
     //  Shift and mask out the bases not in the mer
 
@@ -150,7 +138,7 @@ public:
     kmdata  mask = _mer;
 
     mask >>= 1;
-    mask  &= build_uint128(0x5555555555555555llu, 0x5555555555555555llu);
+    mask  &= uint128NUMBER(0x5555555555555555llu, 0x5555555555555555llu);
 
     fmer ^= mask;      //  Convert from ACTG ordering to ACGT ordering.
     rmer ^= mask;
@@ -167,12 +155,10 @@ public:
     return(_mer);
   };
 
-  operator uint64 () const = delete;   //  Explicitly fail of someone tries to convert us to an integer
-  operator  int64 () const = delete;   //  instead of to a kmdata.  Without these, a cast to, say, uint64
-  operator uint32 () const = delete;   //  would be first convert to kmdata (uint128) then down to uint64.
-  operator  int32 () const = delete;   //  With these, you'll either get a compile-time error (because
-  operator uint16 () const = delete;   //  these are private) or link time error (because they're not
-  operator  int16 () const = delete;   //  defined.
+  operator uint64 () const {
+    assert(0);
+    return(_mer);
+  };
 
   void     setPrefixSuffix(kmpref prefix, kmdata suffix, uint32 width) {
     _mer   = prefix;
diff --git a/ext/meryl/src/utility/src/utility/kmers-writer-block.C b/ext/meryl/src/utility/src/utility/kmers-writer-block.C
index 4ca5535..b8a2708 100644
--- a/ext/meryl/src/utility/src/utility/kmers-writer-block.C
+++ b/ext/meryl/src/utility/src/utility/kmers-writer-block.C
@@ -24,7 +24,7 @@ merylBlockWriter::merylBlockWriter(merylFileWriter *writer) {
 
   _writer = writer;
 
-  strncpy(_outName, _writer->_outName, FILENAME_MAX+1);
+  strncpy(_outName, _writer->_outName, FILENAME_MAX);
 
   //  Encoding data
 
@@ -284,7 +284,7 @@ merylBlockWriter::mergeBatches(uint32 oi) {
 
     //  Setup the merge.
 
-    resizeArrayPair(suffixes, values, 0, nKmersMax, totnKmers);
+    resizeArrayPair(suffixes, values, 0, nKmersMax, totnKmers, resizeArray_doNothing);
 
     //  Merge!  We don't know the number of different kmers in the input, and are forced
     //  to loop infinitely.
diff --git a/ext/meryl/src/utility/src/utility/kmers-writer-stream.C b/ext/meryl/src/utility/src/utility/kmers-writer-stream.C
index 7c9fb1f..c9055df 100644
--- a/ext/meryl/src/utility/src/utility/kmers-writer-stream.C
+++ b/ext/meryl/src/utility/src/utility/kmers-writer-stream.C
@@ -24,7 +24,7 @@ merylStreamWriter::merylStreamWriter(merylFileWriter *writer, uint32 fileNumber)
 
   _writer = writer;
 
-  strncpy(_outName, _writer->_outName, FILENAME_MAX+1);
+  strncpy(_outName, _writer->_outName, FILENAME_MAX);
 
   //  Encoding data
 
diff --git a/ext/meryl/src/utility/src/utility/kmers-writer.C b/ext/meryl/src/utility/src/utility/kmers-writer.C
index 89a660c..c0ca7af 100644
--- a/ext/meryl/src/utility/src/utility/kmers-writer.C
+++ b/ext/meryl/src/utility/src/utility/kmers-writer.C
@@ -23,14 +23,11 @@
 void
 merylFileWriter::initialize(uint32 prefixSize, bool isMultiSet) {
 
-  //  Fail if we're already initialized and asked to change the prefix size.
-  //  But just ignore the re-init request if the prefix size is the same.
-
   if ((_initialized == true) &&
       (prefixSize != _prefixSize))
     fprintf(stderr, "merylFileWriter::initialize()-- asked to initialize with different prefixSize (new %u existing %u).\n", prefixSize, _prefixSize), exit(1);
 
-  if (_initialized == true)
+  if (_initialized == true)    //  Nothing to do if we're already done.
     return;
 
   //  If the global mersize isn't set, we're hosed.
@@ -57,12 +54,12 @@ merylFileWriter::initialize(uint32 prefixSize, bool isMultiSet) {
       _prefixSize = 12;  //max((uint32)8, 2 * kmer::merSize() / 3);
 
     _suffixSize         = 2 * kmer::merSize() - _prefixSize;
-    _suffixMask         = buildLowBitMask<kmdata>(_suffixSize);
+    _suffixMask         = uint64MASK(_suffixSize);
 
     //  Decide how many files to write.  We can make up to 2^32 files, but will
     //  run out of file handles _well_ before that.  For now, limit to 2^6 = 64 files.
 
-    _numFilesBits       = 6;
+    _numFilesBits       = 6;  //(_prefixSize < 7) ? _prefixSize : 6;
     _numBlocksBits      = _prefixSize - _numFilesBits;
 
     _numFiles           = (uint64)1 << _numFilesBits;
@@ -276,7 +273,7 @@ merylFileWriter::writeBlockToFile(FILE            *datFile,
 
   //  Save the index entry.
 
-  uint64  block = blockPrefix & buildLowBitMask<uint64>(_numBlocksBits);
+  uint64  block = blockPrefix & uint64MASK(_numBlocksBits);
 
   datFileIndex[block].set(blockPrefix, datFile, nKmers);
 
diff --git a/ext/meryl/src/utility/src/utility/kmers-writer.H b/ext/meryl/src/utility/src/utility/kmers-writer.H
index 35e1038..61df83a 100644
--- a/ext/meryl/src/utility/src/utility/kmers-writer.H
+++ b/ext/meryl/src/utility/src/utility/kmers-writer.H
@@ -73,7 +73,7 @@ private:
   uint32                     _prefixSize;
 
   uint32                     _suffixSize;
-  kmdata                     _suffixMask;
+  uint64                     _suffixMask;
 
   uint32                     _numFilesBits;
   uint32                     _numBlocksBits;
diff --git a/ext/meryl/src/utility/src/utility/logging.C b/ext/meryl/src/utility/src/utility/logging.C
index c5cacc9..60638fa 100644
--- a/ext/meryl/src/utility/src/utility/logging.C
+++ b/ext/meryl/src/utility/src/utility/logging.C
@@ -99,9 +99,11 @@ public:
 
     _part       = 0;
 
-    _length     = 512 * 1024 * 1024;   //  Forces a rotate() on the first write.
+    _length     = 0;
     _lengthMax  = 512 * 1024 * 1024;
 
+    _bufferSize = bufferSize;   //  Forces a rotate() on the first write.
+
     _output     = NULL;
   };
 
@@ -146,10 +148,6 @@ public:
 
     _part++;
 
-    if (_prefix[0] == 0)
-      fprintf(stderr, "_prefix not set for thread %d\n", _threadID);
-    assert(_prefix[0] != 0);
-
     if (_threadID < UINT32_MAX) {
       snprintf(_filePrefix, FILENAME_MAX, "%s.%03u.%s",         _prefix, _order, _name);
       snprintf(_fileName,   FILENAME_MAX, "%s.%03u.%s.thr%03d", _prefix, _order, _name, _threadID);
@@ -216,6 +214,8 @@ private:
 
   uint32        _part;
 
+  uint32        _bufferSize;
+
   writeBuffer  *_output;
   uint64        _length;
   uint64        _lengthMax;
@@ -230,18 +230,10 @@ private:
 
 logFile::logFile(char const *prefix, uint64 maxSize) {
 
-  _threadMax = 1024;
-  _threadNum = omp_get_max_threads();
-
-  _maxSize   = maxSize;
-
-  _mainI     = new logFileInstance(prefix, UINT32_MAX, maxSize);
-  _threadI   = new logFileInstance * [_threadMax];
+  _mainI   = new logFileInstance(prefix, UINT32_MAX, maxSize);
+  _threadI = new logFileInstance * [omp_get_max_threads()];
 
-  for (uint32 ii=0; ii<_threadMax; ii++)
-    _threadI[ii] = nullptr;
-
-  for (uint32 ii=0; ii<_threadNum; ii++)
+  for (uint32 ii=0; ii<omp_get_max_threads(); ii++)
     _threadI[ii] = new logFileInstance(prefix, ii, maxSize);
 
   _levelsLen = 0;
@@ -254,9 +246,11 @@ logFile::logFile(char const *prefix, uint64 maxSize) {
 
 logFile::~logFile() {
 
+  fprintf(stderr, "~logFile\n");
+
   delete    _mainI;
 
-  for (uint32 ii=0; ii<_threadMax; ii++)
+  for (uint32 ii=0; ii<omp_get_max_threads(); ii++)
     delete _threadI[ii];
   delete [] _threadI;
 
@@ -271,10 +265,8 @@ logFile::setPrefix(char const *prefix) {
 
   _mainI->setPrefix(prefix);
 
-  for (uint32 ii=0; ii<_threadMax; ii++) {
-    if (_threadI[ii])
-      _threadI[ii]->setPrefix(prefix);
-  }
+  for (uint32 ii=0; ii<omp_get_max_threads(); ii++)
+    _threadI[ii]->setPrefix(prefix);
 }
 
 
@@ -296,9 +288,8 @@ logFile::setName(char const *name) {
 
   _mainI->setName(name);
 
-  for (uint32 ii=0; ii<_threadMax; ii++)
-    if (_threadI[ii])
-      _threadI[ii]->setName(name);
+  for (uint32 ii=0; ii<omp_get_max_threads(); ii++)
+    _threadI[ii]->setName(name);
 }
 
 
@@ -307,9 +298,8 @@ logFile::setMaxSize(uint64 size) {
 
   _mainI->setMaxSize(size);
 
-  for (uint32 ii=0; ii<_threadMax; ii++)
-    if (_threadI[ii])
-      _threadI[ii]->setMaxSize(size);
+  for (uint32 ii=0; ii<omp_get_max_threads(); ii++)
+    _threadI[ii]->setMaxSize(size);
 }
 
 
@@ -373,7 +363,7 @@ logFile::enable(char const *optionString, char const *levelName) {
     optionString++;
   }
 
-  while ((*optionString != 0)) {
+  while ((*optionString != 0) && (*optionString == '-')) {
     verbosity++;
     optionString++;
   }
@@ -477,35 +467,12 @@ logFile::writeStatus(char const *fmt, va_list ap) {
 
 void
 logFile::writeLog(char const *fmt, va_list ap) {
-  int32   nt = omp_get_num_threads();   //  Number of threads currently active
-  int32   tn = omp_get_thread_num();    //  ID of this thread
+  int32             nt = omp_get_num_threads();
+  int32             tn = omp_get_thread_num();
 
-  //  If tn is more than we have space for we need to allocate a new
-  //  _threadI array.  But this is hard.  So just blow up.
+  logFileInstance  *lf = (nt == 1) ? (_mainI) : (_threadI[tn]);
 
-  if (tn >= _threadMax) {
-    fprintf(stderr, "TOO MANY THREADS!\n");
-    assert(0);
-  }
-
-  //  If we're only running a single thread, or we have already allocated an
-  //  output for this thread, we can immediately write the log.
-
-  if (nt == 1) {
-    _mainI->writeLog(fmt, ap);
-  }
-
-  else if (_threadI[tn]) {
-    _threadI[tn]->writeLog(fmt, ap);
-  }
-
-  //  Otherwise, we need to allocate a new thread output and set it up before
-  //  we can write.
-
-  else {
-    _threadI[tn] = new logFileInstance(getPrefix(), tn, _maxSize);
-    _threadI[tn]->writeLog(fmt, ap);
-  }
+  lf->writeLog(fmt, ap);
 }
 
 
@@ -619,8 +586,7 @@ void
 logFile::flush(void) {
   _mainI->flush();
 
-  for (uint32 ii=0; ii<_threadMax; ii++)
-    if (_threadI[ii])
-      _threadI[ii]->flush();
+  for (uint32 ii=0; ii<omp_get_max_threads(); ii++)
+    _threadI[ii]->flush();
 }
 
diff --git a/ext/meryl/src/utility/src/utility/logging.H b/ext/meryl/src/utility/src/utility/logging.H
index 16faeb6..d4a9eb3 100644
--- a/ext/meryl/src/utility/src/utility/logging.H
+++ b/ext/meryl/src/utility/src/utility/logging.H
@@ -152,11 +152,6 @@ public:
   void        flush(void);
 
 private:
-  uint32                       _threadMax;   //  How many threads we can allocate.
-  uint32                       _threadNum;   //  How many threads we have configured.
-
-  uint64                       _maxSize;
-
   logFileInstance             *_mainI;
   logFileInstance            **_threadI;
 
diff --git a/ext/meryl/src/utility/src/utility/mt19937ar.C b/ext/meryl/src/utility/src/utility/mt19937ar.C
index 70b1728..991f053 100644
--- a/ext/meryl/src/utility/src/utility/mt19937ar.C
+++ b/ext/meryl/src/utility/src/utility/mt19937ar.C
@@ -68,7 +68,7 @@
 
 //  initialize with a single seed
 void
-mtRandom::mtSetSeed(uint32 s) {
+mtRandom::construct(uint32 s) {
 
   mt[0] = s;
 
@@ -79,7 +79,7 @@ mtRandom::mtSetSeed(uint32 s) {
   for (mti=1; mti<MT_N; mti++)
     mt[mti] = (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
 
-  mag01[0] = 0;
+  mag01[0] = uint32ZERO;
   mag01[1] = MT_MATRIX_A;
 }
 
@@ -92,7 +92,7 @@ mtRandom::mtSetSeed(uint32 s) {
 /* slight change for C++, 2004/2/26 */
 mtRandom::mtRandom(uint32 *init_key, uint32 key_length) {
 
-  mtSetSeed(19650218UL);
+  construct(19650218UL);
 
   int   i   = 1;
   int   j   = 0;
@@ -136,14 +136,14 @@ mtRandom::mtRandom32(void) {
 
     for (kk=0; kk < MT_N - MT_M; kk++) {
       y = (mt[kk] & MT_UPPER_MASK) | (mt[kk+1] & MT_LOWER_MASK);
-      mt[kk] = mt[kk + MT_M] ^ (y >> 1) ^ mag01[y & 0x00000001UL];
+      mt[kk] = mt[kk + MT_M] ^ (y >> 1) ^ mag01[y & uint32ONE];
     }
     for (; kk < MT_N-1; kk++) {
       y = (mt[kk] & MT_UPPER_MASK) | (mt[kk + 1] & MT_LOWER_MASK);
-      mt[kk] = mt[kk + (MT_M - MT_N)] ^ (y >> 1) ^ mag01[y & 0x00000001UL];
+      mt[kk] = mt[kk + (MT_M - MT_N)] ^ (y >> 1) ^ mag01[y & uint32ONE];
     }
     y = (mt[MT_N-1] & MT_UPPER_MASK) | (mt[0] & MT_LOWER_MASK);
-    mt[MT_N-1] = mt[MT_M-1] ^ (y >> 1) ^ mag01[y & 0x00000001UL];
+    mt[MT_N-1] = mt[MT_M-1] ^ (y >> 1) ^ mag01[y & uint32ONE];
 
     mti = 0;
   }
@@ -152,7 +152,7 @@ mtRandom::mtRandom32(void) {
 
   /* Tempering */
   y ^= (y >> 11);
-  y ^= (y << 7)  & 0x9d2c5680UL;
+  y ^= (y << 7) & 0x9d2c5680UL;
   y ^= (y << 15) & 0xefc60000UL;
   y ^= (y >> 18);
 
diff --git a/ext/meryl/src/utility/src/utility/mt19937ar.H b/ext/meryl/src/utility/src/utility/mt19937ar.H
index 4e46e1c..ac6ce97 100644
--- a/ext/meryl/src/utility/src/utility/mt19937ar.H
+++ b/ext/meryl/src/utility/src/utility/mt19937ar.H
@@ -40,15 +40,19 @@
   static const uint32 MT_LOWER_MASK = 0x7fffffffUL;  //  least significant r bits
 
 class mtRandom {
+private:
+  void   construct(uint32 s);
+
 public:
-  mtRandom()           { mtSetSeed(getpid() * time(NULL)); };
-  mtRandom(uint32 s)   { mtSetSeed(s);                     };
+  mtRandom()           { construct(getpid() * time(NULL)); };
+  mtRandom(uint32 s)   { construct(s);                     };
   mtRandom(uint32 *init_key, uint32 key_length);
 
-  void     mtSetSeed(uint32 s);
+  ~mtRandom() {
+  };
 
-  uint32   mtRandom32(void);
-  uint64   mtRandom64(void)   { return((((uint64)mtRandom32()) << 32) | (uint64)mtRandom32()); }
+  uint32     mtRandom32(void);
+  uint64     mtRandom64(void)   { return((((uint64)mtRandom32()) << 32) | (uint64)mtRandom32()); }
 
   //  Real valued randomness
   //    mtRandomRealOpen()    -- on [0,1) real interval
@@ -67,9 +71,9 @@ public:
 
   //  returns a random number with gaussian distribution, mean of zero and std.dev. of 1
   //
-  double   mtRandomGaussian(double mean=0.0, double stddev=1.0);
+  double  mtRandomGaussian(double mean=0.0, double stddev=1.0);
 
-  double   mtRandomExponential(double lambda, double tau=1.0);
+  double  mtRandomExponential(double lambda, double tau=1.0);
 
 private:
   uint32  mt[MT_N];  //  State vector array
diff --git a/ext/meryl/src/utility/src/utility/objectStore.C b/ext/meryl/src/utility/src/utility/objectStore.C
new file mode 100644
index 0000000..13861fb
--- /dev/null
+++ b/ext/meryl/src/utility/src/utility/objectStore.C
@@ -0,0 +1,306 @@
+
+/******************************************************************************
+ *
+ *  This file is part of meryl-utility, a collection of miscellaneous code
+ *  used by Meryl, Canu and others.
+ *
+ *  This software is based on:
+ *    'Canu' v2.0              (https://github.com/marbl/canu)
+ *  which is based on:
+ *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
+ *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
+ *
+ *  Except as indicated otherwise, this is a 'United States Government Work',
+ *  and is released in the public domain.
+ *
+ *  File 'README.licenses' in the root directory of this distribution
+ *  contains full conditions and disclaimers.
+ */
+
+#include "types.H"
+#include "arrays.H"
+#include "strings.H"
+
+#include "objectStore.H"
+
+#include <libgen.h>
+#include <sys/wait.h>
+
+
+
+extern char **environ;  //  Where, or where, is this really defined?!
+
+
+
+static
+char *
+findSeqStorePath(char *requested) {
+  splitToWords  F(requested, splitPaths);
+
+  if (F.numWords() < 2)
+    return(NULL);
+
+  char  *filename  = F.last(0);
+  char  *storename = F.last(1);
+
+  //  If not a blobs file name, return no file.
+
+  if (strlen(filename) != 10)
+    return(NULL);
+
+  if ((filename[0] != 'b') ||
+      (filename[1] != 'l') ||
+      (filename[2] != 'o') ||
+      (filename[3] != 'b') ||
+      (filename[4] != 's') ||
+      (filename[5] != '.') ||
+      (isdigit(filename[6]) == 0) ||
+      (isdigit(filename[7]) == 0) ||
+      (isdigit(filename[8]) == 0) ||
+      (isdigit(filename[9]) == 0))
+    return(NULL);
+
+  //  Now just paste the two components together in the proper
+  //  way and return it.
+
+  char  *filepath = new char [FILENAME_MAX + 1];
+
+  snprintf(filepath, FILENAME_MAX, "%s/%s", storename, filename);
+
+  return(filepath);
+}
+
+
+
+static
+char *
+findOvlStorePath(char *requested) {
+  splitToWords  F(requested, splitPaths);
+
+  if (F.numWords() < 2)
+    return(NULL);
+
+  char  *basename  = NULL;
+  char  *storename = F.last(1);
+  char  *filename  = F.last(0);
+
+  if (strlen(filename) != 9)
+    return(NULL);
+
+  //  If not an overlap store data file name, return no file.
+
+  if ((isdigit(filename[0]) == 0) ||
+      (isdigit(filename[1]) == 0) ||
+      (isdigit(filename[2]) == 0) ||
+      (isdigit(filename[3]) == 0) ||
+      (filename[4]          != '<') ||
+      (isdigit(filename[5]) == 0) ||
+      (isdigit(filename[6]) == 0) ||
+      (isdigit(filename[7]) == 0) ||
+      (filename[8]          != '>'))
+    return(NULL);
+
+  //  Get ready for some ugly string parsing.  We expect strings similar to:
+  //
+  //    requested file F -- '../asm.ovlStore/0001<000>'
+  //    current path   P -- '/path/to/assembly/correction/2-correction'
+  //
+  //  If the first component of F is '..', we drop it and the last component of P.
+  //  When there are no more '..'s at the start, we should be left with the
+  //  store name in F and the assembly stage in P.
+
+  char  *cwd = getcwd(new char [FILENAME_MAX+1], FILENAME_MAX);
+
+  splitToWords  P(cwd, splitPaths);
+
+  delete [] cwd;
+
+  uint32  nStrip = 0;
+
+  //fprintf(stderr, "FROM cwd       '%s'\n", cwd);
+  //fprintf(stderr, "     requested '%s'\n", requested);
+
+  //  Remove identity components.
+
+  while ((F.numWords() > 0) &&
+         (strcmp(F.first(), ".") == 0))
+    F.shift();
+
+  //  Remove up components.
+
+  while ((P.numWords() > 0) &&
+         (F.numWords() > 0) &&
+         (strcmp(F.first(), "..") == 0)) {
+    //fprintf(stderr, "STRIP '%s' from requested and '%s' from cwd\n", F.first(), P.last());
+
+    F.shift();
+    P.pop();
+
+    nStrip++;
+  }
+
+  //fprintf(stderr, "P.last  '%s'\n", P.last());
+  //fprintf(stderr, "F.first '%s'\n", F.first());
+
+  //  We can run in one of three different places:
+  //    1) assembly_root/correction/1-stuff  - ../asm.ovlStore/0001<001>
+  //    2) assembly_root/correction          -  ./asm.ovlStore/0001<001>
+  //    3) assembly_root                     -  ./correction/asm.ovlStore/0001<001>
+  //
+  //  In the first case, we strip off the '..' and '1-stuff', set basename
+  //  to the last component in P, the storename to the first component
+  //  in F and the file to the last component in F (which is always true).
+  //
+  //  In the second case, nothing was stripped, and the result is the same.
+  //
+  //  In the third case, again, nothing was stripped, but the basename is
+  //  now in F, not P.
+  //
+  //  All that boils down to
+
+  if      (nStrip > 0) {                 //  First case.
+    basename  = P.last();
+    storename = F.first();
+    assert(F.numWords() == 2);
+  }
+
+  else if (F.numWords() == 2) {          //  Second case.
+    basename  = P.last();                //  (same result as third case)
+    storename = F.first();
+    assert(F.numWords() == 2);
+  }
+
+  else {                                 //  Third case.
+    basename  = F.first(0);
+    storename = F.first(1);
+    assert(F.numWords() == 3);
+  }
+
+  //  We could check that the namespace -- the name of this assembly -- is before
+  //  the basename (lots of work) and that the basename is one of 'correction',
+  // 'trimming', etc.  But why?
+
+  char  *filepath = new char [FILENAME_MAX + 1];
+
+  //fprintf(stderr, "MAKE PATH STAGE     '%s'\n", basename);
+  //fprintf(stderr, "          STORENAME '%s'\n", storename);
+  //fprintf(stderr, "          FILENAME  '%s'\n", filename);
+
+  snprintf(filepath, FILENAME_MAX, "%s/%s/%s", basename, storename, filename);
+
+  return(filepath);
+}
+
+
+
+bool
+fetchFromObjectStore(char *requested) {
+
+  //  Decide if we even need to bother.  If the file exists locally, or if
+  //  one of the environment variables is missing, no, we don't need to bother.
+
+  if (fileExists(requested))
+    return(false);
+
+  char  *da = getenv("CANU_OBJECT_STORE_CLIENT_DA");
+  char  *ns = getenv("CANU_OBJECT_STORE_NAMESPACE");
+  char  *pr = getenv("CANU_OBJECT_STORE_PROJECT");
+
+  if ((da == NULL) ||
+      (ns == NULL) ||
+      (pr == NULL))
+    return(false);
+
+  //  Try to figure out the object store path for this object based on the name
+  //  of the requested file.  Paths to stores are relative, but we need them
+  //  rooted in the assembly root directory:
+  //
+  //      ../../asm.seqStore -> ./asm.seqStore
+  //      ../asm.ovlStore    -> ./correction/asm.ovlStore
+  //
+  //  For the seqStore, we can just grab the last two components.
+  //  For the ovlStore, we need to parse out the subdirectory the store is in.
+
+  char *path   = NULL;
+
+  if (path == NULL)
+    path = findSeqStorePath(requested);
+
+  if (path == NULL)
+    path = findOvlStorePath(requested);
+
+  if (path == NULL)
+    fprintf(stderr, "fetchFromObjectStore()-- requested file '%s', but don't know where that is.\n", requested), exit(1);
+
+  //  With the path to the object figured out, finish making the path by appending
+  //  the PROJECT and NAMESPACE.
+
+  char *object = new char [FILENAME_MAX+1];
+
+  snprintf(object, FILENAME_MAX, "%s:%s/%s", pr, ns, path);
+
+  //  Then report what's going on.
+
+  fprintf(stderr, "fetchFromObjectStore()-- fetching file '%s'\n", requested);
+  fprintf(stderr, "fetchFromObjectStore()--   from object '%s'\n", object);
+
+  //  Build up a command we can execute after forking.
+
+  char *args[8];
+
+  args[0] = basename(da);
+  args[1] = duplicateString("download");        //  Thanks, execve, for wanting mutable
+  args[2] = duplicateString("--overwrite");     //  strings and making us jump through
+  args[3] = duplicateString("--no-progress");   //  a hoop to get them without compiler
+  args[4] = duplicateString("--output");        //  warnings.
+  args[5] = requested;
+  args[6] = object;
+  args[7] = NULL;
+
+  //  Fork and run the child command if we're the child.  Normally, execve()
+  //  doesn't return (because it obliterated the process it could return to).
+  //  If it does return, an error occurred, so we just go BOOM too.  As per
+  //  the manpage, _exit() MUST be used instead of exit(), so that
+  //  stdin/out/err are left intact.
+  //
+  //  vfork() is dangerous.  If we're the child, all we're allowed to do
+  //  after the call is execve() or _exit().  Absolutely nothing else.
+
+  pid_t pid = vfork();
+
+  if (pid == 0) {
+    execve(da, args, environ);
+    fprintf(stderr, "fetchFromObjectStore()-- execve() failed with error '%s'.\n", strerror(errno));
+    _exit(127);
+  }
+
+  if (pid == -1)
+    fprintf(stderr, "fetchFromObjectStore()-- vfork() failed with error '%s'.\n", strerror(errno)), exit(1);
+
+  //  Otherwise, we're still the parent; wait for the child process to
+  //  terminate.
+
+  int   status = 0;
+  pid_t wid    = waitpid(pid, &status, 0);
+
+  if (wid == -1)
+    fprintf(stderr, "fetchFromObjectStore()-- waitpid() failed with error '%s'.\n", strerror(errno)), exit(1);
+
+  if ((WIFEXITED(status)) &&
+      (WEXITSTATUS(status) == 127))
+    fprintf(stderr, "fetchFromObjectStore()-- execve() failed to run the command.\n"), exit(1);
+
+  //  If no file, it's fatal.
+  if (fileExists(requested) == false)
+    fprintf(stderr, "fetchFromObjectStore()-- failed fetch file '%s'.\n", requested), exit(1);
+
+  delete [] args[1];
+  delete [] args[2];
+  delete [] args[3];
+  delete [] args[4];
+
+  delete [] path;
+  delete [] object;
+
+  return(true);
+}
diff --git a/ext/meryl/src/utility/src/tests/readLines.C b/ext/meryl/src/utility/src/utility/objectStore.H
similarity index 55%
rename from ext/meryl/src/utility/src/tests/readLines.C
rename to ext/meryl/src/utility/src/utility/objectStore.H
index c536e86..148a306 100644
--- a/ext/meryl/src/utility/src/tests/readLines.C
+++ b/ext/meryl/src/utility/src/utility/objectStore.H
@@ -17,30 +17,19 @@
  *  contains full conditions and disclaimers.
  */
 
+#include "types.H"
 #include "files.H"
 
-int32
-main(int32 argc, char **argv) {
-  uint32   lineMax = 0;
-  uint32   lineLen = 0;
-  char    *line    = nullptr;
-  uint32   nLines  = 0;
-
-  if (argc == 1) {
-    fprintf(stderr, "usage: %s inputFile[.gz]\n", argv[0]);
-    return(1);
-  }
-
-  compressedFileReader  *in = new compressedFileReader(argv[1]);
-
-  while (AS_UTL_readLine(line, lineLen, lineMax, in->file())) {
-    nLines++;
-  }
-
-  delete    in;
-  delete [] line;
-
-  fprintf(stderr, "Found %u lines!  Yay!\n", nLines);
-
-  return(0);
-}
+//  Basic routines to fetch and stash files from an object store.
+//  Most of this is done in the executive, but low level fetching
+//  of sqStore and ovStore data is done here.
+//
+//  NOTE that this function is limited in its ability to fetch files.
+//  It will ONLY work with seqStore and ovlStore data files:
+//     seqStore/blobs.*
+//     ovlStore/0000<000>
+//
+//  Returns false if the file was not fetched (either no object store
+//  in use, or the file existed already), true if it was fetched.
+//
+bool   fetchFromObjectStore(char *filename);
diff --git a/ext/meryl/src/utility/src/utility/sampledDistribution.H b/ext/meryl/src/utility/src/utility/sampledDistribution.H
index 26cbde6..7c92bc4 100644
--- a/ext/meryl/src/utility/src/utility/sampledDistribution.H
+++ b/ext/meryl/src/utility/src/utility/sampledDistribution.H
@@ -80,12 +80,12 @@ public:
       }
 
       while (_dataMax <= val)
-        resizeArray(_data, _dataLen, _dataMax, 2 * _dataMax, _raAct::copyData | _raAct::clearNew);
+        resizeArray(_data, _dataLen, _dataMax, 2 * _dataMax, resizeArray_copyData | resizeArray_clearNew);
 
       _data[val] += cnt;
       _dataSum   += cnt;
 
-      _dataLen = std::max(_dataLen, val + 1);
+      _dataLen = max(_dataLen, val + 1);
     }
 
     AS_UTL_closeFile(D);
diff --git a/ext/meryl/src/utility/src/utility/sequence.C b/ext/meryl/src/utility/src/utility/sequence.C
index f5c23b3..900ea3a 100644
--- a/ext/meryl/src/utility/src/utility/sequence.C
+++ b/ext/meryl/src/utility/src/utility/sequence.C
@@ -461,123 +461,50 @@ encode8bitSequence(uint8 *&chunk, char *seq, uint32 seqLen) {
 
 
 
-////////////////////////////////////////
-//  dnaSeq functions
-//
-
-dnaSeq::dnaSeq() {
-};
 
-
-dnaSeq::~dnaSeq() {
-  delete [] _name;
-  delete [] _seq;
-  delete [] _qlt;
+//  Saves the file offset of the first byte in the record:
+//    for FASTA, the '>'
+//    for FASTQ, the '@'.
+
+class dnaSeqIndexEntry {
+public:
+  dnaSeqIndexEntry() {
+    _fileOffset     = UINT64_MAX;
+    _sequenceLength = 0;
+  };
+  ~dnaSeqIndexEntry() {
+  };
+
+  uint64   _fileOffset;
+  uint64   _sequenceLength;
 };
 
 
-void
-dnaSeq::releaseAll(void) {
-  delete [] _name;    _name = _ident = _flags = nullptr;
-  delete [] _seq;     _seq                    = nullptr;
-  delete [] _qlt;     _qlt                    = nullptr;
-
-  _nameMax = 0;
-  _seqMax  = 0;
-  _seqLen  = 0;
-}
-
-
-void
-dnaSeq::releaseBases(void) {
-  delete [] _seq;     _seq                    = nullptr;
-  delete [] _qlt;     _qlt                    = nullptr;
-
-  _seqMax  = 0;
-  _seqLen  = 0;
-}
-
-
-bool
-dnaSeq::copy(char  *bout,
-             uint32 bgn, uint32 end, bool terminate) {
-
-  if ((end < bgn) || (_seqLen < end))
-    return(false);
-
-  for (uint32 ii=bgn; ii<end; ii++)
-    bout[ii-bgn] = _seq[ii];
-
-  if (terminate)
-    bout[end-bgn] = 0;
-
-  return(true);
-}
 
+dnaSeqFile::dnaSeqFile(const char *filename, bool indexed) {
 
-bool
-dnaSeq::copy(char  *bout,
-             uint8 *qout,
-             uint32 bgn, uint32 end, bool terminate) {
-
-  if ((end < bgn) || (_seqLen < end))
-    return(false);
-
-  for (uint32 ii=bgn; ii<end; ii++) {
-    bout[ii-bgn] = _seq[ii];
-    qout[ii-bgn] = _qlt[ii];
-  }
-
-  if (terminate) {
-    bout[end-bgn] = 0;
-    qout[end-bgn] = 0;
-  }
-
-  return(true);
-}
-
-
-void
-dnaSeq::findNameAndFlags(void) {
-  uint32 ii=0;
-
-  while (isWhiteSpace(_name[ii]) == true)   //  Skip white space before the name.
-    ii++;                                   //  Why do you torture us?
-
-  _ident = _name + ii;                      //  At the start of the name.
-
-  while (isVisible(_name[ii]) == true)      //  Skip over the name.
-    ii++;
-
-  if (isNUL(_name[ii]) == true) {           //  If at the end of the string,
-    _flags = _name + ii;                    //  there are no flags,
-    return;                                 //  so just return.
-  }
-
-  _name[ii++] = 0;                          //  Terminate the name, move ahead.
-
-  while (isWhiteSpace(_name[ii]) == true)   //  Otherwise, skip whitespace
-    ii++;                                   //  to get to the flags.
-
-  _flags = _name + ii;                      //  Flags are here or NUL.
-}
+  _file     = new compressedFileReader(filename);
+  _buffer   = new readBuffer(_file->file());
 
+  _index    = NULL;
+  _indexLen = 0;
+  _indexMax = 0;
 
+  if (indexed == false)
+    return;
 
-////////////////////////////////////////
-//  dnaSeqFile functions
-//
+  if (_file->isCompressed() == true)
+    fprintf(stderr, "ERROR: cannot index compressed input '%s'.\n", filename), exit(1);
 
-dnaSeqFile::dnaSeqFile(char const *filename, bool indexed) {
-  _filename = duplicateString(filename);
+  if (_file->isNormal() == false)
+    fprintf(stderr, "ERROR: cannot index pipe input.\n"), exit(1);
 
-  reopen(indexed);
+  generateIndex();
 }
 
 
 
 dnaSeqFile::~dnaSeqFile() {
-  delete [] _filename;
   delete    _file;
   delete    _buffer;
   delete [] _index;
@@ -585,33 +512,6 @@ dnaSeqFile::~dnaSeqFile() {
 
 
 
-//  Open, or reopen, an input file.
-//
-void
-dnaSeqFile::reopen(bool indexed) {
-
-  //  If a _file exists already, reopen it, otherwise, make a new one.
-  if (_file)
-    _file->reopen();
-  else
-    _file = new compressedFileReader(_filename);
-
-  //  Since the file object is always new, we need to make a new read buffer.
-  //  gzip inputs seem to be (on FreeBSD) returning only 64k blocks
-  //  regardless of the size of our buffer; but uncompressed inputs will
-  //  benefit slightly from a bit larger buffer.
-  delete _buffer;
-
-  _buffer = new readBuffer(_file->file(), 128 * 1024);
-
-  //  If we have an index already or one is requested, (re)generate it.
-
-  if ((_index != nullptr) || (indexed == true))
-    generateIndex();
-}
-
-
-
 bool
 dnaSeqFile::findSequence(uint64 i) {
 
@@ -620,8 +520,6 @@ dnaSeqFile::findSequence(uint64 i) {
 
   _buffer->seek(_index[i]._fileOffset);
 
-  _seqIdx = i;
-
   return(true);
 }
 
@@ -639,142 +537,81 @@ dnaSeqFile::sequenceLength(uint64 i) {
 
 
 
-////////////////////////////////////////
-//  dnaSeqFile indexing
-//
-
-const uint64 dnaSeqVersion01 = 0x3130716553616e64;   //  dnaSeq01
-const uint64 dnaSeqVersion02 = 0x3230716553616e64;   //  dnaSeq02 - not used yet
-
-
-char const *
-makeIndexName(char const *prefix) {
-  char const *suffix = ".dnaSeqIndex";
-  uint32      plen   = strlen(prefix);
-  uint32      slen   = strlen(suffix);
-  char       *iname  = new char [plen + slen + 1];
-
-  memcpy(iname,        prefix, plen + 1);   //  +1 for the NUL byte.
-  memcpy(iname + plen, suffix, slen + 1);
-
-  return(iname);
+bool
+dnaSeqFile::findSequence(const char *name) {
+  fprintf(stderr, "dnaSeqFile::findSequence(const char *) not supported.\n");
+  exit(1);
+  return(false);
 }
 
 
-//  Load an index.  Returns true if one was loaded.
+
 bool
 dnaSeqFile::loadIndex(void) {
-  char const  *indexName = makeIndexName(_filename);
-  FILE        *indexFile = nullptr;
-
-  if (fileExists(indexName) == true) {
-    FILE   *indexFile = AS_UTL_openInputFile(indexName);
-    uint64  magic;
-    uint64  size;
-    uint64  date;
-
-    loadFromFile(magic,     "dnaSeqFile::magic",    indexFile);
-    loadFromFile(size,      "dnaSeqFile::size",     indexFile);
-    loadFromFile(date,      "dnaSeqFile::date",     indexFile);
-    loadFromFile(_indexLen, "dnaSeqFile::indexLen", indexFile);
-
-    if (magic != dnaSeqVersion01) {
-      fprintf(stderr, "ERROR: file '%s' isn't a dnaSeqIndex; manually remove this file.\n", indexName);
-      exit(1);
-    }
+  char   indexName[FILENAME_MAX+1];
 
-    if ((size == AS_UTL_sizeOfFile(_filename)) &&
-        (date == AS_UTL_timeOfFile(_filename))) {
-      _index = new dnaSeqIndexEntry [_indexLen];
+  snprintf(indexName, FILENAME_MAX, "%s.index", _file->filename());
 
-      loadFromFile(_index, "dnaSeqFile::index", _indexLen, indexFile);
+  if (fileExists(indexName) == false)
+    return(false);
 
-    } else {
-      fprintf(stderr, "WARNING: file '%s' disagrees with index; recreating index.\n", _filename);
+  FILE   *indexFile = AS_UTL_openInputFile(indexName);
 
-      _index    = nullptr;
-      _indexLen = 0;
-      _indexMax = 0;
-    }
+  loadFromFile(_indexLen, "dnaSeqFile::indexLen", indexFile);
 
-    AS_UTL_closeFile(indexFile, indexName);
-  }
+  _index = new dnaSeqIndexEntry [_indexLen];
+
+  loadFromFile(_index, "dnaSeqFile::index", _indexLen, indexFile);
 
-  delete [] indexName;
+  AS_UTL_closeFile(indexFile, indexName);
 
-  return(_index != nullptr);   //  Return true if we have an index.
+  return(true);
 }
 
 
 
 void
 dnaSeqFile::saveIndex(void) {
-  char const *indexName = makeIndexName(_filename);
-  FILE       *indexFile = AS_UTL_openOutputFile(indexName);
+  char   indexName[FILENAME_MAX+1];
+
+  snprintf(indexName, FILENAME_MAX, "%s.index", _file->filename());
 
-  uint64  magic = dnaSeqVersion01;
-  uint64  size  = AS_UTL_sizeOfFile(_filename);
-  uint64  date  = AS_UTL_timeOfFile(_filename);
+  FILE   *indexFile = AS_UTL_openOutputFile(indexName);
 
-  writeToFile(magic,     "dnaSeqFile::magic",    indexFile);
-  writeToFile(size,      "dnaSeqFile::size",     indexFile);
-  writeToFile(date,      "dnaSeqFile::date",     indexFile);
-  writeToFile(_indexLen, "dnaSeqFile::indexLen", indexFile);
+  writeToFile(_indexLen, "dnaSeqFile::indexLen",            indexFile);
   writeToFile(_index,    "dnaSeqFile::index",    _indexLen, indexFile);
 
   AS_UTL_closeFile(indexFile, indexName);
-
-  delete [] indexName;
 }
 
 
 
 void
 dnaSeqFile::generateIndex(void) {
-  dnaSeq     seq;
-
-  //  Fail if an index is requested for a compressed file.
-
-  if (_file->isCompressed() == true)
-    fprintf(stderr, "ERROR: cannot index compressed input '%s'.\n", _filename), exit(1);
-
-  if (_file->isNormal() == false)
-    fprintf(stderr, "ERROR: cannot index pipe input.\n"), exit(1);
-
-  //  If we can load an index, do it and return.
+  uint32          nameMax = 0;
+  char           *name    = NULL;
+  uint64          seqMax  = 0;
+  char           *seq     = NULL;
+  uint8          *qlt     = NULL;
+  uint64          seqLen  = 0;
 
   if (loadIndex() == true)
     return;
 
-  //  Rewind the buffer to make sure we're at the start of the file.
-
-  _buffer->seek(0);
-
-  //  Allocate space for the index, set the first entry to the current
-  //  position of the file.
-
   _indexLen = 0;
   _indexMax = 1048576;
   _index    = new dnaSeqIndexEntry [_indexMax];
 
-  _index[0]._fileOffset     = _buffer->tell();
-  _index[0]._sequenceLength = 0;
+  _index[_indexLen]._fileOffset     = _buffer->tell();
+  _index[_indexLen]._sequenceLength = 0;
 
   //  While we read sequences:
-  //    update the length of the sequence (we've already saved the position)
+  //    update the length of the sequence (we've already saved the position)
   //    make space for more sequences
   //    save the position of the next sequence
-
-  while (loadSequence(seq) == true) {
-    if (seq.wasError()) {
-      fprintf(stderr, "WARNING: error reading sequence at/before '%s'\n", seq.ident());
-    }
-
-    if (seq.wasReSync()) {
-      fprintf(stderr, "WARNING: lost sync reading before sequence '%s'\n", seq.ident());
-    }
-
-    _index[_indexLen]._sequenceLength = seq.length();
+  //
+  while (loadSequence(name, nameMax, seq, qlt, seqMax, seqLen) == true) {
+    _index[_indexLen]._sequenceLength = seqLen;
 
     increaseArray(_index, _indexLen, _indexMax, 1048576);
 
@@ -784,207 +621,128 @@ dnaSeqFile::generateIndex(void) {
     _index[_indexLen]._sequenceLength = 0;
   }
 
-  //  Save whatever index we made.
-
-  saveIndex();
-}
-
-
-
-void
-dnaSeqFile::removeIndex(void) {
-
-  delete [] _index;
+  //for (uint32 ii=0; ii<_indexLen; ii++)
+  //  fprintf(stderr, "%u offset %lu length %lu\n", ii, _index[ii]._fileOffset, _index[ii]._sequenceLength);
 
-  _indexLen = 0;
-  _indexMax = 0;
-  _index    = nullptr;
+  if (_indexLen > 0)
+    saveIndex();
 }
 
 
 
-bool
-dnaSeqFile::loadFASTA(char  *&name, uint32 &nameMax,
-                      char  *&seq,
-                      uint8 *&qlt,  uint64 &seqMax, uint64 &seqLen, uint64 &qltLen) {
+uint64
+dnaSeqFile::loadFASTA(char   *&name,     uint32  &nameMax,
+                      char   *&seq,
+                      uint8  *&qlt,      uint64  &seqMax) {
   uint64  nameLen = 0;
+  uint64  seqLen  = 0;
   char    ch      = _buffer->read();
 
-  //  Skip any whitespace.
-
-  while (isWhiteSpace(ch))
-    ch = _buffer->read();
-
-  //  Fail rather ungracefully if we aren't at a sequence start.
+  assert(ch == '>');
 
-  if (ch != '>')
-    return(false);
-
-  //  Read the header line into the name string.  We cannot skip whitespace
-  //  here, but we do allow DOS to insert a \r before any \n.
+  //  Read the header line into the name string.
 
   for (ch=_buffer->read(); (ch != '\n') && (ch != 0); ch=_buffer->read()) {
-    if (ch == '\r')
-      continue;
     if (nameLen+1 >= nameMax)
       resizeArray(name, nameLen, nameMax, 3 * nameMax / 2);
     name[nameLen++] = ch;
   }
 
-  //  Trim back the header line to remove white space at the end.  The
-  //  terminating NUL is tacked on at the end.
+  //  Read sequence, skipping whitespace, until we hit a new sequence (or eof).
 
-  while ((nameLen > 0) && (isWhiteSpace(name[nameLen-1])))
-    nameLen--;
-
-  name[nameLen] = 0;
-
-  //  Read sequence, skipping whitespace, until we hit a new sequence or eof.
-
-  seqLen = 0;
-  qltLen = 0;
+  for (ch=_buffer->readuntil('>'); (ch != '>') && (ch != 0); ch=_buffer->readuntil('>')) {
+    if ((ch == '\n') || (ch == '\r') || (ch == '\t') || (ch == ' '))
+      continue;
 
-  for (ch = _buffer->peek(); ((ch != '>') &&
-                              (ch != '@') &&
-                              (ch !=  0)); ch = _buffer->peek()) {
     assert(_buffer->eof() == false);
 
-    ch = _buffer->read();
-
-    if (isWhiteSpace(ch))
-      continue;
-
     if (seqLen+1 >= seqMax)
       resizeArrayPair(seq, qlt, seqLen, seqMax, 3 * seqMax / 2);
 
-    seq[seqLen++] = ch;
-    qlt[qltLen++] = 0;
+    seq[seqLen] = ch;
+    qlt[seqLen] = 0;
+
+    seqLen++;
   }
 
+  name[nameLen] = 0;
   seq[seqLen] = 0;
-  qlt[qltLen] = 0;
+  qlt[seqLen] = 0;
 
   assert(nameLen < nameMax);
   assert(seqLen  < seqMax);
-  assert(qltLen  < seqMax);
-
-  _seqIdx++;
 
-  return(true);
+  return(seqLen);
 }
 
 
 
-bool
-dnaSeqFile::loadFASTQ(char  *&name, uint32 &nameMax,
-                      char  *&seq,
-                      uint8 *&qlt,  uint64 &seqMax, uint64 &seqLen, uint64 &qltLen) {
+uint64
+dnaSeqFile::loadFASTQ(char   *&name,     uint32  &nameMax,
+                      char   *&seq,
+                      uint8  *&qlt,      uint64  &seqMax) {
   uint32  nameLen = 0;
+  uint64  seqLen  = 0;
+  uint64  qltLen  = 0;
   char    ch      = _buffer->read();
 
-  //  Skip any whitespace.
+  assert(ch == '@');
 
-  while (isWhiteSpace(ch))
-    ch = _buffer->read();
-
-  //  Fail rather ungracefully if we aren't at a sequence start.
-
-  if (ch != '@')
-    return(false);
-
-  //  Read the header line into the name string.  We cannot skip whitespace
-  //  here, but we do allow DOS to insert a \r before any \n.
+  //  Read the header line into the name string.
 
   for (ch=_buffer->read(); (ch != '\n') && (ch != 0); ch=_buffer->read()) {
-    if (ch == '\r')
-      continue;
     if (nameLen+1 >= nameMax)
       resizeArray(name, nameLen, nameMax, 3 * nameMax / 2);
     name[nameLen++] = ch;
   }
 
-  //  Trim back the header line to remove white space at the end.
-
-  while ((nameLen > 0) && (isWhiteSpace(name[nameLen-1])))
-    nameLen--;
-
-  name[nameLen] = 0;
-
-  //  Skip any whitespace, again.  Once we hit non-whitespace we'll suck in
-  //  the whole line.
-
-  while (isWhiteSpace(ch))
-    ch = _buffer->read();
-
-  //  Read sequence.  Pesky DOS files end with \r\n, and it suffices
-  //  to stop on the \n and ignore all the rest.
+  //  Read sequence.
 
-  seqLen = 0;
-  qltLen = 0;
-
-  for (; (ch != '\n') && (ch != 0); ch=_buffer->read()) {
-    if (isWhiteSpace(ch))
+  for (ch=_buffer->read(); (ch != '\n') && (ch != 0); ch=_buffer->read()) {
+    if ((ch == '\n') || (ch == '\r') || (ch == '\t') || (ch == ' '))
       continue;
     if (seqLen+1 >= seqMax)
       resizeArrayPair(seq, qlt, seqLen, seqMax, 3 * seqMax / 2);
     seq[seqLen++] = ch;
   }
 
-  //  Skip any more whitespace, fail if we're not at a quality start, then
-  //  suck in the quality line.  And then skip more whitespace.
-
-  while (isWhiteSpace(ch))
-    ch = _buffer->read();
-
-  if (ch != '+')
-    return(false);
+  //  Skip the third line of the record (the quality header line).
 
   for (ch=_buffer->read(); (ch != '\n') && (ch != 0); ch=_buffer->read()) {
     ;
   }
 
-  while (isWhiteSpace(ch))
-    ch = _buffer->read();
-
-  //  Read qualities and convert to integers.
+  //  Read qualities.
 
-  for (; (ch != '\n') && (ch != 0); ch=_buffer->read()) {
-    if (isWhiteSpace(ch))
+  for (ch=_buffer->read(); (ch != '\n') && (ch != 0); ch=_buffer->read()) {
+    if ((ch == '\n') || (ch == '\r') || (ch == '\t') || (ch == ' '))
       continue;
     if (qltLen+1 >= seqMax)
       resizeArrayPair(seq, qlt, qltLen, seqMax, 3 * seqMax / 2);
-    qlt[qltLen++] = ch - '!';
+    qlt[qltLen++] = ch;
   }
 
-  //  Skip whitespace after the sequence.  This one is a little weird.  It
-  //  tests if the _next_ letter is whitespace, and if so, gets it from the
-  //  buffer.  After this loop, the _next_ letter in the buffer should be
-  //  either a '>' or a '@'.
-
-  while (isWhiteSpace(_buffer->peek()))
-    _buffer->read();
+  //fprintf(stderr, "READ FASTQ name %u seq %lu qlt %lu\n", nameLen, seqLen, qltLen);
 
+  name[nameLen] = 0;
   seq[seqLen] = 0;
   qlt[qltLen] = 0;
 
   assert(nameLen < nameMax);
   assert(seqLen  < seqMax);
   assert(qltLen  < seqMax);
+  assert(seqLen == qltLen);
 
-  _seqIdx++;
-
-  return(true);
+  return(seqLen);
 }
 
 
 
 bool
-dnaSeqFile::loadSequence(char  *&name, uint32 &nameMax,
-                         char  *&seq,
-                         uint8 *&qlt,  uint64 &seqMax, uint64 &seqLen, uint32 &error) {
-  uint64 qltLen = 0;
-
-  //  Allocate space for the arrays, if they're currently unallocated.
+dnaSeqFile::loadSequence(char   *&name,     uint32  &nameMax,
+                         char   *&seq,
+                         uint8  *&qlt,      uint64  &seqMax,
+                         uint64  &seqLen) {
 
   if (nameMax == 0)
     resizeArray(name, 0, nameMax, (uint32)1024);
@@ -992,100 +750,27 @@ dnaSeqFile::loadSequence(char  *&name, uint32 &nameMax,
   if (seqMax == 0)
     resizeArrayPair(seq, qlt, 0, seqMax, (uint64)65536);
 
-  //  Clear our return values.
-
-  bool   loadSuccess = false;
-
-  _isFASTA = false;
-  _isFASTQ = false;
-
-  name[0] = 0;
-  seq[0]  = 0;
-  qlt[0]  = 0;
-  seqLen  = 0;
-
-  error   = 0;
-
-  //  Skip any whitespace at the start of the file, or before the next FASTQ
-  //  sequence (the FASTA reader will automagically skip whitespace at the
-  //  end of the sequence).
-
-  while (isWhiteSpace(_buffer->peek()))
-    _buffer->read();
-
-  //  If we're not at a sequence start, scan ahead to find the next one.
-  //  Not bulletproof; FASTQ qv's can match this.
-
-  if ((_buffer->peek() != '>') &&
-      (_buffer->peek() != '@') &&
-      (_buffer->peek() !=  0)) {
-    //fprintf(stderr, "dnaSeqFile::loadSequence()-- sequence sync lost at position %lu, attempting to find the next sequence.\n", _buffer->tell());
-    error |= 0x02;
-  }
-
-  bool  lastWhite = isWhiteSpace(_buffer->peek());
-
-  while ((_buffer->peek() != '>') &&
-         (_buffer->peek() != '@') &&
-         (_buffer->peek() !=  0)) {
+  while (_buffer->peek() == '\n')
     _buffer->read();
-  }
-
-  //  Peek at the file to decide what type of sequence we need to read.
 
-  if      (_buffer->peek() == '>') {
-    _isFASTA    = true;
-    loadSuccess = loadFASTA(name, nameMax, seq, qlt, seqMax, seqLen, qltLen);
-  }
-
-  else if (_buffer->peek() == '@') {
-    _isFASTQ    = true;
-    loadSuccess = loadFASTQ(name, nameMax, seq, qlt, seqMax, seqLen, qltLen);
-  }
+  if      (_buffer->peek() == '>')
+    seqLen = loadFASTA(name, nameMax,
+                       seq,
+                       qlt, seqMax);
 
-  else {
-    _isFASTA = false;
-    _isFASTQ = false;
+  else if (_buffer->peek() == '@')
+    seqLen = loadFASTQ(name, nameMax,
+                       seq,
+                       qlt, seqMax);
 
+  else
     return(false);
-  }
-
-  //  If we failed to load a sequence, report an error message and zero out
-  //  the sequence.  Leave the name as-is so we can at least return a length
-  //  zero sequence.  If we failed to load a name, it'll still be set to NUL.
-
-  if (loadSuccess == false) {
-    //if (name[0] == 0)
-    //  fprintf(stderr, "dnaSeqFile::loadSequence()-- failed to read sequence correctly at position %lu.\n", _buffer->tell());
-    //else
-    //  fprintf(stderr, "dnaSeqFile::loadSequence()-- failed to read sequence '%s' correctly at position %lu.\n", name, _buffer->tell());
-
-    error |= 0x01;
-
-    seq[0]  = 0;
-    qlt[0]  = 0;
-    seqLen  = 0;
-  }
 
   return(true);
 }
 
 
 
-bool
-dnaSeqFile::loadSequence(dnaSeq &seq) {
-  bool result = loadSequence(seq._name, seq._nameMax,
-                             seq._seq,
-                             seq._qlt,  seq._seqMax, seq._seqLen, seq._error);
-
-  if (result)
-    seq.findNameAndFlags();
-
-  return(result);
-}
-
-
-
 bool
 dnaSeqFile::loadBases(char    *seq,
                       uint64   maxLength,
diff --git a/ext/meryl/src/utility/src/utility/sequence.H b/ext/meryl/src/utility/src/utility/sequence.H
index a950ac3..fdab9c8 100644
--- a/ext/meryl/src/utility/src/utility/sequence.H
+++ b/ext/meryl/src/utility/src/utility/sequence.H
@@ -50,216 +50,128 @@ void   decode3bitSequence(uint8 *chunk, uint32 chunkLen, char *seq, uint32 seqLe
 void   decode8bitSequence(uint8 *chunk, uint32 chunkLen, char *seq, uint32 seqLen);
 
 
-//  Encode/decode an ACGT base to 0132.  Relies on the ASCII encoding:
-//
-//    A      a     01c0 000 1 == 0 -> 0
-//    C      c     01c0 001 1 == 1 -> 1
-//    T      t     01c1 010 0 == 2 -> 2
-//    G      g     01c0 011 1 == 3 -> 3
-//    N      n     01c0 111 0 == 7 -> 4
-//                      ^^^
-//  Decoding will always return uppercase letters (c=0).
-//
-//  The inline arrays, in gcc anyway, compile to a single 64-bit constant
-//  and is equivalent to the C code:
-//
-//    0x0706050403020100llu >> (((base >> 1) & 0x07) << 3) & 0x0f
-//
-//  with the additional optimization of removing the redundant shifts.
-
-inline
-uint8
-encode2bitBase(char  base) {
-  return((uint8 [8]){0, 1, 2, 3, 4, 4, 4, 4}[base >> 1 & 0x07]);
-}
-
-inline
-char
-decode2bitBase(uint8 base) {
-  return("ACTGNNNN"[base & 0x07]);
-}
-
-
-
-//  A sequence loaded from disk.  It should be treated as a read-only object.
-//
-//  ident() returns the first word of the sequence header line, while flags()
-//  returns the rest of the line, or an empty line if there is no more line.
-//
-//  It isn't possible to modify ident() and flags().  They're pointers
-//  into the same memory, and that isn't exposed.
-//
-//  bases() and quals() could support modifications, as long as the length of
-//  the string doesn't change.  Only canu needed to do that and it was worked
-//  around.
-//
-//  If quality values are not available (e.g., FASTA) then all values are set
-//  to zero.
-//
-//  The copy functions will copy bases (and qualities) from bgn to end, but
-//  not including the base at end -- that is, normal C-style semantics.  The
-//  output will be NUL-terminated, unless explicitly told not to.  Returns
-//  false if bgn or end are out of range or inconsistent.
-//
-class dnaSeq {
-public:
-  dnaSeq();
-  ~dnaSeq();
 
-  char  const      *ident(void)        { return(_ident);  };
-  char  const      *flags(void)        { return(_flags);  };
-  char  const      *bases(void)        { return(_seq);    };
-  uint8 const      *quals(void)        { return(_qlt);    };
 
-  uint64            length(void)       { return(_seqLen); };
+class dnaSeqIndexEntry;   //  Internal use only, sorry.
 
-  void              releaseAll(void);      //  Release all memory.
-  void              releaseBases(void);    //  Release seq memory; keep the name.
+class dnaSeq {
+public:
+  dnaSeq() {
+    _nameMax = 0;
+    _name    = NULL;
+    _seqMax  = 0;
+    _seq     = NULL;
+    _qlt     = NULL;
+    _seqLen  = 0;
+  };
 
-  bool              copy(char  *bout,
-                         uint32 bgn, uint32 end, bool terminate = true);
+  ~dnaSeq() {
+    delete [] _name;
+    delete [] _seq;
+    delete [] _qlt;
+  };
 
-  bool              copy(char  *bout,
-                         uint8 *qout,
-                         uint32 bgn, uint32 end, bool terminate = true);
 
-  bool              wasError(void)     { return((_error & 0x01) == 0x01); };
-  bool              wasReSync(void)    { return((_error & 0x02) == 0x02); };
+  char             *name(void)         { return(_name);   };
+  char             *bases(void)        { return(_seq);    };
+  uint8            *quals(void)        { return(_qlt);    };
 
-private:
-  void              findNameAndFlags(void);
+  uint64            length(void)       { return(_seqLen); };
 
 private:
-  char             *_name    = nullptr;
-  uint32            _nameMax = 0;
-
-  char             *_ident   = nullptr;
-  char             *_flags   = nullptr;
-
-  char             *_seq     = nullptr;
-  uint8            *_qlt     = nullptr;
-  uint64            _seqMax  = 0;         //  Space allocated.
-  uint64            _seqLen  = 0;         //  Actual length.
-
-  uint32            _error   = 0;
+  uint32            _nameMax;
+  char             *_name;
+  uint64            _seqMax;
+  char             *_seq;
+  uint8            *_qlt;
+  uint64            _seqLen;
 
   friend class dnaSeqFile;
 };
 
 
 
-//  An interface to FASTA and FASTQ files.
-//
-//  Upon object creation, you can request that an index of the file be
-//  generated.  Without an index, numberOfSequences(), findSequence() and
-//  sequenceLength() do not work well or at all.
-//
-//  generateIndex() will force an index to be generated.
-//  removeIndex will remove any index.
-//
-//  reopen() will reset the file to the start and.  If the 'indexed' flag is
-//  true, or an index already exists, an index is (re)created.  Note that
-//  setting 'indexed=false' will NOT remove an existing index.
-//
-//  findSequence() will return true if the specified sequence is found in the
-//  file and leave the file positioned such that the next loadSequence() will
-//  load that sequence.
-//   - If an index exists, the index will be searched and the sequence will
-//     be returned regardless of where it is in the file.
-//   - If no index exists, the file will be searched forward until the
-//     sequence is found or the file ends.  It is not possible to move
-//     'backward' in the file in this case.
-//
-//  sequenceLength() will return the length of sequence index i.  If no index
-//  exists, or i is not a valid sequence index, UINT64_MAX is returned.
-//
-//  isFASTA() and isFASTQ() return true if the last sequence loaded came from
-//  a FASTA or FASTQ source, respectively.  If no sequence has been loaded
-//  yet, both functions will return false.
-//
-//  loadSequence() will read the next sequence from the file.  Returns false
-//  if the end of file is encountered, true otherwise.  In particular, a
-//  sequence of length zero will return true.
-//
-//  loadBases() will return a chunk of sequence from the file, up to
-//  'maxLength' bases or the end of the current sequence.
-//   - Returns false only if EOF is encountered.
-//   - seqLength will have the length of the sequence returned.  This can be zero.
-//   - endOfSequence will be true if the end of the sequence was encountered.
-//   - The returned sequence is NOT NUL terminated.
-//
 
 class dnaSeqFile {
 public:
-  dnaSeqFile(char const *filename, bool indexed=false);
+  dnaSeqFile(const char *filename, bool indexed=false);
   ~dnaSeqFile();
 
-  void        reopen(bool indexed=false);
-  void        generateIndex(void);
-  void        removeIndex(void);
-
-public:
-  char const *filename(void)            { return(_filename); };
-  uint64      numberOfSequences(void)   { return(_indexLen); };
-
-  bool        findSequence(uint64 i);
-  uint64      sequenceLength(uint64 i);
-
-public:
-  //  True if the last sequence loaded was from a FASTA or FASTQ file.
-  bool   isFASTA(void)      { return(_isFASTA); };
-  bool   isFASTQ(void)      { return(_isFASTQ); };
-
-  //  Return the sequence index of the last loaded sequence.
-  uint32 seqIdx(void)       { return(_seqIdx-1); };
+  compressedFileReader  *_file;
+  readBuffer            *_buffer;
 
-  //  True if the input file is compressed (gzip, xz, etc).
-  bool   isCompressed(void) { return(_file->isCompressed()); };
-
-public:
-  bool   loadSequence(char   *&name, uint32 &nameMax,
-                      char   *&seq,
-                      uint8  *&qlt,  uint64 &seqMax, uint64 &seqLen, uint32 &errorCode);
-  bool   loadSequence(dnaSeq &seq);
-
-public:
-  bool   loadBases(char    *seq,
-                   uint64   maxLength,
-                   uint64  &seqLength,
-                   bool    &endOfSequence);
+  dnaSeqIndexEntry      *_index;
+  uint64                 _indexLen;
+  uint64                 _indexMax;
 
 private:
   bool     loadIndex(void);
   void     saveIndex(void);
 
-  bool
-  loadFASTA(char  *&name, uint32 &nameMax,
-            char  *&seq,
-            uint8 *&qlt,  uint64 &seqMax, uint64 &seqLen, uint64 &qltLen);
+public:
+  void     generateIndex(void);
+
+  //  If indexed, searches the index for the proper sequence.
+  //
+  //  If not indexed, searches forward in the file for the sequence.  If not found,
+  //  the file will be at the end.
+  //
+  //  In both cases, the file is left positioned at the start of the sequence header.
+  //
+  //  Returns true if found, false if not.
+  //
+  bool     findSequence(uint64 i);
+  bool     findSequence(const char *name);
+
+  //  Returns the number of sequences in the file.
+  uint64   numberOfSequences(void) {
+    return(_indexLen);
+  };
+
+  //  Returns the length of sequence i.  If no such sequence, returns UINT64_MAX.
+  uint64   sequenceLength(uint64 i);
 
-  bool
-  loadFASTQ(char  *&name, uint32 &nameMax,
-            char  *&seq,
-            uint8 *&qlt,  uint64 &seqMax, uint64 &seqLen, uint64 &qltLen);
+  char    *filename(void) {
+    return(_file->filename());
+  }
 
 private:
-  char                  *_filename = nullptr;
+  uint64
+  loadFASTA(char   *&name,     uint32  &nameMax,
+            char   *&seq,
+            uint8  *&qlt,      uint64  &seqMax);
 
-  bool                   _isFASTA  = false;
-  bool                   _isFASTQ  = false;
-  uint64                 _seqIdx   = 0;
+  uint64
+  loadFASTQ(char   *&name,     uint32  &nameMax,
+            char   *&seq,
+            uint8  *&qlt,      uint64  &seqMax);
 
-  compressedFileReader  *_file     = nullptr;
-  readBuffer            *_buffer   = nullptr;
 
-  struct dnaSeqIndexEntry {     //  Offset of the first byte in the record:
-    uint64   _fileOffset;       //  '>' for FASTA, '@' for fastq.
-    uint64   _sequenceLength;   //
+public:
+  //  Return the next sequence in the file.
+  //  Returns false if EOF, true otherwise, even if the sequence is length zero.
+  //
+  bool   loadSequence(char   *&name,     uint32  &nameMax,
+                      char   *&seq,
+                      uint8  *&qlt,      uint64  &seqMax,
+                      uint64  &seqLen);
+
+  bool   loadSequence(dnaSeq &seq) {
+    return(loadSequence(seq._name,    seq._nameMax,
+                        seq._seq,
+                        seq._qlt,     seq._seqMax,
+                        seq._seqLen));
   };
 
-  dnaSeqIndexEntry      *_index    = nullptr;
-  uint64                 _indexLen = 0;
-  uint64                 _indexMax = 0;
+  //  Returns a chunk of sequence from the file, up to 'maxLength' bases or
+  //  the end of the current sequence.  This is NOT NUL terminated!
+  //
+  //  Returns false if EOF is hit and no bases were loaded.
+  //
+  bool   loadBases(char    *seq,
+                   uint64   maxLength,
+                   uint64  &seqLength,
+                   bool    &endOfSequence);
 };
 
 
diff --git a/ext/meryl/src/utility/src/utility/speedCounter.H b/ext/meryl/src/utility/src/utility/speedCounter.H
index 300684b..510aa3a 100644
--- a/ext/meryl/src/utility/src/utility/speedCounter.H
+++ b/ext/meryl/src/utility/src/utility/speedCounter.H
@@ -38,7 +38,7 @@ public:
   void   enableLiner(void)   { _line = true; };
 
   bool   tick(void) {
-    if (_enabled && ((++_count & _freq) == 0)) {
+    if (_enabled && ((++_count & _freq) == uint64ZERO)) {
       double  v = _count / _unit;
       if (_spin) fputs(_spinr[_draws %  4], stderr);
       if (_line) fputs(_liner[_draws % 19], stderr);
@@ -55,7 +55,7 @@ public:
       return(false);
 
     _count += increment;
-    if ((_count & _freq) == 0) {
+    if ((_count & _freq) == uint64ZERO) {
       double  v = _count / _unit;
       if (_spin) fputs(_spinr[_draws %  4], stderr);
       if (_line) fputs(_liner[_draws % 19], stderr);
diff --git a/ext/meryl/src/utility/src/utility/stddev.H b/ext/meryl/src/utility/src/utility/stddev.H
index daa4fd4..59d36b5 100644
--- a/ext/meryl/src/utility/src/utility/stddev.H
+++ b/ext/meryl/src/utility/src/utility/stddev.H
@@ -26,6 +26,9 @@
 #include <vector>
 #include <algorithm>
 
+using namespace std;
+
+
 
 //  Online mean and std.dev calculation.
 //  B. P. Welford, Technometrics, Vol 4, No 3, Aug 1962 pp 419-420.
@@ -60,19 +63,11 @@ public:
     _nn = n0;
   };
 
-  void     remove(TT val) {
+  void     remove(double val) {
     uint32 n0 = _nn - 1;
     double m0 = (n0 == 0) ? (0) : ((_nn * _mn - val) / n0);
     double s0 = _sn - (val - m0) * (val - _mn);
 
-    if (n0 == 0)   m0 = 0.0;   //  Reset mean and variance to zero when we can.
-    if (n0 <= 1)   s0 = 0.0;   //  See tests/stddevTest.C testStability() for details.
-
-    if (s0 < 0.0)              //  Assume negative values are due to stability problems,
-      s0 = 0.0;                //  and not mismatched insert() and delete() values.
-    if (-1e-10 <= m0 && m0 <= 1e-10)
-      m0 = 0.0;
-
     if (_nn == 0)
       fprintf(stderr, "ERROR: stdDev has no data; can't remove() old value.\n"), exit(1);
 
@@ -135,7 +130,7 @@ computeStdDev(TT *dist, uint64 distLen, double &mean, double &stddev, bool isSor
   //  Sort the values.  Lets us approximate the stddev for filtering out outliers.
 
   if (isSorted == false)
-    std::sort(dist, dist + distLen);
+    sort(dist, dist + distLen);
 
   //  Approximate the stddev to filter out outliers.  This is done by assuming we're normally
   //  distributed, finding the values that would represent 1 standard deviation (about 68.27% of the
@@ -182,7 +177,7 @@ computeStdDev(TT *dist, uint64 distLen, double &mean, double &stddev, bool isSor
 
 template<typename TT>
 void
-computeStdDev(std::vector<TT> dist, double &mean, double &stddev, bool isSorted=false) {
+computeStdDev(vector<TT> dist, double &mean, double &stddev, bool isSorted=false) {
   computeStdDev(dist.data(), dist.size(), mean, stddev, isSorted);
 }
 
@@ -200,7 +195,7 @@ computeMode(TT *dist, uint64 distLen, TT &mode, bool isSorted=false) {
     return;
 
   if (isSorted == false)
-    std::sort(dist, dist + distLen);
+    sort(dist, dist + distLen);
 
   uint32  modeCnt = 0;
   TT      modeVal = 0;
@@ -232,7 +227,7 @@ computeMode(TT *dist, uint64 distLen, TT &mode, bool isSorted=false) {
 
 template<typename TT>
 void
-computeMode(std::vector<TT> dist, TT &mode, bool isSorted=false) {
+computeMode(vector<TT> dist, TT &mode, bool isSorted=false) {
   computeMode(dist.data(), dist.size(), mode, isSorted);
 }
 
@@ -247,7 +242,7 @@ computeMedian(TT *dist, uint64 distLen, TT &median, bool isSorted=false) {
     return;
 
   if (isSorted == false)
-    std::sort(dist, dist + distLen);
+    sort(dist, dist + distLen);
 
   if (distLen % 2 == 0)
      median = (dist[distLen / 2 - 1] + dist[distLen / 2]) / 2;
@@ -257,7 +252,7 @@ computeMedian(TT *dist, uint64 distLen, TT &median, bool isSorted=false) {
 
 template<typename TT>
 void
-computeMedian(std::vector<TT> dist, TT &median, bool isSorted=false) {
+computeMedian(vector<TT> dist, TT &median, bool isSorted=false) {
   computeMedian(dist.data(), dist.size(), median, isSorted);
 }
 
@@ -274,11 +269,11 @@ computeMedianAbsoluteDeviation(TT *dist, uint64 distLen, TT &median, TT &mad, bo
     return;
 
   if (isSorted == false)
-    std::sort(dist, dist + distLen);
+    sort(dist, dist + distLen);
 
   computeMedian(dist, distLen, median, true);
 
-  std::vector<TT>  m;
+  vector<TT>  m;
 
   for (uint64 ii=0; ii<distLen; ii++) {
     if (dist[ii] < median)
@@ -287,14 +282,14 @@ computeMedianAbsoluteDeviation(TT *dist, uint64 distLen, TT &median, TT &mad, bo
       m.push_back(dist[ii] - median);
   }
 
-  std::sort(m.begin(), m.end());
+  sort(m.begin(), m.end());
 
   mad = m[ m.size()/2 ];
 };
 
 template<typename TT>
 void
-computeMedianAbsoluteDeviation(std::vector<TT> dist, TT &median, TT &mad, bool isSorted=false) {
+computeMedianAbsoluteDeviation(vector<TT> dist, TT &median, TT &mad, bool isSorted=false) {
   computeMedianAbsoluteDeviation(dist.data(), dist.size(), median, mad, isSorted);
 }
 
@@ -334,7 +329,7 @@ public:
 
   void               add(uint64 data, uint32 count=1) {
     while (_histogramAlloc <= data)
-      resizeArray(_histogram, _histogramMax+1, _histogramAlloc, _histogramAlloc * 2, _raAct::copyData | _raAct::clearNew);
+      resizeArray(_histogram, _histogramMax+1, _histogramAlloc, _histogramAlloc * 2, resizeArray_copyData | resizeArray_clearNew);
 
     if (_histogramMax < data)
       _histogramMax = data;
@@ -352,6 +347,18 @@ public:
   uint64             median(void)           { finalizeData(); return(_median);   };
   uint64             mad(void)              { finalizeData(); return(_mad);      };
 
+#if 0
+  vector<uint64>    &histogram(void) {    //  Returns pointer to private histogram data
+    finalizeData();
+    return(_histogram);
+  };
+
+  vector<uint64>    &Nstatistics(void) {  //  Returns pointer to private N data
+    finalizeData();
+    return(_Nstatistics);
+  };
+#endif
+
   void               clearStatistics(void) {
     _numObjs  = 0;
 
diff --git a/ext/meryl/src/utility/src/utility/strings.C b/ext/meryl/src/utility/src/utility/strings.C
index 1d5b8c3..4147ee9 100644
--- a/ext/meryl/src/utility/src/utility/strings.C
+++ b/ext/meryl/src/utility/src/utility/strings.C
@@ -20,165 +20,208 @@
 #include "strings.H"
 #include "arrays.H"
 
-////////////////////////////////////////////////////////////
-//
-//  Strip whitespace from the end of a line.
-//
 
-void
-chomp(char *S) {
-  char *t = S;
+uint64
+scaledNumber(uint64 n, uint32 div) {
 
-  while (*t != 0)
-    t++;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+  if (n > 9999)   n /= div;
+
+  return(n);
+}
 
-  t--;
 
-  while ((t >= S) && (isWhiteSpace(*t) == true))
-    *t-- = 0;
+char
+scaledUnit(uint64 n, uint32 div) {
+  char u = ' ';
+
+  if (n > 9999)  {  n /= div; u = 'k';  }
+  if (n > 9999)  {  n /= div; u = 'M';  }
+  if (n > 9999)  {  n /= div; u = 'G';  }
+  if (n > 9999)  {  n /= div; u = 'T';  }
+  if (n > 9999)  {  n /= div; u = 'P';  }
+  if (n > 9999)  {  n /= div; u = 'E';  }
+  if (n > 9999)  {  n /= div; u = 'Z';  }
+  if (n > 9999)  {  n /= div; u = 'Y';  }
+
+  return(u);
 }
 
 
 
+const char *
+scaledName(uint64 n, uint32 div) {
+  const char *s = "";
+
+  if (n > 9999)  {  n /= div; s = " thousand";     }
+  if (n > 9999)  {  n /= div; s = " million";      }
+  if (n > 9999)  {  n /= div; s = " billion";      }
+  if (n > 9999)  {  n /= div; s = " trillion";     }
+  if (n > 9999)  {  n /= div; s = " quadrillion";  }
+  if (n > 9999)  {  n /= div; s = " quintillion";  }
+  if (n > 9999)  {  n /= div; s = " sextillion";   }
+  if (n > 9999)  {  n /= div; s = " septillion";   }
+
+  return(s);
+}
+
 
-////////////////////////////////////////////////////////////
-//
-//  Convert a line into a key=value pair.
-//
 
 bool
-KeyAndValue::find(const char *line) {
-  char  *ptr = nullptr;
+decodeBoolean(char *value) {
+  bool ret = false;
+
+  switch (value[0]) {
+    case '0':
+    case 'f':
+    case 'F':
+    case 'n':
+    case 'N':
+      ret = false;
+      break;
+    case '1':
+    case 't':
+    case 'T':
+    case 'y':
+    case 'Y':
+      ret = true;
+      break;
+    default:
+      fprintf(stderr, "decodeBoolean()-- unrecognized value '%s'\n", value);
+      break;
+  }
 
-  //  Reset our state, but return fail if there is no line.
+  return(ret);
+}
 
-  _key = nullptr;
-  _val = nullptr;
 
-  if (isEmptyString(line) == true)
-    return(false);
 
-  //  Copy the string so we can do bad things to it.
+//  Returns true if a key and value are found.  line is modified.
+//  Returns true, with value == NULL, if no delimiter is found.
+//  Returns false if the line is blank, or is a comment.
+//
+bool
+KeyAndValue::find(char *line) {
 
-  duplicateArray(_line, _lineLen, _lineMax, line, (uint32)strlen(line) + 1);
+  key_ = NULL;
+  val_ = NULL;
 
-  //  Zip ahead until the first non-space letter.
-  //
-  //  If the letter is a comment or the delimiter, we're done; there is no key.
+  if (line == NULL)
+    return(false);
 
-  ptr = _line;
+  key_ = line;
 
-  while (isWhiteSpace(*ptr) == true)          //  Spaces before the key.
-    ptr++;
+  while  (isspace((unsigned char)*key_)) //  Spaces before the key.  isspace() returns some
+    key_++;                              //  non-zero int for a space, which need not equal 'true'.
 
-  if ((*ptr == 0) ||
-      (isComment(*ptr) == true) ||
-      (isDelimiter(*ptr) == true))
+  if ((iscomment(*key_) == true) ||      //  If we're at a comment right now, there is no key
+      (*key_ == 0)) {                    //  and we return failure.
+    key_ = NULL;
+    val_ = NULL;
     return(false);
+  }
 
-  _key = ptr;
+  val_ = key_;                           //  We're at the key now
 
-  //  Keep zipping ahead until the end of the line.
-  //    Detect the first comment mark that is preceeded by a space.
-  //      Change it to NUL to terminate the string and return.
-  //
-  //    Detect the key=value delimiter.
-  //      Change it to a space so we can iterate over it.
-  //      lastspace must be set before this is changed.
+  while ((*val_ != 0) &&
+         (isdelimiter(*val_) == false))  //  The key cannot contain a delimiter.
+    val_++;
+
+  if (*val_ == 0) {                      //   If at the end of the string, there isn't a
+    val_ = NULL;                         //   value, but we'll return true and a key anyway.
+    return(true);
+  }
 
-  char *equals    = nullptr;
-  char *eol       = nullptr;
-  bool  lastspace = false;
+  *val_++ = 0;
 
-  while (1) {
-    eol = ptr;
+  while (isdelimiter(*val_) == true) {   //  Spaces or delimiter after the key
+    *val_ = 0;
+    val_++;
+  }
 
-    if ((lastspace == true) && (isComment(*ptr) == true)) {
-      *ptr = 0;
-      break;
-    }
+  if (*val_ == 0)                        //  And there is no value, must be a filename.
+    return(true);
 
-    lastspace = isWhiteSpace(*ptr);
+  char *eol = val_;                      //  We're at the value now
 
-    if ((isDelimiter(*ptr) == true) && (equals == nullptr)) {
-      *ptr = ' ';
-      equals = ptr;
-    }
+  //  If quoted, all we need to do is find the other quote and stop.
+  if ((*val_ == '"') ||
+      (*val_ == '\'')) {
+    val_++;
+    eol++;
 
-    if (*ptr == 0)
-      break;
+    while ((*eol != 0) && (*eol != val_[-1]))   //  Stop at the matching close quote (val_[-1] is the opening one), or at end of line.
+      eol++;                             //  The value CAN contain delimiters and comment markers.
 
-    ptr++;
+    *eol = 0;
   }
 
-  //  If no delimiter, we're done.  There cannot be a key/value pair.
+  //  Otherwise, not quoted.  Find the first comment marker (or eol) then backup to the first non-space.
+  else {
+    while (iscomment(*eol) == false)     //  The value ends at the first comment marker or end of line.
+      eol++;                             //  It can contain spaces and other nasties.
 
-  if (equals == nullptr)
-    return(false);
+    eol--;                               //  Back up off the comment or eol.
 
-  //  Cleanup 1:  Find the last letter in the key make the key stop there.
+    while (isspace((unsigned char)*eol)) //  And keep backing up as long as we're a space (isspace() is non-zero, not 'true').
+      eol--;
 
-  while (isWhiteSpace(*equals) == true)
-    equals--;
+    eol++;                               //  Move past the last non-space, non-comment
 
-  equals++;      //  Move from the last letter of the key.
-  *equals = 0;   //  Terminate the key string.
-  equals++;      //  Move to the next letter, either space or the value.
+    *eol = 0;                            //  And terminate the value
+  }
 
-  //  Cleanup 2: Find the first letter of the value.
-  //  If we're at eol now, return true with an empty value string.
+  return(true);
+}
 
-  while (isWhiteSpace(*equals) == true)
-    equals++;
 
-  _val = equals;
 
-  if (equals == eol)
-    return(true);
+splitToWords::splitToWords(const char *string, splitType type) {
+  _wordsLen  = 0;
+  _wordsMax  = 0;
+  _words     = NULL;
 
-  //  Cleanup 3: Find the last letter of the value and make the value stop
-  //  there.
+  _charsLen = 0;
+  _charsMax = 0;
+  _chars    = NULL;
 
-  assert(*eol == 0);
+  if (string)
+    split(string, type);
+}
 
-  eol--;
 
-  while (isWhiteSpace(*eol) == true) {
-    *eol = 0;
-    eol--;
-  }
 
-  return(true);
+splitToWords::~splitToWords() {
+  delete [] _chars;
+  delete [] _words;
 }
 
 
 
-////////////////////////////////////////////////////////////
-//
-//  Split the input 'line' into an array of words or path
-//  components.
-
 void
-splitToWords::split(const char *line, splitType type, char sep) {
+splitToWords::split(const char *line, splitType type) {
 
-  //  Initialize to no words and no characters.
-  //  Then return if the input line is empty.
+  _wordsLen = 0;        //  Initialize to no words
+  _charsLen = 0;        //  and no characters.
 
-  _wordsLen = 0;
-  _charsLen = 0;
-
-  if (isEmptyString(line) == true)
+  if (line == NULL)     //  Bail if there isn't a line to process.
     return;
 
   //  Count the number of words and chars in the input line, then make
   //  sure there is space for us to store them.
 
   while (line[_charsLen] != 0)
-    if (isSeparator(line[_charsLen++], type, sep))
+    if (isSeparator(line[_charsLen++], type))
       _wordsLen++;
 
-  resizeArray(_words, 0, _wordsMax, _wordsLen + 1);
-  resizeArray(_chars, 0, _charsMax, _charsLen + 1);
+  resizeArray(_words, 0, _wordsMax, _wordsLen + 1, resizeArray_doNothing);
+  resizeArray(_chars, 0, _charsMax, _charsLen + 1, resizeArray_doNothing);
 
   //  Clear all the words pointers, and copy the input line to our storage.
   //  This greatly simplifies the loop, as we don't need to worry about
@@ -193,37 +236,17 @@ splitToWords::split(const char *line, splitType type, char sep) {
   _wordsLen = 0;
 
   for (uint32 st=1, ii=0; ii < _charsLen; ii++) {
-    if (isSeparator(line[ii], type, sep)) {   //  If the character is a word
-      _chars[ii] = 0;                         //  separator, convert to NUL,
-      st         = true;                      //  and flag the next character
-    }                                         //  as the start of a new word.
-
-    else if (st) {                            //  Otherwise, if this is the
-      _words[_wordsLen++] = _chars + ii;      //  start of a word, make
-      st                  = false;            //  a new word.
+    if (isSeparator(line[ii], type)) {      //  If the character is a word
+      _chars[ii] = 0;                       //  separator, convert to NUL,
+      st         = true;                    //  and flag the next character
+    }                                       //  as the start of a new word.
+
+    else if (st) {                          //  Otherwise, if this is the
+      _words[_wordsLen++] = _chars + ii;    //  start of a word, make
+      st                  = false;          //  a new word.
     }
   }
 }
 
 
-void
-splitToWords::clear(void) {
-  _wordsLen = 0;
-  _charsLen = 0;
-}
-
-
-void
-splitToWords::erase(void) {
-
-  delete [] _words;
-  delete [] _chars;
-
-  _wordsLen = 0;
-  _wordsMax = 0;
-  _words    = nullptr;
 
-  _charsLen = 0;
-  _charsMax = 0;
-  _chars    = nullptr;
-}
diff --git a/ext/meryl/src/utility/src/utility/strings.H b/ext/meryl/src/utility/src/utility/strings.H
index 1a98000..2b9a078 100644
--- a/ext/meryl/src/utility/src/utility/strings.H
+++ b/ext/meryl/src/utility/src/utility/strings.H
@@ -25,75 +25,144 @@
 #include <set>
 #include <vector>
 
+using namespace std;
 
-//  Some string cleanup functions.
-//
 
-void   chomp(char *S);    //  Remove whitespace from the end of a line.
+//  perl's chomp is pretty nice
+//  Not a great place to put this, but it's getting used all over.
+#ifndef chomp
+#define chomp(S)  { char *chompB_=(S), *chompE_=chompB_; while (*chompE_) chompE_++; while ((chompE_ > chompB_) && isspace((unsigned char)chompE_[-1])) *--chompE_=0; }
+#endif
 
+#ifndef munch
+#define munch(S)  { while (*(S) &&  isspace(*(S))) (S)++; }
+#endif
 
-//  Basic string functions.
+#ifndef crunch
+#define crunch(S) { while (*(S) && !isspace(*(S))) (S)++; }
+#endif
+
+
+
+//  For pretty-printing numbers.  Converts integers to, e.g., 123 k, 123 M, G, T, P.
+uint64      scaledNumber(uint64 n, uint32 div=1024);
+char        scaledUnit  (uint64 n, uint32 div=1024);
+const char *scaledName  (uint64 n, uint32 div=1024);
+
+
+
+
+
+template<typename T>
+char *
+decodeRange(char *range, T &lo, T &hi) {
+  char    *ap = range;
+
+  strtonumber(lo, ap, &ap);     //  Grab the first number.
+
+  hi = lo;                      //  Set the second to that.
+
+  if ((*ap == '-') ||           //  If this is a range,
+      (*ap == '/')) {           //  or a one-of-many selection,
+    ap++;                       //  grab the second number
+    strtonumber(hi, ap, &ap);
+  }
+
+  if (*ap == ',')               //  If the next letter continues
+    return(ap + 1);             //  move past that and return.
+
+  if (*ap == 0)                 //  If the next letter is the end
+    return(NULL);               //  of the string, return NULL.
+
+  //  Otherwise, we can't decode this range.
+
+  fprintf(stderr, "ERROR: invalid range '%s'\n", range);
+  exit(1);
+
+  return(NULL);
+}
+
+
+
+template<typename T>
+void
+decodeRange(char *range, vector<T> &bgn, vector<T> &end) {
+  char    *ap = range;
+  T        av = 0;
+  T        bv = 0;
+
+  while ((ap != NULL) && (*ap != 0)) {
+    ap = decodeRange(ap, av, bv);
+
+    bgn.push_back(av);
+    end.push_back(bv);
+  }
+}
+
+
+
+template<typename T>
+void
+decodeRange(char *range, set<T> &ranges) {
+  char    *ap = range;
+  T        av = 0;
+  T        bv = 0;
+
+  while ((ap != NULL) && (*ap != 0)) {
+    ap = decodeRange(ap, av, bv);
+
+    for (T xx=av; xx<=bv; xx++)
+      ranges.insert(xx);
+  }
+}
+
+
+
+//  Decodes a string beginning with 0/1, f/t or n/y (either case) into a bool; anything else is reported on stderr and decodes as false.
+bool   decodeBoolean(char *value);
 
-inline
-bool
-isEmptyString(char const *s)   { return((s == nullptr) || (s[0] == 0)); }
 
-//  Convert a line into a key-value pair.
-//
-//  The line should be of the form:
-//   - 'key'           find() returns true with value == nullptr
-//   - 'key = value'   find() returns true
-//   - 'key : value'   find() returns true
-//
-//  In all cases the line is modified by removing whitespace and inserting
-//  NUL characters at the end of the key and value.
-//
-//  Comments are accepted with either '!' or '#' at the start
-//  of the line, or preceeded by a white-space letter.
 
 class KeyAndValue {
 public:
-  KeyAndValue(const char *line = nullptr)   {  find(line);       };
-  ~KeyAndValue()                            {  delete [] _line;  };
+  KeyAndValue(char *line = NULL)   {  find(line);  };
+  ~KeyAndValue()                   {               };
 
-public:
-  bool     find(const char *line);
+  bool     find(char *line);
 
-public:
-  char    *key(void)           { return(_key); };
-  char    *value(void)         { return(_val); };
+  char    *key(void)           { return(key_); };
+  char    *value(void)         { return(val_); };
 
-private:
-  uint32  _lineMax = 0;
-  uint32  _lineLen = 0;
-  char   *_line    = nullptr;
+  bool     value_bool(void)    { return(decodeBoolean(val_)); };
+
+  int32    value_int32(void)   { return(strtol (val_, NULL, 10)); };
+  int64    value_int64(void)   { return(strtoll(val_, NULL, 10)); };
 
-  char   *_key     = nullptr;
-  char   *_val     = nullptr;
+  uint32   value_uint32(void)  { return(strtoul (val_, NULL, 10)); };
+  uint64   value_uint64(void)  { return(strtoull(val_, NULL, 10)); };
+
+  float    value_float(void)   { return(strtof(val_, NULL)); };
+  double   value_double(void)  { return(strtod(val_, NULL)); };
+
+public:
+  bool    iscomment(char c)    { return((c == '!') || (c == '#') || (c == 0)); };
+  bool    isdelimiter(char c)  { return((c == ':') || (c == '=') || isspace(c)); };
+
+  char   *key_;
+  char   *val_;
 };
 
-//  Split the input 'line' into an array of words or path
-//  components.
+
 
 enum splitType {
   splitWords  = 0,
-  splitPaths  = 1,
-  splitLetter = 2
+  splitPaths  = 1
 };
 
 class splitToWords {
 public:
-  splitToWords(const char *string=nullptr, splitType type=splitWords, char sep=0) {
-    split(string, type, sep);
-  };
-
-  ~splitToWords() {
-    erase();
-  };
-
-  void    split(const char *line, splitType type=splitWords, char sep=0);
-  void    clear(void);   //  Remove the words, but leave memory intact.
-  void    erase(void);   //  Remove words and free memory.
+  splitToWords(const char *string=NULL, splitType type=splitWords);
+  ~splitToWords();
 
 private:
   bool   isPath(char c) {
@@ -107,30 +176,32 @@ private:
            (c == '\r'));
   };
 
-  bool   isSeparator(char c, splitType type, char sep) {
-    return(((type == splitWords)  && (isSpace(c))) ||
-           ((type == splitPaths)  && (isPath (c))) ||
-           ((type == splitLetter) && (sep  == c)));
+  bool   isSeparator(char c, splitType type) {
+    return(((type == splitWords) && (isSpace(c))) ||
+           ((type == splitPaths) && (isPath(c))));
   };
 
 public:
+  void    split(const char *line, splitType type=splitWords);
+
   uint32  numWords(void)        { return(_wordsLen); };
 
   char   *operator[](uint32 i)  { return(first(i)); };
 
-  char   *first(uint32 i=0)     { return((_wordsLen <= i) ? nullptr : _words[i]);  };
-  char   *last(uint32 i=0)      { return((_wordsLen == 0) ? nullptr : _words[_wordsLen - i - 1]); };
-  char   *pop(void)             { return((_wordsLen == 0) ? nullptr : _words[--_wordsLen]);       };
+  char   *first(uint32 i=0)     { return((_wordsLen <= i) ? NULL : _words[i]);  };
   char   *shift(void)           {
     if (_wordsLen == 0)                     //  If no words, nothing to return.
-      return(nullptr);
+      return(NULL);
 
     for (uint32 ii=1; ii<_wordsLen; ii++)   //  Shift all words down one place, moving
-      std::swap(_words[ii-1], _words[ii]);  //  the word to shift off to the end.
+      swap(_words[ii-1], _words[ii]);       //  the word to shift off to the end.
 
     return(_words[--_wordsLen]);            //  Return the word we shifted out.
   };
 
+  char   *last(uint32 i=0)      { return((_wordsLen == 0) ? NULL : _words[_wordsLen - i - 1]); };
+  char   *pop(void)             { return((_wordsLen == 0) ? NULL : _words[--_wordsLen]);       };
+
   int32   toint32(uint32 i)     { return(strtoint32 (_words[i])); };
   uint32  touint32(uint32 i)    { return(strtouint32(_words[i])); };
   int64   toint64(uint32 i)     { return(strtoint64 (_words[i])); };
@@ -138,13 +209,13 @@ public:
   double  todouble(uint32 i)    { return(strtodouble(_words[i])); };
 
 private:
-  uint32    _wordsLen = 0;         //  An array of pointers into _chars
-  uint32    _wordsMax = 0;         //  for the words in the string.
-  char    **_words    = nullptr;
+  uint32    _wordsLen;
+  uint32    _wordsMax;
+  char    **_words;
 
-  uint32    _charsLen = 0;         //  A modified copy of the
-  uint32    _charsMax = 0;         //  input string.
-  char     *_chars    = nullptr;
+  uint32    _charsLen;
+  uint32    _charsMax;
+  char     *_chars;
 };
 
 
diff --git a/ext/meryl/src/utility/src/utility/sweatShop.C b/ext/meryl/src/utility/src/utility/sweatShop.C
index eb1055e..1169493 100644
--- a/ext/meryl/src/utility/src/utility/sweatShop.C
+++ b/ext/meryl/src/utility/src/utility/sweatShop.C
@@ -48,17 +48,15 @@ public:
 class sweatShopState {
 public:
   sweatShopState(void *userData) {
-    _user      = userData;
-    _computed  = false;
-    _outputted = false;
-    _next      = 0L;
+    _user     = userData;
+    _computed = false;
+    _next     = 0L;
   };
   ~sweatShopState() {
   };
 
   void             *_user;
   bool              _computed;
-  bool              _outputted;
   sweatShopState   *_next;
 };
 
@@ -107,7 +105,6 @@ sweatShop::sweatShop(void*(*loaderfcn)(void *G),
   _loaderP          = 0L;
 
   _showStatus       = false;
-  _writeInOrder     = true;
 
   _loaderQueueSize  = 1024;
   _loaderQueueMax   = 10240;
@@ -149,7 +146,7 @@ sweatShop::setThreadData(uint32 t, void *x) {
 //  Build a list of states to add in one swoop
 //
 void
-sweatShop::loaderAddToLocal(sweatShopState *&tail, sweatShopState *&head, sweatShopState *thisState) {
+sweatShop::loaderSave(sweatShopState *&tail, sweatShopState *&head, sweatShopState *thisState) {
 
   thisState->_next  = 0L;
 
@@ -159,13 +156,14 @@ sweatShop::loaderAddToLocal(sweatShopState *&tail, sweatShopState *&head, sweatS
   } else {
     tail = head = thisState;
   }
+  _numberLoaded++;
 }
 
 
 //  Add a bunch of new states to the queue.
 //
 void
-sweatShop::loaderAppendToGlobal(sweatShopState *&tail, sweatShopState *&head, uint32 num) {
+sweatShop::loaderAppend(sweatShopState *&tail, sweatShopState *&head) {
   int err;
 
   if ((tail == 0L) || (head == 0L))
@@ -184,8 +182,6 @@ sweatShop::loaderAppendToGlobal(sweatShopState *&tail, sweatShopState *&head, ui
   }
   _loaderP        = head;
 
-  _numberLoaded += num;
-
   err = pthread_mutex_unlock(&_stateMutex);
   if (err != 0)
     fprintf(stderr, "sweatShop::loaderAppend()--  Failed to unlock mutex (%d).  Fail.\n", err), exit(1);
@@ -198,52 +194,56 @@ sweatShop::loaderAppendToGlobal(sweatShopState *&tail, sweatShopState *&head, ui
 
 void*
 sweatShop::loader(void) {
-  struct timespec   naptime;
-  sweatShopState   *tail       = nullptr;  //  A local list, to reduce the number of times we
-  sweatShopState   *head       = nullptr;  //  lock the global list.
-  uint32            numLoaded  = 0;
 
+  struct timespec   naptime;
   naptime.tv_sec      = 0;
   naptime.tv_nsec     = 166666666ULL;  //  1/6 second
 
-  while (1) {
-    void *object = NULL;
+  //  We can batch several loads together before we push them onto the
+  //  queue, this should reduce the number of times the loader needs to
+  //  lock the queue.
+  //
+  //  But it also increases the latency, so it's disabled by default.
+  //
+  sweatShopState        *tail       = 0L;  //  The first thing loaded
+  sweatShopState        *head       = 0L;  //  The last thing loaded
+  uint32                 numLoaded  = 0;
+
+  bool  moreToLoad = true;
+
+  while (moreToLoad) {
 
-    while (_numberLoaded > _numberComputed + _loaderQueueSize)  //  Sleep if the queue is too big.
+    //  Zzzzzzz....
+    while (_numberLoaded > _numberComputed + _loaderQueueSize)
       nanosleep(&naptime, 0L);
 
-    //  If a userLoader function exists, use it to load the data object, then
-    //  make a new state for that object.
+    void *object = NULL;
 
     if (_userLoader)
       object = (*_userLoader)(_globalUserData);
 
     sweatShopState  *thisState = new sweatShopState(object);
 
-    //  If there is no user pointer, we've run out of inputs.
-    //  Push on the empty state to the local list, force an append
-    //  to the global list, and exit this loader function.
-
-    if (thisState->_user == nullptr) {
-      loaderAddToLocal(tail, head, thisState);
-      loaderAppendToGlobal(tail, head, numLoaded + 1);
-
-      return(nullptr);
-    }
-
-    //  Otherwise, we've loaded a user object.  Push it onto the local list,
-    //  then merge into the global list if the local list is long enough.
-
-    loaderAddToLocal(tail, head, thisState);
-    numLoaded++;
-
-    if (numLoaded >= _loaderBatchSize) {
-      loaderAppendToGlobal(tail, head, numLoaded);
-      numLoaded = 0;
+    //  If we actually loaded a new state, add it
+    //
+    if (thisState->_user) {
+      loaderSave(tail, head, thisState);
+      numLoaded++;
+      if (numLoaded >= _loaderBatchSize)           //  Batch is full: publish it to the shared queue,
+        loaderAppend(tail, head), numLoaded = 0;   //  then restart the count so the next batch can fill.
+    } else {
+      //  Didn't read, must be all done!  Push on the end-of-input marker state.
+      //
+      loaderSave(tail, head, new sweatShopState(0L));
+      loaderAppend(tail, head);
+
+      moreToLoad = false;
+      delete thisState;
     }
   }
 
-  return(nullptr);  //  Never returns.
+  //fprintf(stderr, "sweatShop::reader exits.\n");
+  return(0L);
 }
 
 
@@ -286,7 +286,7 @@ sweatShop::worker(sweatShopWorker *workerData) {
 
     err = pthread_mutex_unlock(&_stateMutex);
     if (err != 0)
-      fprintf(stderr, "sweatShop::worker()--  Failed to lock mutex (%d).  Fail.\n", err), exit(1);
+      fprintf(stderr, "sweatShop::worker()--  Failed to unlock mutex (%d).  Fail.\n", err), exit(1);
 
 
     if (workerData->workerQueueLen == 0) {
@@ -324,68 +324,45 @@ sweatShop::worker(sweatShopWorker *workerData) {
 }
 
 
-void
-sweatShop::writerWrite(sweatShopState *w) {
-
-  if (_userWriter)
-    (*_userWriter)(_globalUserData, w->_user);
-  _numberOutput++;
-
-  w->_outputted = true;
-}
-
-
 void*
 sweatShop::writer(void) {
   sweatShopState  *deleteState = 0L;
-  struct timespec naptime1 = { .tv_sec = 0, .tv_nsec = 5000000ULL };
-  struct timespec naptime2 = { .tv_sec = 0, .tv_nsec = 5000000ULL };
-
-
-  while ((_writerP        != nullptr) &&
-         (_writerP->_user != nullptr)) {
-
-    //  If a complete result, write it.
-    if ((_writerP->_computed  == true) &&
-        (_writerP->_outputted == false)) {
-      writerWrite(_writerP);
-      continue;
-    }
 
-    //  If we can write output out-of-order, search ahead
-    //  for any results and output them.
-    //  if (_outOfOrder == true)
-    if (_writeInOrder == false) {
-      for (sweatShopState *ss = _writerP; ss != nullptr; ss = ss->_next)
-        if ((ss->_computed  == true) &&
-            (ss->_outputted == false)) {
-          writerWrite(ss);
-        }
-    }
+  //  Wait for output to appear, then write.
+  //
+  while (_writerP && _writerP->_user) {
 
-    //  If no next, wait for input to appear.  We can't purge this node
-    //  from the list until there is a next, else we lose the list!
-    if (_writerP->_next == nullptr) {
-      nanosleep(&naptime1, 0L);
-      continue;
-    }
+    if        (_writerP->_computed == false) {
+      //  Wait for a slow computation.
+      struct timespec   naptime;
+      naptime.tv_sec      = 0;
+      naptime.tv_nsec     = 5000000ULL;
 
-    //  If already output, remove the node.
-    if (_writerP->_outputted == true) {
-      sweatShopState *ds = _writerP;
-      _writerP           = _writerP->_next;
+      //fprintf(stderr, "Writer waits for slow thread at " F_U64 ".\n", _numberOutput);
+      nanosleep(&naptime, 0L);
+    } else if (_writerP->_next == 0L) {
+      //  Wait for the input.
+      struct timespec   naptime;
+      naptime.tv_sec      = 0;
+      naptime.tv_nsec     = 5000000ULL;
 
-      delete ds;
-      continue;
+      //fprintf(stderr, "Writer waits for all threads at " F_U64 ".\n", _numberOutput);
+      nanosleep(&naptime, 0L);
+    } else {
+      if (_userWriter)
+        (*_userWriter)(_globalUserData, _writerP->_user);
+      _numberOutput++;
+
+      deleteState = _writerP;
+      _writerP    = _writerP->_next;
+      delete deleteState;
     }
-
-    //  Otherwise, we need to wait for a state to appear on the queue.
-    nanosleep(&naptime2, 0L);
   }
 
   //  Tell status to stop.
   _writerP = 0L;
 
+  //fprintf(stderr, "sweatShop::writer exits.\n");
   return(0L);
 }
 
diff --git a/ext/meryl/src/utility/src/utility/sweatShop.H b/ext/meryl/src/utility/src/utility/sweatShop.H
index 223dedf..ae62583 100644
--- a/ext/meryl/src/utility/src/utility/sweatShop.H
+++ b/ext/meryl/src/utility/src/utility/sweatShop.H
@@ -49,8 +49,6 @@ public:
 
   void        setWriterQueueSize(uint32 queueSize) { _writerQueueSize = queueSize;  _writerQueueMax = queueSize; };
 
-  void        setInOrderOutput(bool o)             { _writeInOrder = o; };
-
   void        run(void *user=0L, bool beVerbose=false);
 private:
 
@@ -67,11 +65,9 @@ private:
   void   *status(void);
 
   //  Utilities for the loader thread
-  void    loaderAddToLocal(sweatShopState *&tail, sweatShopState *&head, sweatShopState *thisState);
-  void    loaderAppendToGlobal(sweatShopState *&tail, sweatShopState *&head, uint32 num);
-
-  //  Utilities for the writer thread
-  void    writerWrite(sweatShopState *w);
+  //void    loaderAdd(sweatShopState *thisState);
+  void    loaderSave(sweatShopState *&tail, sweatShopState *&head, sweatShopState *thisState);
+  void    loaderAppend(sweatShopState *&tail, sweatShopState *&head);
 
   pthread_mutex_t        _stateMutex;
 
@@ -86,7 +82,6 @@ private:
   sweatShopState        *_loaderP;  //  Where input is put, the head
 
   bool                   _showStatus;
-  bool                   _writeInOrder;
 
   uint32                 _loaderQueueSize, _loaderQueueMin, _loaderQueueMax;
   uint32                 _loaderBatchSize;
diff --git a/ext/meryl/src/utility/src/utility/system-stackTrace.C b/ext/meryl/src/utility/src/utility/system-stackTrace.C
index 456ec7d..d391538 100644
--- a/ext/meryl/src/utility/src/utility/system-stackTrace.C
+++ b/ext/meryl/src/utility/src/utility/system-stackTrace.C
@@ -161,6 +161,10 @@ AS_UTL_catchCrash(int sig_num, siginfo_t *UNUSED(info), void *UNUSED(ctx)) {
 
 #include "backward.hpp"
 
+//namespace backward {
+//backward::SignalHandling sh;
+//} // namespace backward
+
 void
 AS_UTL_catchCrash(int sig_num, siginfo_t *UNUSED(info), void *UNUSED(ctx)) {
 
@@ -212,6 +216,17 @@ AS_UTL_catchCrash(int sig_num, siginfo_t *UNUSED(info), void *UNUSED(ctx)) {
 
   }
 
+#if 0
+  backward::Printer p;
+
+  p.snippet = true;
+  p.object  = true;
+  p.color   = false;
+  p.address = true;
+
+  p.print(st);
+#endif
+
   //  Pass the signal through, only so a core file can get generated.
 
   struct sigaction sa;
diff --git a/ext/meryl/src/utility/src/utility/system.C b/ext/meryl/src/utility/src/utility/system.C
index 38886ff..faab801 100644
--- a/ext/meryl/src/utility/src/utility/system.C
+++ b/ext/meryl/src/utility/src/utility/system.C
@@ -33,6 +33,10 @@
 #include "jemalloc/jemalloc.h"
 #endif
 
+#if !defined(__CYGWIN__) && !defined(_WIN32)
+#include <sys/sysctl.h>
+#endif
+
 
 
 double
@@ -99,7 +103,7 @@ getProcessTime(void) {
   double         tm = 0;
 
   if (gettimeofday(&tp, NULL) == 0)
-    tm  = tp.tv_sec + tp.tv_usec / 1000000.0;
+    tm  = tp.tv_sec + tp.tv_usec / 1000000.0;  //  tv_usec counts microseconds.
 
   if (st == 0.0)
     st = tm;
@@ -129,7 +133,7 @@ getProcessSize(void) {
 uint64
 getProcessSizeLimit(void) {
   struct rlimit rl;
-  uint64        sz = uint64max;
+  uint64        sz = ~uint64ZERO;
 
   if (getrlimit(rl) == true)
     sz = rl.rlim_cur;
@@ -162,145 +166,56 @@ getBytesAllocated(void) {
 
 
 
-uint64
-getPhysicalMemorySize(void) {
-  uint64  physPages  = sysconf(_SC_PHYS_PAGES);
-  uint64  pageSize   = sysconf(_SC_PAGESIZE);
-  uint64  physMemory = physPages * pageSize;
-
-  return(physMemory);
-}
-
+#ifdef HW_PHYSMEM
 
+//  MacOS, FreeBSD
 
-//  Return the size of a page of memory.  Every OS we care about (MacOS,
-//  FreeBSD, Linux) claims to have getpagesize().
-//
 uint64
-getPageSize(void) {
-  return(getpagesize());
-}
-
-
+getPhysicalMemorySize(void) {
+  uint64  physMemory = 0;
 
-//  Query the machine or the environment to find any memory size limit.  If
-//  there is no environment limit, the physical memory size is returned.
-//
-//  Slurm variables (from sbatch man page).
-//    SLURM_MEM_PER_CPU
-//      Set if --mem-per-cpu is supplied to sbatch.
-//      "SLURM_MEM_PER_CPU=2048" for a request of --mem-per-cpu=2g
-//
-//    SLURM_MEM_PER_NODE
-//      Set if --mem is supplied to sbatch.
-//      "SLURM_MEM_PER_NODE=5120" for a request of --mem=5g
-//
-//    SLURM_MEM_PER_GPU
-//      Requested memory per allocated GPU.
-//        Only set if the --mem-per-gpu option is specified.
-//        Not checked for below.
-//
-//  There doesn't appear to be a comparable environment variable for SGE.
-//
-//  PBS/OpenPBS/PBS Pro variables.
-//    PBS_RESC_MEM
-//    TORQUE_RESC_MEM  (probably obsolete)
-//      Potentially memory in bytes.
-//
-//
-uint64
-getMaxMemoryAllowed(void) {
-  char    *env;
-  uint64   maxmem = getPhysicalMemorySize();
+  int     mib[2] = { CTL_HW, HW_PHYSMEM };
+  size_t  len    = sizeof(uint64);
 
-  env = getenv("SLURM_MEM_PER_CPU");
-  if (env)
-    maxmem = getMaxThreadsAllowed() * strtouint64(env) * 1024 * 1024;
+  errno = 0;
 
-  env = getenv("SLURM_MEM_PER_NODE");
-  if (env)
-    maxmem = strtouint64(env) * 1024 * 1024;
+  if (sysctl(mib, 2, &physMemory, &len, NULL, 0) != 0)
+    fprintf(stderr, "getPhysicalMemorySize()-- sysctl() failed to return CTL_HW, HW_PHYSMEM: %s\n", strerror(errno)), exit(1);
 
-  env = getenv("PBS_RESC_MEM");
-  if (env)
-    maxmem = strtouint64(env);
+  if (len != sizeof(uint64)) {
+#ifdef HW_MEMSIZE
+    mib[1] = HW_MEMSIZE;
+    len = sizeof(uint64);
+    if (sysctl(mib, 2, &physMemory, &len, NULL, 0) != 0 || len != sizeof(uint64))
+#endif
+      fprintf(stderr, "getPhysicalMemorySize()-- sysctl() failed to return CTL_HW, HW_PHYSMEM: %s\n", strerror(errno)), exit(1);
+  }
 
-  return(maxmem);
+  return(physMemory);
 }
 
+#else
 
+//  Linux, FreeBSD
 
-//  There is a bit of a race condition in here.  On our grid, at least, a
-//  multi-cpu interactive job sets both SLURM_JOB_CPUS_PER_NODE and
-//  OMP_NUM_THREADS - but sets the former to the correct value and the
-//  latter to one.
-//
-//  Because of this, we let the grid variables overwrite the OpenMP
-//  variable, and further reset OpenMP to use whatever the grid has
-//  told us to use.
-//
-//  OpenMP variables.
-//    OMP_NUM_THREADS
-//     - we don't query this, and instead use omp_get_max_threads(),
-//       because if OMP_NUM_THREADS isn't set, the function will
-//       return the number of CPUs on the host.
-//
-//  Slurm variables (from sbatch man page).
-//    SLURM_CPUS_ON_NODE
-//     - Number of CPUS on the allocated node.
-//
-//    SLURM_JOB_CPUS_PER_NODE
-//     - --cpus-per-node
-//     - Count of processors available to the job on this node. Note the
-//       select/linear plugin allocates entire nodes to jobs, so the value
-//       indicates the total count of CPUs on the node. The select/cons_res
-//       plugin allocates individual processors to jobs, so this number
-//       indicates the number of processors on this node allocated to the
-//       job.
-//
-//    SLURM_JOB_NUM_NODES
-//     - total number of nodes in the job's resource allocation
-//
-//  PBS/OpenPBS/PBS Pro variables (from Torque 9.0.3).
-//    PBS_NUM_NODES    - Number of nodes allocated to the job
-//    PBS_NUM_PPN      - Number of procs per node allocated to the job
-//    PBS_NCPUS        - (older version of PBS_NUM_PPN?)
-//    PBS_NP           - Number of execution slots (cores) for the job
-//    TORQUE_RESC_PROC - (can't find any doc on this)
-//
-//  SGE variables.
-//    NSLOTS
-//
-uint32
-getMaxThreadsAllowed(void) {
-  char    *env;
-  uint32   nAllowed = omp_get_max_threads();
-
-  env = getenv("SLURM_JOB_CPUS_PER_NODE");
-  if (env)
-    nAllowed = strtouint32(env);
-
-  env = getenv("PBS_NCPUS");
-  if (env)
-    nAllowed = strtouint32(env);
-
-  env = getenv("PBS_NUM_PPN");
-  if (env)
-    nAllowed = strtouint32(env);
+uint64
+getPhysicalMemorySize(void) {
+  uint64  physPages  = sysconf(_SC_PHYS_PAGES);
+  uint64  pageSize   = sysconf(_SC_PAGESIZE);
+  uint64  physMemory = physPages * pageSize;
 
-  env = getenv("NSLOTS");
-  if (env)
-    nAllowed = strtouint32(env);
+  return(physMemory);
+}
 
-  omp_set_num_threads(nAllowed);
+#endif
 
-  return(nAllowed);
-}
 
 
 
-uint32
-getNumThreadsActive(void) {
-  return(omp_get_num_threads());
+//  Return the size of a page of memory.  Every OS we care about (MacOS, FreeBSD, Linux)
+//  claims to have getpagesize().
+//
+uint64
+getPageSize(void) {
+  return(getpagesize());
 }
-
diff --git a/ext/meryl/src/utility/src/utility/system.H b/ext/meryl/src/utility/src/utility/system.H
index 0df676c..088541e 100644
--- a/ext/meryl/src/utility/src/utility/system.H
+++ b/ext/meryl/src/utility/src/utility/system.H
@@ -40,11 +40,6 @@ uint64   getPhysicalMemorySize(void);
 
 uint64   getPageSize(void);
 
-uint64   getMaxMemoryAllowed(void);
-
-uint32   getMaxThreadsAllowed(void);
-uint32   getNumThreadsActive(void);
-
 
 void  AS_UTL_catchCrash(int sig_num, siginfo_t *info, void *ctx);
 
diff --git a/ext/meryl/src/utility/src/utility/types.C b/ext/meryl/src/utility/src/utility/types.C
index ff8c750..586e2b2 100644
--- a/ext/meryl/src/utility/src/utility/types.C
+++ b/ext/meryl/src/utility/src/utility/types.C
@@ -18,516 +18,82 @@
  */
 
 #include "types.H"
-#include "strings.H"
 
-
-////////////////////////////////////////////////////////////
-//
-//  Sadly, there is no equivalent of strtoull() for 128-bit integers, so I
-//  provide my own.  Only base 10 is supported.  Overflow isn't handled.
-//
-//  The obvious implementation of strtollll() -- that being to sum all the
-//  digits and then negate the sum for negative values -- doesn't handle
-//  int128min technically correct.  It ends up overflowing the (positive)
-//  int128 by one.  This results in int128min.  Fortunately, int128min ==
-//  -int128min, and the negation done by 'neg' doesn't do anything.  As
-//  implemented below, though, we instead add or subtract each new digit,
-//  giving us overflow-free results (but a little slower).
-//
-
-uint128
-strtoullll(char const *nptr, char **endptr) {
-  uint128     res = 0;
-  char const *ptr = nptr;
-
-  if (isEmptyString(ptr))
-    return(res);
-
-  while ((*ptr != 0) && (isWhiteSpace(*ptr) == true))
-    ptr++;
-
-  while ((*ptr != 0) && (isDecDigit(*ptr) == true)) {
-    res *= 10;
-    res += asciiDecToInteger(*ptr);
-
-    ptr++;
-  }
-
-  if (endptr)
-    *endptr = (char *)ptr;
-  return(res);
-}
-
-int128
-strtollll(char const *nptr, char **endptr) {
-  int128      res = 0;
-  bool        neg = false;
-  char const *ptr = nptr;
-
-  if (isEmptyString(ptr))
-    return(res);
-
-  while ((*ptr != 0) && (isWhiteSpace(*ptr) == true))
-    ptr++;
-
-  switch (*ptr) {
-    case '-':  ptr++;  neg = true;  break;
-    case '+':  ptr++;               break;
-    default:                        break;
-  }
-
-  while ((*ptr != 0) && (isDecDigit(*ptr) == true)) {
-    res *= 10;
-
-    if (neg == false)
-      res += asciiDecToInteger(*ptr);
-    else
-      res -= asciiDecToInteger(*ptr);
-
-    ptr++;
-  }
-
-  if (endptr)
-    *endptr = (char *)ptr;
-  return(res);
-}
-
-
-
-////////////////////////////////////////////////////////////
-//
-//  Test if a string is a number in the desired encoding.
+//  In hex, a 128-bit integer needs 32 digits.
+//  In dec, a 128-bit integer needs 39 digits.  (it is 340,282,366,920,938,463,463,374,607,431,768,211,456)
 //
+//  We'll just allocate 64 digits and be done with it.  Until we want to
+//  print that 128-bit integer as binary.  (that we overallocate space makes
+//  conversion to decimal a little bit easier)
 
-bool
-isBinNumber(char const *s) {
-  if (isEmptyString(s) == true)
-    return(false);
-
-  for (uint32 ii=0; s[ii] != 0; ii++)
-    if (isBinDigit(s[ii]) == false)
-      return(false);
-
-  return(true);
-}
-
-
-bool
-isOctNumber(char const *s) {
-  if (isEmptyString(s) == true)
-    return(false);
-
-  for (uint32 ii=0; s[ii] != 0; ii++)
-    if (isOctDigit(s[ii]) == false)
-      return(false);
-
-  return(true);
-}
-
-
-bool
-isDecNumber(char const *s, char dot) {
-  if (isEmptyString(s) == true)
-    return(false);
-
-  for (uint32 ii=0; s[ii] != 0; ii++)
-    if ((isDecDigit(s[ii]) == false) && (s[ii] != dot))
-      return(false);
-
-  return(true);
-}
-
-
-bool
-isHexNumber(char const *s) {
-  if (isEmptyString(s) == true)
-    return(false);
-
-  for (uint32 ii=0; s[ii] != 0; ii++)
-    if (isHexDigit(s[ii]) == false)
-      return(false);
-
-  return(true);
-}
-
-
-
-////////////////////////////////////////////////////////////
-//
-//  Convert a string of numbers to a pair of numbers, a vector of ranges, or
-//  a set of values.
-//
-
-template<typename numberType>
-char const *
-decodeRange(char const *range, numberType &bgn, numberType &end) {
-  char const    *ap = range;
-
-  ap = strtonumber(ap, bgn);       //  Grab the first number.
-
-  end = bgn;                       //  Set the second to that.
-
-  if ((*ap == '-') ||              //  If this is a range,
-      (*ap == '/'))                //  or a one-of-many selection,
-    ap = strtonumber(ap+1, end);   //  grab the second number
-
-  if (*ap == ',')                  //  If the next letter continues
-    return(ap + 1);                //  move past that and return.
-
-  if (*ap == 0)                    //  If the next letter is the end
-    return(nullptr);               //  of the string, return nullptr.
-
-  //  Otherwise, we can't decode this range.
-
-  fprintf(stderr, "ERROR: invalid range '%s'\n", range);
-  exit(1);
-
-  return(nullptr);
-}
-
-
-template<typename numberType>
-void
-decodeRange(char const *range, std::vector<numberType> &bgn, std::vector<numberType> &end) {
-  char const  *ap = range;
-  numberType   av = 0;
-  numberType   bv = 0;
-
-  while (isEmptyString(ap) == false) {
-    ap = decodeRange(ap, av, bv);
-
-    bgn.push_back(av);
-    end.push_back(bv);
-  }
-}
-
-
-template<typename numberType>
-void
-decodeRange(char const *range, std::set<numberType> &values) {
-  char const  *ap = range;
-  numberType   av = 0;
-  numberType   bv = 0;
-
-  while (isEmptyString(ap) == false) {
-    ap = decodeRange(ap, av, bv);
-
-    for (numberType xx=av; xx<=bv; xx++)
-      values.insert(xx);
-  }
-}
-
-
-template  char const *decodeRange<uint128>(char const *range, uint128 &bgn, uint128 &end);
-template  char const *decodeRange <int128>(char const *range,  int128 &bgn,  int128 &end);
-template  char const *decodeRange<uint64> (char const *range, uint64  &bgn, uint64  &end);
-template  char const *decodeRange <int64> (char const *range,  int64  &bgn,  int64  &end);
-template  char const *decodeRange<uint32> (char const *range, uint32  &bgn, uint32  &end);
-template  char const *decodeRange <int32> (char const *range,  int32  &bgn,  int32  &end);
-template  char const *decodeRange<uint16> (char const *range, uint16  &bgn, uint16  &end);
-template  char const *decodeRange <int16> (char const *range,  int16  &bgn,  int16  &end);
-template  char const *decodeRange<uint8>  (char const *range, uint8   &bgn, uint8   &end);
-template  char const *decodeRange <int8>  (char const *range,  int8   &bgn,  int8   &end);
-template  char const *decodeRange<double> (char const *range, double  &bgn, double  &end);
-
-template  void  decodeRange<uint128>(char const *range, std::vector<uint128> &bgn, std::vector<uint128> &end);
-template  void  decodeRange <int128>(char const *range, std::vector <int128> &bgn, std::vector <int128> &end);
-template  void  decodeRange<uint64> (char const *range, std::vector<uint64>  &bgn, std::vector<uint64>  &end);
-template  void  decodeRange <int64> (char const *range, std::vector <int64>  &bgn, std::vector <int64>  &end);
-template  void  decodeRange<uint32> (char const *range, std::vector<uint32>  &bgn, std::vector<uint32>  &end);
-template  void  decodeRange <int32> (char const *range, std::vector <int32>  &bgn, std::vector <int32>  &end);
-template  void  decodeRange<uint16> (char const *range, std::vector<uint16>  &bgn, std::vector<uint16>  &end);
-template  void  decodeRange <int16> (char const *range, std::vector <int16>  &bgn, std::vector <int16>  &end);
-template  void  decodeRange<uint8>  (char const *range, std::vector<uint8>   &bgn, std::vector<uint8>   &end);
-template  void  decodeRange <int8>  (char const *range, std::vector <int8>   &bgn, std::vector <int8>   &end);
-template  void  decodeRange <double>(char const *range, std::vector <double> &bgn, std::vector <double> &end);
-
-template  void  decodeRange<uint128>(char const *range, std::set<uint128> &values);
-template  void  decodeRange <int128>(char const *range, std::set <int128> &values);
-template  void  decodeRange<uint64> (char const *range, std::set<uint64>  &values);
-template  void  decodeRange <int64> (char const *range, std::set <int64>  &values);
-template  void  decodeRange<uint32> (char const *range, std::set<uint32>  &values);
-template  void  decodeRange <int32> (char const *range, std::set <int32>  &values);
-template  void  decodeRange<uint16> (char const *range, std::set<uint16>  &values);
-template  void  decodeRange <int16> (char const *range, std::set <int16>  &values);
-template  void  decodeRange<uint8>  (char const *range, std::set<uint8>   &values);
-template  void  decodeRange <int8>  (char const *range, std::set <int8>   &values);
-template  void  decodeRange <double>(char const *range, std::set <double> &values);
-
-
-
-////////////////////////////////////////////////////////////
-//
-//  Convert an unsigned integer to one with 3 significant digit number, and
-//  also return the correct SI base.
-//
-//  This does NOT round correctly.  We'd need to track the remainder
-//  and increment 'n' if the remainder is more than half 'div'.
-//
-
-uint64
-scaledNumber(uint64 n, uint32 div) {
-
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-  if (n > 9999)   n /= div;
-
-  return(n);
-}
-
-char
-scaledUnit(uint64 n, uint32 div) {
-  char u = ' ';
-
-  if (n > 9999)  {  n /= div; u = 'k';  }   //  kilo
-  if (n > 9999)  {  n /= div; u = 'M';  }   //  mega
-  if (n > 9999)  {  n /= div; u = 'G';  }   //  giga
-  if (n > 9999)  {  n /= div; u = 'T';  }   //  tera
-  if (n > 9999)  {  n /= div; u = 'P';  }   //  peta
-  if (n > 9999)  {  n /= div; u = 'E';  }   //  exa
-  if (n > 9999)  {  n /= div; u = 'Z';  }   //  zetta
-  if (n > 9999)  {  n /= div; u = 'Y';  }   //  yotta
-
-  return(u);
-}
-
-const char *
-scaledName(uint64 n, uint32 div) {
-  const char *s = "";
-
-  if (n > 9999)  {  n /= div; s = " thousand";     }
-  if (n > 9999)  {  n /= div; s = " million";      }
-  if (n > 9999)  {  n /= div; s = " billion";      }
-  if (n > 9999)  {  n /= div; s = " trillion";     }
-  if (n > 9999)  {  n /= div; s = " quadrillion";  }
-  if (n > 9999)  {  n /= div; s = " quintillion";  }
-  if (n > 9999)  {  n /= div; s = " sextillion";   }
-  if (n > 9999)  {  n /= div; s = " septillion";   }
-
-  return(s);
-}
-
-
-
-////////////////////////////////////////////////////////////
-//
-//  Convert an unsigned integer to a character string in the desired base.
-//
-//  All follow the same pattern except for the constants, and except for
-//  toDec() which also differs in the 'shift' operation.
-//
-//  The helper function getNextString() is the only part that needs
-//  to worry about thread safety.  Everything else operates on that
-//  returned buffer space.
-//
-//  Instead of allocating 32 strings of max length, we could allocate 4096
-//  bytes and dole pieces out of the appropriate max length as needed.
-//
-
-char     alpha[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
-char    *strAlloc  =   nullptr;
-char    *str[32]   = { nullptr };
-uint32   pos       =   0;
-
-static
-char *                   //  Helper function to return the next available buffer.
-getNextString(void) {    //  This is the only part that needs to care about threads.
-  char *ret = nullptr;   //  Everything else operates on the buffer returned.
-
-#pragma omp critical (toHEXlock)
-  {
-    if (strAlloc == nullptr) {
-      strAlloc = new char [32 * 129];
-
-      for (uint32 ii=0; ii<32; ii++)
-        str[ii] = strAlloc + ii * 129;
-    }
-
-    ret = str[pos++];
-
-    if (pos >= 32)
-      pos = 0;
-  }
-
-  return(ret);
-}
+char   dec[10]     = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
+char   hex[16]     = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
+char   str[16][64] = { 0 };
+uint32 pos         =   0;
 
 
 
 template<typename uintType>
-char *
-toBin(uintType v, char *ret, uint32 w) {
-  uint32  L = w;
-  uint32  W = sizeof(uintType) * 8;
-  uint32  l = std::min(L, W);
-  uint32  p = l;
+char const *
+toHex(uintType v) {
+  char   *ret = str[pos++];
+  uint32  w = sizeof(uintType) * 2;
+  uint32  p = w;
   uint32  s = 0;
 
+  if (pos >= 16)
+    pos = 0;
+
   while (p > 0) {
     p -= 1;
-    ret[p] = alpha[ (v >> s) & 0x1 ];
-    s += 1;
+    ret[p] = hex[ (v >> s) & 0xf ];
+    s += 4;
   }
 
   ret[w] = 0;
 
-  return(ret + w);
-}
-
-template<typename uintType>
-char const *
-toBin(uintType v, uint32 w) {
-  char *ret = getNextString();
-  toBin(v, ret, w);
   return(ret);
 }
 
-template char       *toBin<uint128>(uint128 v, char *out, uint32 width);
-template char       *toBin<uint64> (uint64  v, char *out, uint32 width);
-template char       *toBin<uint32> (uint32  v, char *out, uint32 width);
-template char       *toBin<uint16> (uint16  v, char *out, uint32 width);
-template char       *toBin<uint8>  (uint8   v, char *out, uint32 width);
-
-template char const *toBin<uint128>(uint128 v, uint32 width);
-template char const *toBin<uint64> (uint64  v, uint32 width);
-template char const *toBin<uint32> (uint32  v, uint32 width);
-template char const *toBin<uint16> (uint16  v, uint32 width);
-template char const *toBin<uint8>  (uint8   v, uint32 width);
-
-
+template char const *toHex<uint128>(uint128 v);
+template char const *toHex<uint64> (uint64  v);
+template char const *toHex<uint32> (uint32  v);
+template char const *toHex<uint16> (uint16  v);
 
-template<typename uintType>
-char *
-toOct(uintType v, char *ret, uint32 w) {
-  uint32  L = (w + 2) / 3;
-  uint32  W = sizeof(uintType) * 8 / 3 + 1;
-  uint32  l = std::min(L, W);
-  uint32  p = l;
-  uint32  s = 0;
-
-  while (p > 0) {
-    p -= 1;
-    ret[p] = alpha[ (v >> s) & 0x7 ];
-    s += 3;
-  }
 
-  ret[l] = 0;
-
-  return(ret + l);
-}
 
 template<typename uintType>
 char const *
-toOct(uintType v, uint32 w) {
-  char *ret = getNextString();
-  toOct(v, ret, w);
-  return(ret);
-}
-
-template char       *toOct<uint128>(uint128 v, char *out, uint32 width);
-template char       *toOct<uint64> (uint64  v, char *out, uint32 width);
-template char       *toOct<uint32> (uint32  v, char *out, uint32 width);
-template char       *toOct<uint16> (uint16  v, char *out, uint32 width);
-template char       *toOct<uint8>  (uint8   v, char *out, uint32 width);
+toDec(uintType v) {
+  char   *ret = str[pos++];
+  uint32  p   = 64;
+  uint32  x   = 0;
 
-template char const *toOct<uint128>(uint128 v, uint32 width);
-template char const *toOct<uint64> (uint64  v, uint32 width);
-template char const *toOct<uint32> (uint32  v, uint32 width);
-template char const *toOct<uint16> (uint16  v, uint32 width);
-template char const *toOct<uint8>  (uint8   v, uint32 width);
-
-
-
-template<typename uintType>
-char *
-toDec(uintType v, char *ret, uint32 w) {
-  uint32  p = 64;
-  uint32  x = 0;
+  if (pos >= 16)
+    pos = 0;
 
   if (v == 0) {
-    ret[x++] = '0';
+    ret[0] = '0';
+    ret[1] =  0;
   }
 
   else {
-    while (v > 0) {               //  Write the number, low-order
-      p -= 1;                     //  digits first, to the end
-      ret[p] = alpha[ v % 10 ];   //  of the string.
+    while (v > 0) {             //  Write the number, low-order
+      p -= 1;                   //  digits first, to the end
+      ret[p] = dec[ v % 10 ];   //  of the string.
       v /= 10;
     }
 
-    for (x=0; p<64; x++, p++)     //  Shift the string so it
-      ret[x] = ret[p];            //  starts at the origin.
-  }
-
-  ret[x] = 0;
-
-  return(ret + x);
-}
+    for (x=0; p<64; x++, p++)   //  Shift the string so it
+      ret[x] = ret[p];          //  starts at the origin.
 
-template<typename uintType>
-char const *
-toDec(uintType v, uint32 w) {
-  char *ret = getNextString();
-  toDec(v, ret, w);
-  return(ret);
-}
-
-template char       *toDec<uint128>(uint128 v, char *out, uint32 width);
-template char       *toDec<uint64> (uint64  v, char *out, uint32 width);
-template char       *toDec<uint32> (uint32  v, char *out, uint32 width);
-template char       *toDec<uint16> (uint16  v, char *out, uint32 width);
-template char       *toDec<uint8>  (uint8   v, char *out, uint32 width);
-
-template char const *toDec<uint128>(uint128 v, uint32 width);
-template char const *toDec<uint64> (uint64  v, uint32 width);
-template char const *toDec<uint32> (uint32  v, uint32 width);
-template char const *toDec<uint16> (uint16  v, uint32 width);
-template char const *toDec<uint8>  (uint8   v, uint32 width);
-
-
-
-template<typename uintType>
-char *
-toHex(uintType v, char *ret, uint32 w) {
-  uint32  L = sizeof(uintType) * 8 / 4;   //  The maximum possible width
-  uint32  W = (w + 3) / 4;                //  The user suggested width
-  uint32  l = std::min(L, W);
-  uint32  p = l;
-  uint32  s = 0;
-
-  while (p > 0) {
-    p -= 1;
-    ret[p] = alpha[ (v >> s) & 0xf ];
-    s += 4;
+    ret[x] = 0;
   }
 
-  ret[l] = 0;
-
-  return(ret + l);
-}
-
-template<typename uintType>
-char const *
-toHex(uintType v, uint32 w) {
-  char *ret = getNextString();
-  toHex(v, ret, w);
   return(ret);
 }
 
-template char       *toHex<uint128>(uint128 v, char *out, uint32 width);
-template char       *toHex<uint64> (uint64  v, char *out, uint32 width);
-template char       *toHex<uint32> (uint32  v, char *out, uint32 width);
-template char       *toHex<uint16> (uint16  v, char *out, uint32 width);
-template char       *toHex<uint8>  (uint8   v, char *out, uint32 width);
-
-template char const *toHex<uint128>(uint128 v, uint32 width);
-template char const *toHex<uint64> (uint64  v, uint32 width);
-template char const *toHex<uint32> (uint32  v, uint32 width);
-template char const *toHex<uint16> (uint16  v, uint32 width);
-template char const *toHex<uint8>  (uint8   v, uint32 width);
-
+template char const *toDec<uint128>(uint128 v);
+template char const *toDec<uint64> (uint64  v);
+template char const *toDec<uint32> (uint32  v);
+template char const *toDec<uint16> (uint16  v);
diff --git a/ext/meryl/src/utility/src/utility/types.H b/ext/meryl/src/utility/src/utility/types.H
index 82ce17c..78421c9 100644
--- a/ext/meryl/src/utility/src/utility/types.H
+++ b/ext/meryl/src/utility/src/utility/types.H
@@ -66,314 +66,93 @@
 
 #include <omp.h>
 
-#include <limits>
-#include <set>
-#include <vector>
-
 #if defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 32)
 #error I do not support 32-bit off_t.
 #endif
 
-//  Make the basic int types a bit more friendly.
+typedef int8_t  int8;
+typedef int16_t int16;
+typedef int32_t int32;
+typedef int64_t int64;
+
+typedef uint8_t  uint8;
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
 
+typedef __int128            int128;
 typedef unsigned __int128  uint128;
-typedef          __int128   int128;
-
-typedef uint64_t           uint64;
-typedef  int64_t            int64;
-
-typedef uint32_t           uint32;
-typedef  int32_t            int32;
-
-typedef uint16_t           uint16;
-typedef  int16_t            int16;
-
-typedef  uint8_t           uint8;
-typedef   int8_t            int8;
-
-//  There's no way to assign a constant value to the 128-bit integers
-//  directly, but with a helper function we can assign it using two 64-bit
-//  integers.  This only really makes sense for the unsigned flavor, e.g.,
-//  when used for bit packed quantities.
-
-constexpr inline uint128   build_uint128(uint64 a, uint64 b)    { return(((uint128)a << 64) | ((uint128)b)); };
-constexpr inline  int128   build_int128 ( int64 a,  int64 b)    { return( ((int128)a << 64) |  ((int128)b)); };
-
-//  Some handy constants.
-//
-//  numeric_limits<> on the 128-bit types is undefined, so we're forced to do
-//  it the hard way.
-
-constexpr uint128   uint128zero = 0;
-constexpr uint128   uint128one  = 1;
-constexpr uint128   uint128min  = 0;
-constexpr uint128   uint128max  = (uint128)(0xffffffffffffffffllu) << 64 | (uint128)(0xffffffffffffffffllu);
-
-constexpr  int128    int128zero = 0;
-constexpr  int128    int128one  = 1;
-constexpr  int128    int128min  = (uint128)(0x8000000000000000llu) << 64 | (uint128)(0x0000000000000000llu);
-constexpr  int128    int128max  = (uint128)(0x7fffffffffffffffllu) << 64 | (uint128)(0xffffffffffffffffllu);
-
-constexpr uint64    uint64zero  = 0;
-constexpr uint64    uint64one   = 1;
-constexpr uint64    uint64min   = std::numeric_limits<uint64>::min();
-constexpr uint64    uint64max   = std::numeric_limits<uint64>::max();
-
-constexpr  int64     int64zero  = 0;
-constexpr  int64     int64one   = 1;
-constexpr  int64     int64min   = std::numeric_limits<int64>::min();
-constexpr  int64     int64max   = std::numeric_limits<int64>::max();
-
-constexpr uint32    uint32zero  = 0;
-constexpr uint32    uint32one   = 1;
-constexpr uint32    uint32min   = std::numeric_limits<uint32>::min();
-constexpr uint32    uint32max   = std::numeric_limits<uint32>::max();
-
-constexpr  int32     int32zero  = 0;
-constexpr  int32     int32one   = 1;
-constexpr  int32     int32min   = std::numeric_limits<int32>::min();
-constexpr  int32     int32max   = std::numeric_limits<int32>::max();
-
-constexpr uint16    uint16zero  = 0;
-constexpr uint16    uint16one   = 1;
-constexpr uint16    uint16min   = std::numeric_limits<uint16>::min();
-constexpr uint16    uint16max   = std::numeric_limits<uint16>::max();
-
-constexpr  int16     int16zero  = 0;
-constexpr  int16     int16one   = 1;
-constexpr  int16     int16min   = std::numeric_limits<int16>::min();
-constexpr  int16     int16max   = std::numeric_limits<int16>::max();
-
-constexpr uint8     uint8zero   = 0;
-constexpr uint8     uint8one    = 1;
-constexpr uint8     uint8min    = std::numeric_limits<uint8>::min();
-constexpr uint8     uint8max    = std::numeric_limits<uint8>::max();
-
-constexpr  int8      int8zero   = 0;
-constexpr  int8      int8one    = 1;
-constexpr  int8      int8min    = std::numeric_limits<int8>::min();
-constexpr  int8      int8max    = std::numeric_limits<int8>::max();
-
-//  Conversion from floating point to integer.  lrint() rounds the
-//  floating-point argument to an integer value, using the current rounding
-//  mode.  This mode can be set with std::fesetround().
-
-inline  int64  doubletoint64(double d)   { return(std::llrint(d)); }
-inline  int32  doubletoint32(double d)   { return(std:: lrint(d)); }
-
-//  Decoding stings into numbers (and a boolean).
-//   - The first set simply convert the string to a number and return that
-//     number.
-//   - The second set converts the string to a number and returns a pointer
-//     to the letter in the string just after the number.
-//
-//  There probably should be a strtobool() of the second form, but I'm not
-//  really sure what to do with the 'invalid' case that is currently treated
-//  as 'false'.
-
-uint128 strtoullll(char const *nptr, char **endptr);   //  The original strtoul() et al take char**
- int128 strtollll (char const *nptr, char **endptr);   //  as the second arg.
-
-inline uint128 strtouint128(char const *str)  {  return((uint128)strtoullll(str, nullptr));      }
-inline  int128 strtoint128 (char const *str)  {  return( (int128)strtollll (str, nullptr));      }
-inline uint64  strtouint64 (char const *str)  {  return((uint64) strtoull  (str, nullptr, 10));  }
-inline  int64  strtoint64  (char const *str)  {  return( (int64) strtoll   (str, nullptr, 10));  }
-inline uint32  strtouint32 (char const *str)  {  return((uint32) strtoul   (str, nullptr, 10));  }
-inline  int32  strtoint32  (char const *str)  {  return( (int32) strtol    (str, nullptr, 10));  }
-inline uint16  strtouint16 (char const *str)  {  return((uint16) strtoul   (str, nullptr, 10));  }   //  WARNING: these convert to
-inline  int16  strtoint16  (char const *str)  {  return( (int16) strtol    (str, nullptr, 10));  }   //  a 32-bit integer then cast
-inline uint8   strtouint8  (char const *str)  {  return((uint8)  strtoul   (str, nullptr, 10));  }   //  to 16- or 8-bit integers.
-inline  int8   strtoint8   (char const *str)  {  return( (int8)  strtol    (str, nullptr, 10));  }
-inline  float  strtofloat  (char const *str)  {  return( (float) strtof    (str, nullptr));      }
-inline double  strtodouble (char const *str)  {  return((double) strtod    (str, nullptr));      }
-
-inline char const *strtonumber(char const *str, uint128 &num)  {  char *rem;  num = (uint128)strtoullll(str, &rem);      return(rem);  }
-inline char const *strtonumber(char const *str,  int128 &num)  {  char *rem;  num =  (int128)strtollll (str, &rem);      return(rem);  }
-inline char const *strtonumber(char const *str, uint64  &num)  {  char *rem;  num = (uint64) strtoull  (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str,  int64  &num)  {  char *rem;  num =  (int64) strtoll   (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str, uint32  &num)  {  char *rem;  num = (uint32) strtoul   (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str,  int32  &num)  {  char *rem;  num =  (int32) strtol    (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str, uint16  &num)  {  char *rem;  num = (uint16) strtoul   (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str,  int16  &num)  {  char *rem;  num =  (int16) strtol    (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str, uint8   &num)  {  char *rem;  num = (uint8)  strtoul   (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str,  int8   &num)  {  char *rem;  num =  (int8)  strtol    (str, &rem, 10);  return(rem);  }
-inline char const *strtonumber(char const *str,  float  &num)  {  char *rem;  num = (double) strtof    (str, &rem);      return(rem);  }
-inline char const *strtonumber(char const *str, double  &num)  {  char *rem;  num = (double) strtod    (str, &rem);      return(rem);  }
-
-inline bool    strtobool(char const *str)  {
-  if ((str == nullptr) ||
-      (str[0] == 0))
-    return(false);
-
-  if (((str[0] == 'y') && (str[1] == 0)) ||
-      ((str[0] == 'Y') && (str[1] == 0)) ||
-      ((str[0] == 't') && (str[1] == 0)) ||
-      ((str[0] == 'T') && (str[1] == 0)) ||
-      ((str[0] == '1') && (str[1] == 0)) ||
-      ((str[0] == '+') && (str[1] == 0)))
-    return(true);
-
-  if ((strcasecmp(str, "yes")  == 0) ||
-      (strcasecmp(str, "true") == 0))
-    return(true);
-
-  return(false);
-}
-
-//  Test if a character or string is of the desired encoding.
-
-inline bool   isNUL(char c)        { return(c == 0); }
-
-inline bool   isVisible(char c)    { return(('!' <= c) && (c <= '~')); }
-
-inline bool   isLetter(char c)     { return((('a' <= c) && (c <= 'z')) ||
-                                            (('A' <= c) && (c <= 'Z')));  }
-
-inline bool   isWhiteSpace(char c) { return((c == ' ')  || (c == '\n') ||
-                                            (c == '\t') || (c == '\r')); };
-
-inline bool   isComment(char c)    { return((c == '!') || (c == '#') || (c == 0));        };
-inline bool   isDelimiter(char c)  { return((c == ':') || (c == '=') || isWhiteSpace(c)); };
-
-inline bool   isBinDigit(char c)   { return((('0' <= c) && (c <= '1')));  }
-inline bool   isOctDigit(char c)   { return((('0' <= c) && (c <= '7')));  }
-inline bool   isDecDigit(char c)   { return((('0' <= c) && (c <= '9')));  }
-inline bool   isHexDigit(char c)   { return((('0' <= c) && (c <= '9')) ||
-                                            (('a' <= c) && (c <= 'f')) ||
-                                            (('A' <= c) && (c <= 'F')));  }
-
-bool          isBinNumber (char const *s);
-bool          isOctNumber (char const *s);
-bool          isDecNumber (char const *s, char dot='.');
-bool inline   isDecInteger(char const *s)   { return(isDecNumber(s,  0));  };
-bool inline   isDecFloat  (char const *s)   { return(isDecNumber(s, '.')); };
-bool          isHexNumber (char const *s);
-
-//  Disallow the usual character tests becuse of their goofy return values.
-
-#undef  isalnum
-#undef  isalpha
-#undef  iscntrl
-#undef  isdigit
-#undef  isgraph
-#undef  islower
-#undef  isprint
-#undef  ispunct
-#undef  isspace
-#undef  isupper
-#undef  isxdigit
-#undef  isnumber
-
-int inline isalnum (char c) = delete;
-int inline isalpha (char c) = delete;
-int inline iscntrl (char c) = delete;
-int inline isdigit (char c) = delete;
-int inline isgraph (char c) = delete;
-int inline islower (char c) = delete;
-int inline isprint (char c) = delete;
-int inline ispunct (char c) = delete;
-int inline isspace (char c) = delete;
-int inline isupper (char c) = delete;
-int inline isxdigit(char c) = delete;
-int inline isnumber(char c) = delete;
-
-//  Convert an ascii binary, octal, decimal or hexadecimal letter to an
-//  integer.  No type checking is performed; you've already called
-//  isHexNumber() et al, right?
-//
-//  The pieces of asciiHexToInteger() are as follows:
-//    (d & 0xf)        //  Decodes '0'-'9' as 0-9, 'a' - 'f' as 1-6
-//    (d >> 6)         //  Decodes digits as 0, letters as 1.
-//   ((d >> 6) << 3)   //  Decodes digits as 0, letters as 8.
-
-inline uint8  asciiBinToInteger(char d)   { return(d - '0'); }   //  Pretty trivial.
-inline uint8  asciiOctToInteger(char d)   { return(d - '0'); }
-inline uint8  asciiDecToInteger(char d)   { return(d - '0'); }
-inline uint8  asciiHexToInteger(char d)   { return(((uint8)d & 0xf) + ((uint8)d >> 6) + (((uint8)d >> 6) << 3)); }
-
-//  Convert an integer to a printable letter.  If it's not a printable
-//  letter, returns '.'.
-
-inline
-char
-integerToLetter(uint32 i) {
-  return(((' ' <= i) && (i <= '~')) ? i : '.');
-}
-
-//  Convert a string representing a set of numbers to
-//   - the first and last values (for form '#' or '#-#')
-//   - a vector of the low and high values
-//   - a set of the values
-//
-//  The string should be comprised of multiple comma separated ranges:
-//   - #     a single number
-//   - #-#   a range of numbers
-//   - #/#   a one-out-of-N specification
-//
-//  The first form returns a pointer to the letter after the decoded values.
-//
-//  If a single number is encountered in the first or second forms, both
-//  'bgn' and 'end' are set to that value.
-//
-//  If 'numberType' is a 128-bit integer, only 64-bit integers can be
-//  converted.
-
-template<typename numberType> char const *decodeRange(char const *range, numberType &bgn, numberType &end);
-template<typename numberType> void        decodeRange(char const *range, std::vector<numberType> &bgn, std::vector<numberType> &end);
-template<typename numberType> void        decodeRange(char const *range, std::set<numberType> &values);
-
-//  Convert an unsigned integer representing bits or bytes to
-//  a floating point number representing GB or MB.
-
-inline double bitsToGB(uint64 bits)   { return(bits / 8 / 1024.0 / 1024.0 / 1024.0); }
-inline double bitsToMB(uint64 bits)   { return(bits / 8 / 1024.0 / 1024.0);          }
-
-//  Convert an unsigned integer to one with 3 significant digit number, and
-//  also return the correct SI base.
-
-uint64      scaledNumber(uint64 n, uint32 div=1024);   //  Return n between 0 and div,
-char        scaledUnit  (uint64 n, uint32 div=1024);   //  and the SI unit of that
-const char *scaledName  (uint64 n, uint32 div=1024);   //  scaling.
-
-//  Convert an unsigned integer to a character string in the desired base.
-//
-//    char *toXXX(v, str)
-//      Expects a pre-allocated character buffer 'str' with enough space for
-//      the output string and a NUL terminating byte.  It returns a pointer
-//      to the NUL byte.  A 128-bit integer in:
-//        binary      needs 129 bytes
-//        octal       needs  44 bytes
-//        decimal     needs  40 bytes (it's 340,282,366,920,938,463,463,374,607,431,768,211,455)
-//        hexadecimal needs  33 bytes
-//
-//    char const *toXX(v)
-//      Returns a pointer to one of 32 private string buffers.  This is
-//      thread safe, as long as you don't use it more than 32 times at once.
-//
-//  Both forms take an optional 'width' (in bits) to display.  The actual
-//  width used is the minimum of this width and the number of bits in the
-//  type.  toDec() accepts the width, but doesn't use it.
-
-template<typename uintType> char       *toBin(uintType value, char *out, uint32 width=128);
-template<typename uintType> char       *toOct(uintType value, char *out, uint32 width=128);
-template<typename uintType> char       *toDec(uintType value, char *out, uint32 width=128);
-template<typename uintType> char       *toHex(uintType value, char *out, uint32 width=128);
-
-template<typename uintType> char const *toBin(uintType value, uint32 width=128);
-template<typename uintType> char const *toOct(uintType value, uint32 width=128);
-template<typename uintType> char const *toDec(uintType value, uint32 width=128);
-template<typename uintType> char const *toHex(uintType value, uint32 width=128);
-
-//  Format specifications for printf()
-
-#define F_PTR    "0x%016p"   // Pointers
-#define F_C           "%c"   // Characters
+
+#define  uint128NUMBER(A,B)   ((((uint128)A) << 64) | ((uint128)B))
+#define  uint64NUMBER(X) X ## LLU
+#define  uint32NUMBER(X) X ## LU
+
+#define  uint64ZERO      uint64NUMBER(0x0000000000000000)
+#define  uint64ONE       uint64NUMBER(0x0000000000000001)
+#define  uint64MAX       uint64NUMBER(0xffffffffffffffff)
+#define  uint64MASK(X)   (((~uint64ZERO) >> (64 - (X))) & (-(uint64)((X) != 0)))
+
+#define  uint32ZERO      uint32NUMBER(0x00000000)
+#define  uint32ONE       uint32NUMBER(0x00000001)
+#define  uint32MAX       uint32NUMBER(0xffffffff)
+#define  uint32MASK(X)   (((~uint32ZERO) >> (32 - (X))) & (-(uint32)((X) != 0)))
+
+#define  uint16ZERO      (0x0000)
+#define  uint16ONE       (0x0001)
+#define  uint16MAX       (0xffff)
+#define  uint16MASK(X)   (((~uint16ZERO) >> (16 - (X))) & (-(uint16)((X) != 0)))
+
+#define  uint8ZERO       (0x00)
+#define  uint8ONE        (0x01)
+#define  uint8MAX        (0xff)
+#define  uint8MASK(X)    (((~uint8ZERO) >> (8 - (X))) & (-(uint8)((X) != 0)))
+
+
+inline  int32  strtoint32 (char *str)  {  return( (int32)strtol  (str, NULL, 10));  }
+inline uint32  strtouint32(char *str)  {  return((uint32)strtoul (str, NULL, 10));  }
+inline  int64  strtoint64 (char *str)  {  return( (int64)strtoll (str, NULL, 10));  }
+inline uint64  strtouint64(char *str)  {  return((uint64)strtoull(str, NULL, 10));  }
+inline double  strtodouble(char *str)  {  return((double)strtod  (str, NULL));      }
+
+inline void    strtonumber( int32 &num, char *str, char **rem)  {  num =  (int32)strtol  (str, rem, 10);  }
+inline void    strtonumber(uint32 &num, char *str, char **rem)  {  num = (uint32)strtoul (str, rem, 10);  }
+inline void    strtonumber( int64 &num, char *str, char **rem)  {  num =  (int64)strtoll (str, rem, 10);  }
+inline void    strtonumber(uint64 &num, char *str, char **rem)  {  num = (uint64)strtoull(str, rem, 10);  }
+inline void    strtonumber(double &num, char *str, char **rem)  {  num = (double)strtod  (str, rem);      }
+
+inline  int32  doubletoint32(double d)   { return((int32) ((1.0 + 16.0 * DBL_EPSILON) * d));  };
+inline  int64  doubletoint64(double d)   { return((int64) ((1.0 + 16.0 * DBL_EPSILON) * d));  };
+
+template<typename uintType> char const *toHex(uintType v);
+template<typename uintType> char const *toDec(uintType v);
+
+// These macros are used to eliminate inter-platform differences between
+// calculated results
+//#define DBL_TO_INT(X)   ((int)((1.0+16.0*DBL_EPSILON)*(X)))
+//#define ROUNDPOS(X)     (DBL_TO_INT((X)+0.5) )
+//#define ROUND(X)        (((X)>0.0) ? ROUNDPOS(X) : -ROUNDPOS(-(X)) )
+//#define ZERO_PLUS       ( 16.0*DBL_EPSILON)
+//#define ZERO_MINUS      (-16.0*DBL_EPSILON)
+//#define ONE_PLUS        (1.0+ZERO_PLUS)
+//#define ONE_MINUS       (1.0+ZERO_MINUS)
+//#define INT_EQ_DBL(I,D) (fabs((double)(I)-(D)) < 16.0*DBL_EPSILON  )
+//#define DBL_EQ_DBL(A,B) (fabs((A)-(B))<16.0*DBL_EPSILON)
+
+// Pointers
+#define F_PTR    "0x%016p"
+
+// Characters
+#define F_C           "%c"
 #define F_CP           "c"
 #define F_CI         "%*c"
-#define F_STR         "%s"   // Strings
+
+// Strings
+#define F_STR         "%s"
 #define F_STRP         "s"
 #define F_STRI       "%*s"
-#define F_S16    "%" PRId16  // Integers
+
+// Integers
+#define F_S16    "%" PRId16
 #define F_S16P       PRId16
 #define F_S16I  "%*" PRId16
 #define F_U16    "%" PRIu16
@@ -394,15 +173,20 @@ template<typename uintType> char const *toHex(uintType value, uint32 width=128);
 #define F_X64 "%016" PRIx64
 #define F_X64P       PRIx64
 #define F_X64I  "%*" PRIx64
-#define F_F32         "%f"   // Floating points
+
+// Floating points
+#define F_F32         "%f"
 #define F_F32P         "f"
 #define F_F32I       "%*f"
 #define F_F64        "%lf"
 #define F_F64P        "lf"
 #define F_F64I      "%*lf"
-#define F_SIZE_T     "%zu"   // Standard typedefs
+
+// Standard typedefs
+#define F_SIZE_T     "%zu"
 #define F_SIZE_TP     "zu"
 #define F_SIZE_TI   "%*zu"
+
 #define F_OFF_T     F_S64
 #define F_OFF_TP    F_S64P
 #define F_OFF_TI    F_S64I