From 87e7728afe312e74876d06a8bfb85b26e78a004c Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 01:00:08 +0300
Subject: [PATCH 1/8] BitVacuumerTest: don't use `skipManyBits`, actually get
 those bits

---
 test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp | 14 ++++++++++++--
 .../bitstreams/BitVacuumerMSB16Test.cpp            | 14 ++++++++++++--
 .../bitstreams/BitVacuumerMSB32Test.cpp            | 14 ++++++++++++--
 test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp | 14 ++++++++++++--
 4 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
index e80319bf1..52d86fe44 100644
--- a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
@@ -276,10 +276,20 @@ TEST(BitVacuumerLSBTest, DependencyBreaking) {
       Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
 
   for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
-    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
     auto bsRef = BitStreamer(fullInput);
     bsRef.fill();
-    bsRef.skipManyBits(numBitsToSkip);
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal =
+          extractLowBits<unsigned>(numBitsToSkip / CHAR_BIT, numBitsToSkip % 8);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
 
     BitStreamPosition<BitStreamerTraits::Tag> state;
     state.pos = bsRef.getInputPosition();
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
index a6eeeaf11..3343722d6 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
@@ -273,10 +273,20 @@ TEST(BitVacuumerMSB16Test, DependencyBreaking) {
       Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
 
   for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
-    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
     auto bsRef = BitStreamer(fullInput);
     bsRef.fill();
-    bsRef.skipManyBits(numBitsToSkip);
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numBitsToSkip % 8, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
 
     BitStreamPosition<BitStreamerTraits::Tag> state;
     state.pos = bsRef.getInputPosition();
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
index d37e0fa03..c7e238c9d 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
@@ -276,10 +276,20 @@ TEST(BitVacuumerMSB32Test, DependencyBreaking) {
       Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
 
   for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
-    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
     auto bsRef = BitStreamer(fullInput);
     bsRef.fill();
-    bsRef.skipManyBits(numBitsToSkip);
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numBitsToSkip % 8, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
 
     BitStreamPosition<BitStreamerTraits::Tag> state;
     state.pos = bsRef.getInputPosition();
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
index dbbf9a56d..a3bf17b41 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
@@ -276,10 +276,20 @@ TEST(BitVacuumerMSBTest, DependencyBreaking) {
       Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
 
   for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
-    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
     auto bsRef = BitStreamer(fullInput);
     bsRef.fill();
-    bsRef.skipManyBits(numBitsToSkip);
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numBitsToSkip % 8, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
 
     BitStreamPosition<BitStreamerTraits::Tag> state;
     state.pos = bsRef.getInputPosition();

From 311d09248cc26efec7ec25e0702959f5a8c6061e Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 01:05:05 +0300
Subject: [PATCH 2/8] `BitStreamer::fill()`: produced fill level is sufficient

---
 src/librawspeed/bitstreams/BitStreamer.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/librawspeed/bitstreams/BitStreamer.h b/src/librawspeed/bitstreams/BitStreamer.h
index 187270637..a5c822b53 100644
--- a/src/librawspeed/bitstreams/BitStreamer.h
+++ b/src/librawspeed/bitstreams/BitStreamer.h
@@ -204,6 +204,7 @@ class BitStreamer {
     const auto input = replenisher.getInput();
     const auto numBytes = static_cast<Derived*>(this)->fillCache(input);
     replenisher.markNumBytesAsConsumed(numBytes);
+    invariant(cache.fillLevel >= nbits);
   }
 
   // these methods might be specialized by implementations that support it

From cbdb4b74588a89f4c63161a3260e026903caa3b7 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 01:21:51 +0300
Subject: [PATCH 3/8] `BitStreamer`: implement `reload()`

---
 src/librawspeed/bitstreams/BitStreamer.h      | 17 ++++++
 .../bitstreams/BitVacuumerLSBTest.cpp         | 54 +++++++++++++++++++
 .../bitstreams/BitVacuumerMSB16Test.cpp       | 54 +++++++++++++++++++
 .../bitstreams/BitVacuumerMSB32Test.cpp       | 54 +++++++++++++++++++
 .../bitstreams/BitVacuumerMSBTest.cpp         | 54 +++++++++++++++++++
 5 files changed, 233 insertions(+)

diff --git a/src/librawspeed/bitstreams/BitStreamer.h b/src/librawspeed/bitstreams/BitStreamer.h
index a5c822b53..988475aee 100644
--- a/src/librawspeed/bitstreams/BitStreamer.h
+++ b/src/librawspeed/bitstreams/BitStreamer.h
@@ -28,6 +28,7 @@
 #include "adt/Invariant.h"
 #include "adt/VariableLengthLoad.h"
 #include "bitstreams/BitStream.h"
+#include "bitstreams/BitStreamPosition.h"
 #include "io/Endianness.h"
 #include "io/IOException.h"
 #include <array>
@@ -192,6 +193,22 @@ class BitStreamer {
     establishClassInvariants();
   }
 
+  void reload() {
+    establishClassInvariants();
+
+    BitStreamPosition<Traits::Tag> state;
+    state.pos = getInputPosition();
+    state.fillLevel = getFillLevel();
+    const auto bsPos = getAsByteStreamPosition(state);
+
+    auto replacement = BitStreamer(replenisher.input);
+    if (bsPos.bytePos != 0)
+      replacement.replenisher.markNumBytesAsConsumed(bsPos.bytePos);
+    if (bsPos.numBitsToSkip != 0)
+      replacement.skipBits(bsPos.numBitsToSkip);
+    *this = std::move(replacement);
+  }
+
   void fill(int nbits = Cache::MaxGetBits) {
     establishClassInvariants();
     invariant(nbits >= 0);
diff --git a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
index 52d86fe44..37ca89af4 100644
--- a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
@@ -320,6 +320,60 @@ TEST(BitVacuumerLSBTest, DependencyBreaking) {
   }
 }
 
+TEST(BitVacuumerLSBTest, ReloadCache) {
+  std::vector<uint8_t> bitstream;
+
+  using BitStreamer = BitStreamerLSB;
+  using BitStreamerTraits = BitStreamer::Traits;
+
+  constexpr int numByteElts = 256;
+
+  {
+    auto bsInserter = PartitioningOutputIterator(std::back_inserter(bitstream));
+    using BitVacuumer = BitVacuumerLSB<decltype(bsInserter)>;
+    auto bv = BitVacuumer(bsInserter);
+
+    for (int e = 0; e != numByteElts + BitStreamerTraits::MaxProcessBytes; ++e)
+      bv.put(e, 8);
+  }
+  constexpr int numBitsTotal = CHAR_BIT * numByteElts;
+
+  auto fullInput =
+      Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
+
+  for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
+    auto bsRef = BitStreamer(fullInput);
+    bsRef.fill();
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal =
+          extractLowBits<unsigned>(numBitsToSkip / CHAR_BIT, numBitsToSkip % 8);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    bsRef.reload();
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
+
+    int numSubByteBitsRemaining = numBitsRemaining % CHAR_BIT;
+    int numBytesRemaining = numBitsRemaining / CHAR_BIT;
+    if (numSubByteBitsRemaining != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numSubByteBitsRemaining, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numSubByteBitsRemaining), expectedVal);
+    }
+
+    for (int i = 0; i != numBytesRemaining; ++i) {
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+  }
+}
+
 } // namespace
 
 } // namespace rawspeed
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
index 3343722d6..a005ac77e 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
@@ -317,6 +317,60 @@ TEST(BitVacuumerMSB16Test, DependencyBreaking) {
   }
 }
 
+TEST(BitVacuumerMSB16Test, ReloadCache) {
+  std::vector<uint8_t> bitstream;
+
+  using BitStreamer = BitStreamerMSB16;
+  using BitStreamerTraits = BitStreamer::Traits;
+
+  constexpr int numByteElts = 256;
+
+  {
+    auto bsInserter = PartitioningOutputIterator(std::back_inserter(bitstream));
+    using BitVacuumer = BitVacuumerMSB16<decltype(bsInserter)>;
+    auto bv = BitVacuumer(bsInserter);
+
+    for (int e = 0; e != numByteElts + BitStreamerTraits::MaxProcessBytes; ++e)
+      bv.put(e, 8);
+  }
+  constexpr int numBitsTotal = CHAR_BIT * numByteElts;
+
+  auto fullInput =
+      Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
+
+  for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
+    auto bsRef = BitStreamer(fullInput);
+    bsRef.fill();
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numBitsToSkip % 8, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    bsRef.reload();
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
+
+    int numSubByteBitsRemaining = numBitsRemaining % CHAR_BIT;
+    int numBytesRemaining = numBitsRemaining / CHAR_BIT;
+    if (numSubByteBitsRemaining != 0) {
+      const auto expectedVal = extractLowBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numSubByteBitsRemaining);
+      ASSERT_THAT(bsRef.getBits(numSubByteBitsRemaining), expectedVal);
+    }
+
+    for (int i = 0; i != numBytesRemaining; ++i) {
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+  }
+}
+
 } // namespace
 
 } // namespace rawspeed
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
index c7e238c9d..9cdc170ae 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
@@ -320,6 +320,60 @@ TEST(BitVacuumerMSB32Test, DependencyBreaking) {
   }
 }
 
+TEST(BitVacuumerMSB32Test, ReloadCache) {
+  std::vector<uint8_t> bitstream;
+
+  using BitStreamer = BitStreamerMSB32;
+  using BitStreamerTraits = BitStreamer::Traits;
+
+  constexpr int numByteElts = 256;
+
+  {
+    auto bsInserter = PartitioningOutputIterator(std::back_inserter(bitstream));
+    using BitVacuumer = BitVacuumerMSB32<decltype(bsInserter)>;
+    auto bv = BitVacuumer(bsInserter);
+
+    for (int e = 0; e != numByteElts + BitStreamerTraits::MaxProcessBytes; ++e)
+      bv.put(e, 8);
+  }
+  constexpr int numBitsTotal = CHAR_BIT * numByteElts;
+
+  auto fullInput =
+      Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
+
+  for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
+    auto bsRef = BitStreamer(fullInput);
+    bsRef.fill();
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numBitsToSkip % 8, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    bsRef.reload();
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
+
+    int numSubByteBitsRemaining = numBitsRemaining % CHAR_BIT;
+    int numBytesRemaining = numBitsRemaining / CHAR_BIT;
+    if (numSubByteBitsRemaining != 0) {
+      const auto expectedVal = extractLowBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numSubByteBitsRemaining);
+      ASSERT_THAT(bsRef.getBits(numSubByteBitsRemaining), expectedVal);
+    }
+
+    for (int i = 0; i != numBytesRemaining; ++i) {
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+  }
+}
+
 } // namespace
 
 } // namespace rawspeed
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
index a3bf17b41..5dbff005f 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
@@ -320,6 +320,60 @@ TEST(BitVacuumerMSBTest, DependencyBreaking) {
   }
 }
 
+TEST(BitVacuumerMSBTest, ReloadCache) {
+  std::vector<uint8_t> bitstream;
+
+  using BitStreamer = BitStreamerMSB;
+  using BitStreamerTraits = BitStreamer::Traits;
+
+  constexpr int numByteElts = 256;
+
+  {
+    auto bsInserter = PartitioningOutputIterator(std::back_inserter(bitstream));
+    using BitVacuumer = BitVacuumerMSB<decltype(bsInserter)>;
+    auto bv = BitVacuumer(bsInserter);
+
+    for (int e = 0; e != numByteElts + BitStreamerTraits::MaxProcessBytes; ++e)
+      bv.put(e, 8);
+  }
+  constexpr int numBitsTotal = CHAR_BIT * numByteElts;
+
+  auto fullInput =
+      Array1DRef(bitstream.data(), implicit_cast<int>(bitstream.size()));
+
+  for (int numBitsToSkip = 0; numBitsToSkip <= numBitsTotal; ++numBitsToSkip) {
+    auto bsRef = BitStreamer(fullInput);
+    bsRef.fill();
+
+    for (int i = 0; i != numBitsToSkip / 8; ++i) {
+      const auto expectedVal = i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+    if ((numBitsToSkip % 8) != 0) {
+      const auto expectedVal = extractHighBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numBitsToSkip % 8, CHAR_BIT);
+      ASSERT_THAT(bsRef.getBits(numBitsToSkip % 8), expectedVal);
+    }
+
+    bsRef.reload();
+
+    const int numBitsRemaining = numBitsTotal - numBitsToSkip;
+
+    int numSubByteBitsRemaining = numBitsRemaining % CHAR_BIT;
+    int numBytesRemaining = numBitsRemaining / CHAR_BIT;
+    if (numSubByteBitsRemaining != 0) {
+      const auto expectedVal = extractLowBits<unsigned>(
+          numBitsToSkip / CHAR_BIT, numSubByteBitsRemaining);
+      ASSERT_THAT(bsRef.getBits(numSubByteBitsRemaining), expectedVal);
+    }
+
+    for (int i = 0; i != numBytesRemaining; ++i) {
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      ASSERT_THAT(bsRef.getBits(8), expectedVal);
+    }
+  }
+}
+
 } // namespace
 
 } // namespace rawspeed

From bd2b74eb46446924a8f5b15f58fdad18a6f6e3d3 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 01:29:31 +0300
Subject: [PATCH 4/8] Rename `roundUpDivision()` as `roundUpDivisionSafe()` -
 it never overflows

---
 .../adt/CoalescingOutputIteratorBenchmark.cpp |  4 ++--
 .../bitstreams/BitStreamJPEGUtils.cpp         |  2 +-
 .../bitstreams/BitVacuumerBenchmark.cpp       |  4 ++--
 .../bitstreams/BitVacuumerJPEGBenchmark.cpp   |  2 +-
 .../bitstreams/BitStreamPosition.h            |  2 +-
 src/librawspeed/common/Common.h               |  4 ++--
 src/librawspeed/common/DngOpcodes.cpp         |  6 +++---
 src/librawspeed/common/RawImage.cpp           |  4 ++--
 src/librawspeed/decoders/ArwDecoder.cpp       |  4 ++--
 src/librawspeed/decoders/DngDecoder.cpp       |  6 +++---
 src/librawspeed/decoders/IiqDecoder.cpp       |  4 ++--
 src/librawspeed/decoders/NefDecoder.cpp       |  4 ++--
 src/librawspeed/decoders/OrfDecoder.cpp       |  2 +-
 src/librawspeed/decoders/RawDecoder.cpp       |  2 +-
 .../decompressors/AbstractDngDecompressor.h   |  4 ++--
 .../decompressors/FujiDecompressor.cpp        |  2 +-
 .../decompressors/LJpegDecompressor.cpp       |  6 +++---
 .../decompressors/PanasonicV4Decompressor.cpp |  3 ++-
 .../decompressors/PanasonicV5Decompressor.cpp |  2 +-
 .../decompressors/VC5Decompressor.cpp         | 14 ++++++-------
 .../adt/CoalescingOutputIteratorTest.cpp      |  2 +-
 .../bitstreams/BitVacuumerLSBTest.cpp         |  4 ++--
 .../bitstreams/BitVacuumerMSB16Test.cpp       |  4 ++--
 .../bitstreams/BitVacuumerMSB32Test.cpp       |  4 ++--
 .../bitstreams/BitVacuumerMSBTest.cpp         |  4 ++--
 test/librawspeed/common/CommonTest.cpp        | 20 +++++++++----------
 26 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/bench/librawspeed/adt/CoalescingOutputIteratorBenchmark.cpp b/bench/librawspeed/adt/CoalescingOutputIteratorBenchmark.cpp
index 563b16ebe..7414c38d6 100644
--- a/bench/librawspeed/adt/CoalescingOutputIteratorBenchmark.cpp
+++ b/bench/librawspeed/adt/CoalescingOutputIteratorBenchmark.cpp
@@ -61,7 +61,7 @@ template <typename C> void BM_Broadcast(benchmark::State& state) {
   int64_t numBytes = state.range(0);
   const int bytesPerChunk = sizeof(T);
   const auto numChunks =
-      implicit_cast<int>(roundUpDivision(numBytes, bytesPerChunk));
+      implicit_cast<int>(roundUpDivisionSafe(numBytes, bytesPerChunk));
   numBytes = bytesPerChunk * numChunks;
 
   std::vector<T, DefaultInitAllocatorAdaptor<T, std::allocator<T>>> output;
@@ -105,7 +105,7 @@ template <typename C> void BM_Copy(benchmark::State& state) {
   int64_t numBytes = state.range(0);
   const int bytesPerChunk = sizeof(T);
   const auto numChunks =
-      implicit_cast<int>(roundUpDivision(numBytes, bytesPerChunk));
+      implicit_cast<int>(roundUpDivisionSafe(numBytes, bytesPerChunk));
   numBytes = bytesPerChunk * numChunks;
 
   std::vector<uint8_t,
diff --git a/bench/librawspeed/bitstreams/BitStreamJPEGUtils.cpp b/bench/librawspeed/bitstreams/BitStreamJPEGUtils.cpp
index 30409dd28..672a2d3b4 100644
--- a/bench/librawspeed/bitstreams/BitStreamJPEGUtils.cpp
+++ b/bench/librawspeed/bitstreams/BitStreamJPEGUtils.cpp
@@ -93,7 +93,7 @@ __attribute__((noinline)) __attribute__((visibility("default")))
 JPEGStuffedByteStreamGenerator::JPEGStuffedByteStreamGenerator(
     const int64_t numBytesMax, bool AppendStuffingByte) {
   invariant(numBytesMax > 0);
-  const auto expectedOverhead = roundUpDivision(numBytesMax, 100); // <=1%
+  const auto expectedOverhead = roundUpDivisionSafe(numBytesMax, 100); // <=1%
   dataStorage.reserve(implicit_cast<size_t>(numBytesMax + expectedOverhead));
 
   // Here we only need to differentiate between a normal byte,
diff --git a/bench/librawspeed/bitstreams/BitVacuumerBenchmark.cpp b/bench/librawspeed/bitstreams/BitVacuumerBenchmark.cpp
index 23b35e08a..287a9431b 100644
--- a/bench/librawspeed/bitstreams/BitVacuumerBenchmark.cpp
+++ b/bench/librawspeed/bitstreams/BitVacuumerBenchmark.cpp
@@ -103,7 +103,7 @@ struct BitVectorLengthsGenerator final {
     std::random_device rd;
     std::mt19937_64 gen(rd());
 
-    for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivision(
+    for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivisionSafe(
                                   numBits, CHAR_BIT)) < maxBytes;) {
       int len = dist(gen);
       numBitsToProduce += len;
@@ -148,7 +148,7 @@ template <typename T, typename C> void BM(benchmark::State& state) {
               DefaultInitAllocatorAdaptor<OutputChunkType,
                                           std::allocator<OutputChunkType>>>
       output;
-  output.reserve(implicit_cast<size_t>(roundUpDivision(
+  output.reserve(implicit_cast<size_t>(roundUpDivisionSafe(
       gen.numBitsToProduce, CHAR_BIT * sizeof(OutputChunkType))));
 
   for (auto _ : state) {
diff --git a/bench/librawspeed/bitstreams/BitVacuumerJPEGBenchmark.cpp b/bench/librawspeed/bitstreams/BitVacuumerJPEGBenchmark.cpp
index cb1b2853a..7c7210104 100644
--- a/bench/librawspeed/bitstreams/BitVacuumerJPEGBenchmark.cpp
+++ b/bench/librawspeed/bitstreams/BitVacuumerJPEGBenchmark.cpp
@@ -106,7 +106,7 @@ void BM(benchmark::State& state, bool Stuffed) {
                                           std::allocator<OutputChunkType>>>
       output;
   output.reserve(implicit_cast<size_t>(
-      roundUpDivision(input->size(), sizeof(OutputChunkType))));
+      roundUpDivisionSafe(input->size(), sizeof(OutputChunkType))));
 
   for (auto _ : state) {
     output.clear();
diff --git a/src/librawspeed/bitstreams/BitStreamPosition.h b/src/librawspeed/bitstreams/BitStreamPosition.h
index 60c3bd275..a411db4a3 100644
--- a/src/librawspeed/bitstreams/BitStreamPosition.h
+++ b/src/librawspeed/bitstreams/BitStreamPosition.h
@@ -49,7 +49,7 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
   invariant(state.fillLevel >= 0);
 
   auto numBytesRemainingInCache =
-      implicit_cast<int>(roundUpDivision(state.fillLevel, CHAR_BIT));
+      implicit_cast<int>(roundUpDivisionSafe(state.fillLevel, CHAR_BIT));
   invariant(numBytesRemainingInCache >= 0);
   invariant(numBytesRemainingInCache <= state.pos);
 
diff --git a/src/librawspeed/common/Common.h b/src/librawspeed/common/Common.h
index 0ee4aa5c9..c97dc05e0 100644
--- a/src/librawspeed/common/Common.h
+++ b/src/librawspeed/common/Common.h
@@ -136,8 +136,8 @@ constexpr uint64_t RAWSPEED_READNONE roundUp(uint64_t value,
   return roundToMultiple(value, multiple, /*roundDown=*/false);
 }
 
-constexpr uint64_t RAWSPEED_READNONE roundUpDivision(uint64_t value,
-                                                     uint64_t div) {
+constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value,
+                                                         uint64_t div) {
   return (value != 0) ? (1 + ((value - 1) / div)) : 0;
 }
 
diff --git a/src/librawspeed/common/DngOpcodes.cpp b/src/librawspeed/common/DngOpcodes.cpp
index 6655b6fc4..a890584fa 100644
--- a/src/librawspeed/common/DngOpcodes.cpp
+++ b/src/librawspeed/common/DngOpcodes.cpp
@@ -393,8 +393,8 @@ class DngOpcodes::PixelOpcode : public ROIOpcode {
     int cpp = ri->getCpp();
     const iRectangle2D& ROI = getRoi();
     const iPoint2D numAffected(
-        implicit_cast<int>(roundUpDivision(getRoi().dim.x, colPitch)),
-        implicit_cast<int>(roundUpDivision(getRoi().dim.y, rowPitch)));
+        implicit_cast<int>(roundUpDivisionSafe(getRoi().dim.x, colPitch)),
+        implicit_cast<int>(roundUpDivisionSafe(getRoi().dim.y, rowPitch)));
     for (int y = 0; y < numAffected.y; ++y) {
       for (int x = 0; x < numAffected.x; ++x) {
         for (auto p = 0U; p < planes; ++p) {
@@ -568,7 +568,7 @@ class DngOpcodes::DeltaRowOrCol : public DeltaRowOrColBase {
     // See PixelOpcode::applyOP(). We will access deltaF/deltaI up to (excl.)
     // either ROI.getWidth() or ROI.getHeight() index. Thus, we need to have
     // either ROI.getRight() or ROI.getBottom() elements in there.
-    if (const auto expectedSize = roundUpDivision(
+    if (const auto expectedSize = roundUpDivisionSafe(
             S::select(getRoi().getWidth(), getRoi().getHeight()),
             S::select(getPitch().x, getPitch().y));
         expectedSize != deltaF_count) {
diff --git a/src/librawspeed/common/RawImage.cpp b/src/librawspeed/common/RawImage.cpp
index ffbc01920..ccd8b2f91 100644
--- a/src/librawspeed/common/RawImage.cpp
+++ b/src/librawspeed/common/RawImage.cpp
@@ -201,8 +201,8 @@ void RawImageData::subFrame(iRectangle2D crop) {
 void RawImageData::createBadPixelMap() {
   if (!isAllocated())
     ThrowRDE("(internal) Bad pixel map cannot be allocated before image.");
-  mBadPixelMapPitch =
-      implicit_cast<uint32_t>(roundUp(roundUpDivision(uncropped_dim.x, 8), 16));
+  mBadPixelMapPitch = implicit_cast<uint32_t>(
+      roundUp(roundUpDivisionSafe(uncropped_dim.x, 8), 16));
   assert(mBadPixelMap.empty());
   mBadPixelMap.resize(static_cast<size_t>(mBadPixelMapPitch) * uncropped_dim.y,
                       uint8_t(0));
diff --git a/src/librawspeed/decoders/ArwDecoder.cpp b/src/librawspeed/decoders/ArwDecoder.cpp
index b6479edab..83c812b58 100644
--- a/src/librawspeed/decoders/ArwDecoder.cpp
+++ b/src/librawspeed/decoders/ArwDecoder.cpp
@@ -328,13 +328,13 @@ void ArwDecoder::DecodeLJpeg(const TiffIFD* raw) {
 
   assert(tilew > 0);
   const auto tilesX =
-      implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.x, tilew));
+      implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.x, tilew));
   if (!tilesX)
     ThrowRDE("Zero tiles horizontally");
 
   assert(tileh > 0);
   const auto tilesY =
-      implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.y, tileh));
+      implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.y, tileh));
   if (!tilesY)
     ThrowRDE("Zero tiles vertically");
 
diff --git a/src/librawspeed/decoders/DngDecoder.cpp b/src/librawspeed/decoders/DngDecoder.cpp
index 5509027d0..37228853c 100644
--- a/src/librawspeed/decoders/DngDecoder.cpp
+++ b/src/librawspeed/decoders/DngDecoder.cpp
@@ -308,13 +308,13 @@ DngDecoder::getTilingDescription(const TiffIFD* raw) const {
 
     assert(tilew > 0);
     const auto tilesX =
-        implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.x, tilew));
+        implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.x, tilew));
     if (!tilesX)
       ThrowRDE("Zero tiles horizontally");
 
     assert(tileh > 0);
     const auto tilesY =
-        implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.y, tileh));
+        implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.y, tileh));
     if (!tilesY)
       ThrowRDE("Zero tiles vertically");
 
@@ -350,7 +350,7 @@ DngDecoder::getTilingDescription(const TiffIFD* raw) const {
                            : mRaw->dim.y;
 
   if (yPerSlice == 0 ||
-      roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
+      roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
     ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
              yPerSlice, counts->count, mRaw->dim.y);
   }
diff --git a/src/librawspeed/decoders/IiqDecoder.cpp b/src/librawspeed/decoders/IiqDecoder.cpp
index 869d1a3f4..9d8b98fdf 100644
--- a/src/librawspeed/decoders/IiqDecoder.cpp
+++ b/src/librawspeed/decoders/IiqDecoder.cpp
@@ -427,8 +427,8 @@ void IiqDecoder::PhaseOneFlatField(ByteStream data, IiqCorr corr) const {
   if (head[2] == 0 || head[3] == 0 || head[4] == 0 || head[5] == 0)
     return;
 
-  auto wide = implicit_cast<int>(roundUpDivision(head[2], head[4]));
-  auto high = implicit_cast<int>(roundUpDivision(head[3], head[5]));
+  auto wide = implicit_cast<int>(roundUpDivisionSafe(head[2], head[4]));
+  auto high = implicit_cast<int>(roundUpDivisionSafe(head[3], head[5]));
 
   std::vector<float> mrow_storage;
   Array2DRef<float> mrow = Array2DRef<float>::create(
diff --git a/src/librawspeed/decoders/NefDecoder.cpp b/src/librawspeed/decoders/NefDecoder.cpp
index 4d9b30f3a..c0fce1f6a 100644
--- a/src/librawspeed/decoders/NefDecoder.cpp
+++ b/src/librawspeed/decoders/NefDecoder.cpp
@@ -182,7 +182,7 @@ bool NefDecoder::NEFIsUncompressed(const TiffIFD* raw) {
   // We can't just accept this. Some *compressed* NEF's also pass this check :(
   // Thus, let's accept *some* *small* padding.
   const auto requiredInputBits = bitPerPixel * requiredPixels;
-  const auto requiredInputBytes = roundUpDivision(requiredInputBits, 8);
+  const auto requiredInputBytes = roundUpDivisionSafe(requiredInputBits, 8);
   // While we might have more *pixels* than needed, it does not nessesairly mean
   // that we have more input *bytes*. We might be off by a few pixels, and with
   // small image dimensions and bpp, we might still be in the same byte.
@@ -229,7 +229,7 @@ void NefDecoder::DecodeUncompressed() const {
   }
 
   if (yPerSlice == 0 || yPerSlice > static_cast<uint32_t>(mRaw->dim.y) ||
-      roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
+      roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
     ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
              yPerSlice, counts->count, mRaw->dim.y);
   }
diff --git a/src/librawspeed/decoders/OrfDecoder.cpp b/src/librawspeed/decoders/OrfDecoder.cpp
index b4d283815..68b4d517f 100644
--- a/src/librawspeed/decoders/OrfDecoder.cpp
+++ b/src/librawspeed/decoders/OrfDecoder.cpp
@@ -164,7 +164,7 @@ void OrfDecoder::decodeUncompressedInterleaved(ByteStream s, uint32_t w,
 
   int inputPitchBytes = inputPitchBits / 8;
 
-  const auto numEvenLines = implicit_cast<int>(roundUpDivision(h, 2));
+  const auto numEvenLines = implicit_cast<int>(roundUpDivisionSafe(h, 2));
   const auto evenLinesInput = s.getStream(numEvenLines, inputPitchBytes)
                                   .peekRemainingBuffer()
                                   .getAsArray1DRef();
diff --git a/src/librawspeed/decoders/RawDecoder.cpp b/src/librawspeed/decoders/RawDecoder.cpp
index 72f4537b4..ed5b56cea 100644
--- a/src/librawspeed/decoders/RawDecoder.cpp
+++ b/src/librawspeed/decoders/RawDecoder.cpp
@@ -74,7 +74,7 @@ void RawDecoder::decodeUncompressed(const TiffIFD* rawIFD,
   }
 
   if (yPerSlice == 0 || yPerSlice > static_cast<uint32_t>(mRaw->dim.y) ||
-      roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
+      roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
     ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
              yPerSlice, counts->count, mRaw->dim.y);
   }
diff --git a/src/librawspeed/decompressors/AbstractDngDecompressor.h b/src/librawspeed/decompressors/AbstractDngDecompressor.h
index 847f993be..dcd6ad0b9 100644
--- a/src/librawspeed/decompressors/AbstractDngDecompressor.h
+++ b/src/librawspeed/decompressors/AbstractDngDecompressor.h
@@ -55,8 +55,8 @@ struct DngTilingDescription final {
 
   DngTilingDescription(const iPoint2D& dim_, uint32_t tileW_, uint32_t tileH_)
       : dim(dim_), tileW(tileW_), tileH(tileH_),
-        tilesX(implicit_cast<uint32_t>(roundUpDivision(dim.x, tileW))),
-        tilesY(implicit_cast<uint32_t>(roundUpDivision(dim.y, tileH))),
+        tilesX(implicit_cast<uint32_t>(roundUpDivisionSafe(dim.x, tileW))),
+        tilesY(implicit_cast<uint32_t>(roundUpDivisionSafe(dim.y, tileH))),
         numTiles(tilesX * tilesY) {
     invariant(dim.area() > 0);
     invariant(tileW > 0);
diff --git a/src/librawspeed/decompressors/FujiDecompressor.cpp b/src/librawspeed/decompressors/FujiDecompressor.cpp
index 81ef007ea..e4cd47d0e 100644
--- a/src/librawspeed/decompressors/FujiDecompressor.cpp
+++ b/src/librawspeed/decompressors/FujiDecompressor.cpp
@@ -927,7 +927,7 @@ FujiDecompressor::FujiHeader::operator bool() const {
        raw_rounded_width % block_size ||
        raw_rounded_width - raw_width >= block_size || blocks_in_row > 0x10 ||
        blocks_in_row == 0 || blocks_in_row != raw_rounded_width / block_size ||
-       blocks_in_row != roundUpDivision(raw_width, block_size) ||
+       blocks_in_row != roundUpDivisionSafe(raw_width, block_size) ||
        total_lines > 0x800 || total_lines == 0 ||
        total_lines != raw_height / FujiStrip::lineHeight() ||
        (raw_bits != 12 && raw_bits != 14 && raw_bits != 16) ||
diff --git a/src/librawspeed/decompressors/LJpegDecompressor.cpp b/src/librawspeed/decompressors/LJpegDecompressor.cpp
index eb830a0c9..4f80a9aa3 100644
--- a/src/librawspeed/decompressors/LJpegDecompressor.cpp
+++ b/src/librawspeed/decompressors/LJpegDecompressor.cpp
@@ -135,8 +135,8 @@ LJpegDecompressor::LJpegDecompressor(RawImage img, iRectangle2D imgFrame_,
       static_cast<int>(mRaw->getCpp()) * imgFrame.dim.x;
 
   // How many full pixel MCUs do we need to consume for that?
-  if (const auto mcusToConsume =
-          implicit_cast<int>(roundUpDivision(tileRequiredWidth, frame.mcu.x));
+  if (const auto mcusToConsume = implicit_cast<int>(
+          roundUpDivisionSafe(tileRequiredWidth, frame.mcu.x));
       frame.dim.x < mcusToConsume ||
       frame.mcu.y * frame.dim.y < imgFrame.dim.y ||
       frame.mcu.x * frame.dim.x < tileRequiredWidth) {
@@ -274,7 +274,7 @@ ByteStream::size_type LJpegDecompressor::decodeN() const {
   // the raw image buffer. The excessive content has to be ignored.
 
   invariant(imgFrame.dim.y % frame.mcu.y == 0);
-  const auto numRestartIntervals = implicit_cast<int>(roundUpDivision(
+  const auto numRestartIntervals = implicit_cast<int>(roundUpDivisionSafe(
       imgFrame.dim.y / frame.mcu.y, numLJpegRowsPerRestartInterval));
   invariant(numRestartIntervals >= 0);
   invariant(numRestartIntervals != 0);
diff --git a/src/librawspeed/decompressors/PanasonicV4Decompressor.cpp b/src/librawspeed/decompressors/PanasonicV4Decompressor.cpp
index 72a6973d8..d1bcda158 100644
--- a/src/librawspeed/decompressors/PanasonicV4Decompressor.cpp
+++ b/src/librawspeed/decompressors/PanasonicV4Decompressor.cpp
@@ -91,7 +91,8 @@ void PanasonicV4Decompressor::chopInputIntoBlocks() {
   };
 
   // If section_split_offset == 0, last block may not be full.
-  const auto blocksTotal = roundUpDivision(input.getRemainSize(), BlockSize);
+  const auto blocksTotal =
+      roundUpDivisionSafe(input.getRemainSize(), BlockSize);
   invariant(blocksTotal > 0);
   invariant(blocksTotal * PixelsPerBlock >= mRaw->dim.area());
   assert(blocksTotal <= std::numeric_limits<uint32_t>::max());
diff --git a/src/librawspeed/decompressors/PanasonicV5Decompressor.cpp b/src/librawspeed/decompressors/PanasonicV5Decompressor.cpp
index f346c5a38..b24f3ae0e 100644
--- a/src/librawspeed/decompressors/PanasonicV5Decompressor.cpp
+++ b/src/librawspeed/decompressors/PanasonicV5Decompressor.cpp
@@ -98,7 +98,7 @@ PanasonicV5Decompressor::PanasonicV5Decompressor(RawImage img,
   invariant(numPackets > 0);
 
   // And how many blocks that would be? Last block may not be full, pad it.
-  numBlocks = roundUpDivision(numPackets, PacketsPerBlock);
+  numBlocks = roundUpDivisionSafe(numPackets, PacketsPerBlock);
   invariant(numBlocks > 0);
 
   // Does the input contain enough blocks?
diff --git a/src/librawspeed/decompressors/VC5Decompressor.cpp b/src/librawspeed/decompressors/VC5Decompressor.cpp
index 2be326abb..d9f0271a1 100644
--- a/src/librawspeed/decompressors/VC5Decompressor.cpp
+++ b/src/librawspeed/decompressors/VC5Decompressor.cpp
@@ -208,8 +208,8 @@ VC5Decompressor::BandData VC5Decompressor::Wavelet::reconstructPass(
 #pragma GCC diagnostic ignored "-Wshorten-64-to-32"
 #ifdef HAVE_OPENMP
 #pragma omp taskloop default(none) firstprivate(dst, process)                  \
-    num_tasks(roundUpDivision(rawspeed_get_number_of_processor_cores(),        \
-                                  numChannels))
+    num_tasks(roundUpDivisionSafe(rawspeed_get_number_of_processor_cores(),    \
+                                      numChannels))
 #endif
   for (int row = 0; row < dst.height() / 2; ++row) {
 #pragma GCC diagnostic pop
@@ -267,9 +267,8 @@ VC5Decompressor::BandData VC5Decompressor::Wavelet::combineLowHighPass(
 #pragma GCC diagnostic ignored "-Wshorten-64-to-32"
 #ifdef HAVE_OPENMP
 #pragma omp taskloop if (finalWavelet) default(none)                           \
-    firstprivate(dst, process)                                                 \
-    num_tasks(roundUpDivision(rawspeed_get_number_of_processor_cores(), 2))    \
-    mergeable
+    firstprivate(dst, process) num_tasks(roundUpDivisionSafe(                  \
+            rawspeed_get_number_of_processor_cores(), 2)) mergeable
 #endif
   for (int row = 0; row < dst.height(); ++row) {
 #pragma GCC diagnostic pop
@@ -411,7 +410,8 @@ VC5Decompressor::VC5Decompressor(ByteStream bs, const RawImage& img)
     for (Wavelet& wavelet : channel.wavelets) {
       // Pad dimensions as necessary and divide them by two for the next wavelet
       for (auto* dimension : {&waveletWidth, &waveletHeight})
-        *dimension = implicit_cast<uint16_t>(roundUpDivision(*dimension, 2));
+        *dimension =
+            implicit_cast<uint16_t>(roundUpDivisionSafe(*dimension, 2));
       wavelet.width = waveletWidth;
       wavelet.height = waveletHeight;
 
@@ -658,7 +658,7 @@ VC5Decompressor::Wavelet::LowPassBand::LowPassBand(Wavelet& wavelet_,
   const auto bitsTotal = waveletArea * lowpassPrecision;
   constexpr int bytesPerChunk = 8; // FIXME: or is it 4?
   constexpr int bitsPerChunk = 8 * bytesPerChunk;
-  const auto chunksTotal = roundUpDivision(bitsTotal, bitsPerChunk);
+  const auto chunksTotal = roundUpDivisionSafe(bitsTotal, bitsPerChunk);
   const auto bytesTotal = bytesPerChunk * chunksTotal;
   // And clamp the size / verify sufficient input while we are at it.
   // NOTE: this might fail (and should throw, not assert).
diff --git a/test/librawspeed/adt/CoalescingOutputIteratorTest.cpp b/test/librawspeed/adt/CoalescingOutputIteratorTest.cpp
index a4749be6f..17826d60f 100644
--- a/test/librawspeed/adt/CoalescingOutputIteratorTest.cpp
+++ b/test/librawspeed/adt/CoalescingOutputIteratorTest.cpp
@@ -67,7 +67,7 @@ template <typename CoalescedType, typename PartType>
 auto coalesceElts(Array1DRef<const PartType> input) {
   std::vector<CoalescedType> outputStorage;
   {
-    outputStorage.reserve(implicit_cast<size_t>(roundUpDivision(
+    outputStorage.reserve(implicit_cast<size_t>(roundUpDivisionSafe(
         sizeof(PartType) * input.size(), sizeof(CoalescedType))));
     auto subIter = std::back_inserter(outputStorage);
     auto iter = CoalescingOutputIterator<decltype(subIter), PartType>(subIter);
diff --git a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
index 37ca89af4..4c72d412b 100644
--- a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
@@ -313,7 +313,7 @@ TEST(BitVacuumerLSBTest, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -368,7 +368,7 @@ TEST(BitVacuumerLSBTest, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
index a005ac77e..db1e1e935 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
@@ -310,7 +310,7 @@ TEST(BitVacuumerMSB16Test, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -365,7 +365,7 @@ TEST(BitVacuumerMSB16Test, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
index 9cdc170ae..9f67cf515 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
@@ -313,7 +313,7 @@ TEST(BitVacuumerMSB32Test, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -368,7 +368,7 @@ TEST(BitVacuumerMSB32Test, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
index 5dbff005f..a08ce1002 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
@@ -313,7 +313,7 @@ TEST(BitVacuumerMSBTest, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -368,7 +368,7 @@ TEST(BitVacuumerMSBTest, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/common/CommonTest.cpp b/test/librawspeed/common/CommonTest.cpp
index f8d3e940c..d0b9d1db8 100644
--- a/test/librawspeed/common/CommonTest.cpp
+++ b/test/librawspeed/common/CommonTest.cpp
@@ -36,7 +36,7 @@ using rawspeed::isIn;
 using rawspeed::isPowerOfTwo;
 using rawspeed::roundDown;
 using rawspeed::roundUp;
-using rawspeed::roundUpDivision;
+using rawspeed::roundUpDivisionSafe;
 using rawspeed::splitString;
 using rawspeed::trimSpaces;
 using std::make_tuple;
@@ -101,11 +101,11 @@ INSTANTIATE_TEST_SUITE_P(RoundUpTest, RoundUpTest,
                          ::testing::ValuesIn(RoundUpValues));
 TEST_P(RoundUpTest, RoundUpTest) { ASSERT_EQ(roundUp(in, multiple), expected); }
 
-using RoundUpDivisionType = std::tuple<uint64_t, uint64_t, uint64_t>;
-class RoundUpDivisionTest
-    : public ::testing::TestWithParam<RoundUpDivisionType> {
+using roundUpDivisionSafeType = std::tuple<uint64_t, uint64_t, uint64_t>;
+class roundUpDivisionSafeTest
+    : public ::testing::TestWithParam<roundUpDivisionSafeType> {
 protected:
-  RoundUpDivisionTest() = default;
+  roundUpDivisionSafeTest() = default;
   virtual void SetUp() {
     in = std::get<0>(GetParam());
     divider = std::get<1>(GetParam());
@@ -116,7 +116,7 @@ class RoundUpDivisionTest
   uint64_t divider;
   uint64_t expected; // expected output
 };
-static const RoundUpDivisionType RoundUpDivisionValues[] = {
+static const roundUpDivisionSafeType roundUpDivisionSafeValues[] = {
     make_tuple(0, 10, 0),
     make_tuple(10, 10, 1),
     make_tuple(10, 1, 10),
@@ -148,10 +148,10 @@ static const RoundUpDivisionType RoundUpDivisionValues[] = {
                1),
 
 };
-INSTANTIATE_TEST_SUITE_P(RoundUpDivisionTest, RoundUpDivisionTest,
-                         ::testing::ValuesIn(RoundUpDivisionValues));
-TEST_P(RoundUpDivisionTest, RoundUpDivisionTest) {
-  ASSERT_EQ(roundUpDivision(in, divider), expected);
+INSTANTIATE_TEST_SUITE_P(roundUpDivisionSafeTest, roundUpDivisionSafeTest,
+                         ::testing::ValuesIn(roundUpDivisionSafeValues));
+TEST_P(roundUpDivisionSafeTest, roundUpDivisionSafeTest) {
+  ASSERT_EQ(roundUpDivisionSafe(in, divider), expected);
 }
 
 using IsAlignedType = std::tuple<int, int>;

From 991a04ae173510ae1df2561c8b4ce0703ac67a76 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 01:36:39 +0300
Subject: [PATCH 5/8] Use non-overflow-hardened `roundUpDivision()` in a few
 places

---
 src/librawspeed/bitstreams/BitStreamPosition.h       | 2 +-
 src/librawspeed/common/Common.h                      | 6 ++++++
 test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp   | 4 ++--
 test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp | 4 ++--
 test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp | 4 ++--
 test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp   | 4 ++--
 6 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/librawspeed/bitstreams/BitStreamPosition.h b/src/librawspeed/bitstreams/BitStreamPosition.h
index a411db4a3..60c3bd275 100644
--- a/src/librawspeed/bitstreams/BitStreamPosition.h
+++ b/src/librawspeed/bitstreams/BitStreamPosition.h
@@ -49,7 +49,7 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
   invariant(state.fillLevel >= 0);
 
   auto numBytesRemainingInCache =
-      implicit_cast<int>(roundUpDivisionSafe(state.fillLevel, CHAR_BIT));
+      implicit_cast<int>(roundUpDivision(state.fillLevel, CHAR_BIT));
   invariant(numBytesRemainingInCache >= 0);
   invariant(numBytesRemainingInCache <= state.pos);
 
diff --git a/src/librawspeed/common/Common.h b/src/librawspeed/common/Common.h
index c97dc05e0..3df47d993 100644
--- a/src/librawspeed/common/Common.h
+++ b/src/librawspeed/common/Common.h
@@ -136,6 +136,12 @@ constexpr uint64_t RAWSPEED_READNONE roundUp(uint64_t value,
   return roundToMultiple(value, multiple, /*roundDown=*/false);
 }
 
+constexpr uint64_t RAWSPEED_READNONE roundUpDivision(uint64_t value,
+                                                     uint64_t div) {
+  invariant(div != 0);
+  return roundUp(value, div) / div;
+}
+
 constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value,
                                                          uint64_t div) {
   return (value != 0) ? (1 + ((value - 1) / div)) : 0;
diff --git a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
index 4c72d412b..37ca89af4 100644
--- a/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerLSBTest.cpp
@@ -313,7 +313,7 @@ TEST(BitVacuumerLSBTest, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -368,7 +368,7 @@ TEST(BitVacuumerLSBTest, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
index db1e1e935..a005ac77e 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB16Test.cpp
@@ -310,7 +310,7 @@ TEST(BitVacuumerMSB16Test, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -365,7 +365,7 @@ TEST(BitVacuumerMSB16Test, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
index 9f67cf515..9cdc170ae 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSB32Test.cpp
@@ -313,7 +313,7 @@ TEST(BitVacuumerMSB32Test, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -368,7 +368,7 @@ TEST(BitVacuumerMSB32Test, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }
diff --git a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
index a08ce1002..5dbff005f 100644
--- a/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
+++ b/test/librawspeed/bitstreams/BitVacuumerMSBTest.cpp
@@ -313,7 +313,7 @@ TEST(BitVacuumerMSBTest, DependencyBreaking) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
       ASSERT_THAT(bsRebased.getBits(8), expectedVal);
     }
@@ -368,7 +368,7 @@ TEST(BitVacuumerMSBTest, ReloadCache) {
     }
 
     for (int i = 0; i != numBytesRemaining; ++i) {
-      const auto expectedVal = roundUpDivisionSafe(numBitsToSkip, CHAR_BIT) + i;
+      const auto expectedVal = roundUpDivision(numBitsToSkip, CHAR_BIT) + i;
       ASSERT_THAT(bsRef.getBits(8), expectedVal);
     }
   }

From dda5a6b307d4b9ae0948057d5dd4bc1e59e6d318 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 02:29:07 +0300
Subject: [PATCH 6/8] ByteStreamPosition: `numBitsToSkip` has predictable range

---
 src/librawspeed/bitstreams/BitStreamPosition.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/librawspeed/bitstreams/BitStreamPosition.h b/src/librawspeed/bitstreams/BitStreamPosition.h
index 60c3bd275..db4e183a8 100644
--- a/src/librawspeed/bitstreams/BitStreamPosition.h
+++ b/src/librawspeed/bitstreams/BitStreamPosition.h
@@ -67,6 +67,8 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
   res.bytePos = state.pos - numBytesToBacktrack;
   invariant(numBitsToBacktrack >= state.fillLevel);
   res.numBitsToSkip = numBitsToBacktrack - state.fillLevel;
+  invariant(res.numBitsToSkip >= 0);
+  invariant(res.numBitsToSkip < CHAR_BIT * MinByteStepMultiple);
 
   invariant(res.bytePos >= 0);
   invariant(res.bytePos <= state.pos);

From 267cb2bc0b68dff612fb2f0eecbe5b8437cb6062 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 03:01:19 +0300
Subject: [PATCH 7/8] `BitStreamer::reload()`: avoid unpredictable branch

---
 src/librawspeed/bitstreams/BitStream.h   | 2 +-
 src/librawspeed/bitstreams/BitStreamer.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/librawspeed/bitstreams/BitStream.h b/src/librawspeed/bitstreams/BitStream.h
index 07669d4fc..9d80187d7 100644
--- a/src/librawspeed/bitstreams/BitStream.h
+++ b/src/librawspeed/bitstreams/BitStream.h
@@ -81,7 +81,7 @@ struct BitStreamCacheLeftInRightOut final : BitStreamCacheBase {
     establishClassInvariants();
     invariant(count >= 0);
     // `count` *could* be larger than `MaxGetBits`.
-    invariant(count != 0);
+    // `count` could be zero.
     invariant(count <= Size);
     invariant(count <= fillLevel);
     cache >>= count;
diff --git a/src/librawspeed/bitstreams/BitStreamer.h b/src/librawspeed/bitstreams/BitStreamer.h
index 988475aee..a4368ba2b 100644
--- a/src/librawspeed/bitstreams/BitStreamer.h
+++ b/src/librawspeed/bitstreams/BitStreamer.h
@@ -204,8 +204,8 @@ class BitStreamer {
     auto replacement = BitStreamer(replenisher.input);
     if (bsPos.bytePos != 0)
       replacement.replenisher.markNumBytesAsConsumed(bsPos.bytePos);
-    if (bsPos.numBitsToSkip != 0)
-      replacement.skipBits(bsPos.numBitsToSkip);
+    replacement.fill();
+    replacement.skipBitsNoFill(bsPos.numBitsToSkip);
     *this = std::move(replacement);
   }
 

From 3588b6bd9f24187e793a714c3bcd702f20b2498e Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 13 Apr 2024 03:46:05 +0300
Subject: [PATCH 8/8] ByteStreamPosition: `getAsByteStreamPosition()`: merge
 rounding steps

Results in nicer code for `MSB32` bit flavor.
---
 src/librawspeed/bitstreams/BitStreamPosition.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/librawspeed/bitstreams/BitStreamPosition.h b/src/librawspeed/bitstreams/BitStreamPosition.h
index db4e183a8..5cf65ca53 100644
--- a/src/librawspeed/bitstreams/BitStreamPosition.h
+++ b/src/librawspeed/bitstreams/BitStreamPosition.h
@@ -48,13 +48,9 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
   invariant(state.pos % MinByteStepMultiple == 0);
   invariant(state.fillLevel >= 0);
 
-  auto numBytesRemainingInCache =
-      implicit_cast<int>(roundUpDivision(state.fillLevel, CHAR_BIT));
-  invariant(numBytesRemainingInCache >= 0);
-  invariant(numBytesRemainingInCache <= state.pos);
-
   auto numBytesToBacktrack = implicit_cast<int>(
-      roundUp(numBytesRemainingInCache, MinByteStepMultiple));
+      MinByteStepMultiple *
+      roundUpDivision(state.fillLevel, CHAR_BIT * MinByteStepMultiple));
   invariant(numBytesToBacktrack >= 0);
   invariant(numBytesToBacktrack <= state.pos);
   invariant(numBytesToBacktrack % MinByteStepMultiple == 0);