Skip to content

Commit

Permalink
Merge pull request #711 from LebedevRI/bs-pos
Browse files Browse the repository at this point in the history
`ByteStreamPosition`: some more touchups
  • Loading branch information
LebedevRI authored Apr 13, 2024
2 parents 19a74a7 + 3588b6b commit 3bddb8d
Show file tree
Hide file tree
Showing 28 changed files with 342 additions and 63 deletions.
4 changes: 2 additions & 2 deletions bench/librawspeed/adt/CoalescingOutputIteratorBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ template <typename C> void BM_Broadcast(benchmark::State& state) {
int64_t numBytes = state.range(0);
const int bytesPerChunk = sizeof(T);
const auto numChunks =
implicit_cast<int>(roundUpDivision(numBytes, bytesPerChunk));
implicit_cast<int>(roundUpDivisionSafe(numBytes, bytesPerChunk));
numBytes = bytesPerChunk * numChunks;

std::vector<T, DefaultInitAllocatorAdaptor<T, std::allocator<T>>> output;
Expand Down Expand Up @@ -105,7 +105,7 @@ template <typename C> void BM_Copy(benchmark::State& state) {
int64_t numBytes = state.range(0);
const int bytesPerChunk = sizeof(T);
const auto numChunks =
implicit_cast<int>(roundUpDivision(numBytes, bytesPerChunk));
implicit_cast<int>(roundUpDivisionSafe(numBytes, bytesPerChunk));
numBytes = bytesPerChunk * numChunks;

std::vector<uint8_t,
Expand Down
2 changes: 1 addition & 1 deletion bench/librawspeed/bitstreams/BitStreamJPEGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ __attribute__((noinline)) __attribute__((visibility("default")))
JPEGStuffedByteStreamGenerator::JPEGStuffedByteStreamGenerator(
const int64_t numBytesMax, bool AppendStuffingByte) {
invariant(numBytesMax > 0);
const auto expectedOverhead = roundUpDivision(numBytesMax, 100); // <=1%
const auto expectedOverhead = roundUpDivisionSafe(numBytesMax, 100); // <=1%
dataStorage.reserve(implicit_cast<size_t>(numBytesMax + expectedOverhead));

// Here we only need to differentiate between a normal byte,
Expand Down
4 changes: 2 additions & 2 deletions bench/librawspeed/bitstreams/BitVacuumerBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ struct BitVectorLengthsGenerator final {
std::random_device rd;
std::mt19937_64 gen(rd());

for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivision(
for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivisionSafe(
numBits, CHAR_BIT)) < maxBytes;) {
int len = dist(gen);
numBitsToProduce += len;
Expand Down Expand Up @@ -148,7 +148,7 @@ template <typename T, typename C> void BM(benchmark::State& state) {
DefaultInitAllocatorAdaptor<OutputChunkType,
std::allocator<OutputChunkType>>>
output;
output.reserve(implicit_cast<size_t>(roundUpDivision(
output.reserve(implicit_cast<size_t>(roundUpDivisionSafe(
gen.numBitsToProduce, CHAR_BIT * sizeof(OutputChunkType))));

for (auto _ : state) {
Expand Down
2 changes: 1 addition & 1 deletion bench/librawspeed/bitstreams/BitVacuumerJPEGBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void BM(benchmark::State& state, bool Stuffed) {
std::allocator<OutputChunkType>>>
output;
output.reserve(implicit_cast<size_t>(
roundUpDivision(input->size(), sizeof(OutputChunkType))));
roundUpDivisionSafe(input->size(), sizeof(OutputChunkType))));

for (auto _ : state) {
output.clear();
Expand Down
2 changes: 1 addition & 1 deletion src/librawspeed/bitstreams/BitStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct BitStreamCacheLeftInRightOut final : BitStreamCacheBase {
establishClassInvariants();
invariant(count >= 0);
// `count` *could* be larger than `MaxGetBits`.
invariant(count != 0);
// `count` could be zero.
invariant(count <= Size);
invariant(count <= fillLevel);
cache >>= count;
Expand Down
10 changes: 4 additions & 6 deletions src/librawspeed/bitstreams/BitStreamPosition.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,9 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
invariant(state.pos % MinByteStepMultiple == 0);
invariant(state.fillLevel >= 0);

auto numBytesRemainingInCache =
implicit_cast<int>(roundUpDivision(state.fillLevel, CHAR_BIT));
invariant(numBytesRemainingInCache >= 0);
invariant(numBytesRemainingInCache <= state.pos);

auto numBytesToBacktrack = implicit_cast<int>(
roundUp(numBytesRemainingInCache, MinByteStepMultiple));
MinByteStepMultiple *
roundUpDivision(state.fillLevel, CHAR_BIT * MinByteStepMultiple));
invariant(numBytesToBacktrack >= 0);
invariant(numBytesToBacktrack <= state.pos);
invariant(numBytesToBacktrack % MinByteStepMultiple == 0);
Expand All @@ -67,6 +63,8 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
res.bytePos = state.pos - numBytesToBacktrack;
invariant(numBitsToBacktrack >= state.fillLevel);
res.numBitsToSkip = numBitsToBacktrack - state.fillLevel;
invariant(res.numBitsToSkip >= 0);
invariant(res.numBitsToSkip < CHAR_BIT * MinByteStepMultiple);

invariant(res.bytePos >= 0);
invariant(res.bytePos <= state.pos);
Expand Down
18 changes: 18 additions & 0 deletions src/librawspeed/bitstreams/BitStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "adt/Invariant.h"
#include "adt/VariableLengthLoad.h"
#include "bitstreams/BitStream.h"
#include "bitstreams/BitStreamPosition.h"
#include "io/Endianness.h"
#include "io/IOException.h"
#include <array>
Expand Down Expand Up @@ -192,6 +193,22 @@ class BitStreamer {
establishClassInvariants();
}

void reload() {
establishClassInvariants();

BitStreamPosition<Traits::Tag> state;
state.pos = getInputPosition();
state.fillLevel = getFillLevel();
const auto bsPos = getAsByteStreamPosition(state);

auto replacement = BitStreamer(replenisher.input);
if (bsPos.bytePos != 0)
replacement.replenisher.markNumBytesAsConsumed(bsPos.bytePos);
replacement.fill();
replacement.skipBitsNoFill(bsPos.numBitsToSkip);
*this = std::move(replacement);
}

void fill(int nbits = Cache::MaxGetBits) {
establishClassInvariants();
invariant(nbits >= 0);
Expand All @@ -204,6 +221,7 @@ class BitStreamer {
const auto input = replenisher.getInput();
const auto numBytes = static_cast<Derived*>(this)->fillCache(input);
replenisher.markNumBytesAsConsumed(numBytes);
invariant(cache.fillLevel >= nbits);
}

// these methods might be specialized by implementations that support it
Expand Down
6 changes: 6 additions & 0 deletions src/librawspeed/common/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ constexpr uint64_t RAWSPEED_READNONE roundUp(uint64_t value,

// Divides `value` by `div`, after first passing `value` through
// roundUp(value, div). `div` must be non-zero (checked via invariant()).
// NOTE(review): roundUp() is defined outside this view; presumably it rounds
// `value` up to the next multiple of `div`, making this a ceiling division --
// confirm, and check whether that intermediate can wrap for huge `value`.
constexpr uint64_t RAWSPEED_READNONE roundUpDivision(uint64_t value,
                                                     uint64_t div) {
  invariant(div != 0);
  const uint64_t rounded = roundUp(value, div);
  return rounded / div;
}

// Ceiling division of `value` by `div`, computed as 1 + (value - 1) / div.
// Zero maps to zero. Because nothing is ever added to `value` itself, no
// intermediate result can wrap around, whatever the magnitude of `value`.
// `div` is not checked here: a zero `div` with a non-zero `value` divides
// by zero, so callers must rule that out beforehand.
constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value,
                                                         uint64_t div) {
  if (value == 0)
    return 0;
  return 1 + ((value - 1) / div);
}

Expand Down
6 changes: 3 additions & 3 deletions src/librawspeed/common/DngOpcodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,8 @@ class DngOpcodes::PixelOpcode : public ROIOpcode {
int cpp = ri->getCpp();
const iRectangle2D& ROI = getRoi();
const iPoint2D numAffected(
implicit_cast<int>(roundUpDivision(getRoi().dim.x, colPitch)),
implicit_cast<int>(roundUpDivision(getRoi().dim.y, rowPitch)));
implicit_cast<int>(roundUpDivisionSafe(getRoi().dim.x, colPitch)),
implicit_cast<int>(roundUpDivisionSafe(getRoi().dim.y, rowPitch)));
for (int y = 0; y < numAffected.y; ++y) {
for (int x = 0; x < numAffected.x; ++x) {
for (auto p = 0U; p < planes; ++p) {
Expand Down Expand Up @@ -568,7 +568,7 @@ class DngOpcodes::DeltaRowOrCol : public DeltaRowOrColBase {
// See PixelOpcode::applyOP(). We will access deltaF/deltaI up to (excl.)
// either ROI.getWidth() or ROI.getHeight() index. Thus, we need to have
// either ROI.getRight() or ROI.getBottom() elements in there.
if (const auto expectedSize = roundUpDivision(
if (const auto expectedSize = roundUpDivisionSafe(
S::select(getRoi().getWidth(), getRoi().getHeight()),
S::select(getPitch().x, getPitch().y));
expectedSize != deltaF_count) {
Expand Down
4 changes: 2 additions & 2 deletions src/librawspeed/common/RawImage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ void RawImageData::subFrame(iRectangle2D crop) {
void RawImageData::createBadPixelMap() {
if (!isAllocated())
ThrowRDE("(internal) Bad pixel map cannot be allocated before image.");
mBadPixelMapPitch =
implicit_cast<uint32_t>(roundUp(roundUpDivision(uncropped_dim.x, 8), 16));
mBadPixelMapPitch = implicit_cast<uint32_t>(
roundUp(roundUpDivisionSafe(uncropped_dim.x, 8), 16));
assert(mBadPixelMap.empty());
mBadPixelMap.resize(static_cast<size_t>(mBadPixelMapPitch) * uncropped_dim.y,
uint8_t(0));
Expand Down
4 changes: 2 additions & 2 deletions src/librawspeed/decoders/ArwDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,13 @@ void ArwDecoder::DecodeLJpeg(const TiffIFD* raw) {

assert(tilew > 0);
const auto tilesX =
implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.x, tilew));
implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.x, tilew));
if (!tilesX)
ThrowRDE("Zero tiles horizontally");

assert(tileh > 0);
const auto tilesY =
implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.y, tileh));
implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.y, tileh));
if (!tilesY)
ThrowRDE("Zero tiles vertically");

Expand Down
6 changes: 3 additions & 3 deletions src/librawspeed/decoders/DngDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,13 +308,13 @@ DngDecoder::getTilingDescription(const TiffIFD* raw) const {

assert(tilew > 0);
const auto tilesX =
implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.x, tilew));
implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.x, tilew));
if (!tilesX)
ThrowRDE("Zero tiles horizontally");

assert(tileh > 0);
const auto tilesY =
implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.y, tileh));
implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.y, tileh));
if (!tilesY)
ThrowRDE("Zero tiles vertically");

Expand Down Expand Up @@ -350,7 +350,7 @@ DngDecoder::getTilingDescription(const TiffIFD* raw) const {
: mRaw->dim.y;

if (yPerSlice == 0 ||
roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
yPerSlice, counts->count, mRaw->dim.y);
}
Expand Down
4 changes: 2 additions & 2 deletions src/librawspeed/decoders/IiqDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,8 @@ void IiqDecoder::PhaseOneFlatField(ByteStream data, IiqCorr corr) const {
if (head[2] == 0 || head[3] == 0 || head[4] == 0 || head[5] == 0)
return;

auto wide = implicit_cast<int>(roundUpDivision(head[2], head[4]));
auto high = implicit_cast<int>(roundUpDivision(head[3], head[5]));
auto wide = implicit_cast<int>(roundUpDivisionSafe(head[2], head[4]));
auto high = implicit_cast<int>(roundUpDivisionSafe(head[3], head[5]));

std::vector<float> mrow_storage;
Array2DRef<float> mrow = Array2DRef<float>::create(
Expand Down
4 changes: 2 additions & 2 deletions src/librawspeed/decoders/NefDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ bool NefDecoder::NEFIsUncompressed(const TiffIFD* raw) {
// We can't just accept this. Some *compressed* NEF's also pass this check :(
// Thus, let's accept *some* *small* padding.
const auto requiredInputBits = bitPerPixel * requiredPixels;
const auto requiredInputBytes = roundUpDivision(requiredInputBits, 8);
const auto requiredInputBytes = roundUpDivisionSafe(requiredInputBits, 8);
// While we might have more *pixels* than needed, it does not necessarily mean
// that we have more input *bytes*. We might be off by a few pixels, and with
// small image dimensions and bpp, we might still be in the same byte.
Expand Down Expand Up @@ -229,7 +229,7 @@ void NefDecoder::DecodeUncompressed() const {
}

if (yPerSlice == 0 || yPerSlice > static_cast<uint32_t>(mRaw->dim.y) ||
roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
yPerSlice, counts->count, mRaw->dim.y);
}
Expand Down
2 changes: 1 addition & 1 deletion src/librawspeed/decoders/OrfDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ void OrfDecoder::decodeUncompressedInterleaved(ByteStream s, uint32_t w,

int inputPitchBytes = inputPitchBits / 8;

const auto numEvenLines = implicit_cast<int>(roundUpDivision(h, 2));
const auto numEvenLines = implicit_cast<int>(roundUpDivisionSafe(h, 2));
const auto evenLinesInput = s.getStream(numEvenLines, inputPitchBytes)
.peekRemainingBuffer()
.getAsArray1DRef();
Expand Down
2 changes: 1 addition & 1 deletion src/librawspeed/decoders/RawDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void RawDecoder::decodeUncompressed(const TiffIFD* rawIFD,
}

if (yPerSlice == 0 || yPerSlice > static_cast<uint32_t>(mRaw->dim.y) ||
roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
yPerSlice, counts->count, mRaw->dim.y);
}
Expand Down
4 changes: 2 additions & 2 deletions src/librawspeed/decompressors/AbstractDngDecompressor.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ struct DngTilingDescription final {

DngTilingDescription(const iPoint2D& dim_, uint32_t tileW_, uint32_t tileH_)
: dim(dim_), tileW(tileW_), tileH(tileH_),
tilesX(implicit_cast<uint32_t>(roundUpDivision(dim.x, tileW))),
tilesY(implicit_cast<uint32_t>(roundUpDivision(dim.y, tileH))),
tilesX(implicit_cast<uint32_t>(roundUpDivisionSafe(dim.x, tileW))),
tilesY(implicit_cast<uint32_t>(roundUpDivisionSafe(dim.y, tileH))),
numTiles(tilesX * tilesY) {
invariant(dim.area() > 0);
invariant(tileW > 0);
Expand Down
2 changes: 1 addition & 1 deletion src/librawspeed/decompressors/FujiDecompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ FujiDecompressor::FujiHeader::operator bool() const {
raw_rounded_width % block_size ||
raw_rounded_width - raw_width >= block_size || blocks_in_row > 0x10 ||
blocks_in_row == 0 || blocks_in_row != raw_rounded_width / block_size ||
blocks_in_row != roundUpDivision(raw_width, block_size) ||
blocks_in_row != roundUpDivisionSafe(raw_width, block_size) ||
total_lines > 0x800 || total_lines == 0 ||
total_lines != raw_height / FujiStrip::lineHeight() ||
(raw_bits != 12 && raw_bits != 14 && raw_bits != 16) ||
Expand Down
6 changes: 3 additions & 3 deletions src/librawspeed/decompressors/LJpegDecompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ LJpegDecompressor::LJpegDecompressor(RawImage img, iRectangle2D imgFrame_,
static_cast<int>(mRaw->getCpp()) * imgFrame.dim.x;

// How many full pixel MCUs do we need to consume for that?
if (const auto mcusToConsume =
implicit_cast<int>(roundUpDivision(tileRequiredWidth, frame.mcu.x));
if (const auto mcusToConsume = implicit_cast<int>(
roundUpDivisionSafe(tileRequiredWidth, frame.mcu.x));
frame.dim.x < mcusToConsume ||
frame.mcu.y * frame.dim.y < imgFrame.dim.y ||
frame.mcu.x * frame.dim.x < tileRequiredWidth) {
Expand Down Expand Up @@ -274,7 +274,7 @@ ByteStream::size_type LJpegDecompressor::decodeN() const {
// the raw image buffer. The excessive content has to be ignored.

invariant(imgFrame.dim.y % frame.mcu.y == 0);
const auto numRestartIntervals = implicit_cast<int>(roundUpDivision(
const auto numRestartIntervals = implicit_cast<int>(roundUpDivisionSafe(
imgFrame.dim.y / frame.mcu.y, numLJpegRowsPerRestartInterval));
invariant(numRestartIntervals >= 0);
invariant(numRestartIntervals != 0);
Expand Down
3 changes: 2 additions & 1 deletion src/librawspeed/decompressors/PanasonicV4Decompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ void PanasonicV4Decompressor::chopInputIntoBlocks() {
};

// If section_split_offset == 0, last block may not be full.
const auto blocksTotal = roundUpDivision(input.getRemainSize(), BlockSize);
const auto blocksTotal =
roundUpDivisionSafe(input.getRemainSize(), BlockSize);
invariant(blocksTotal > 0);
invariant(blocksTotal * PixelsPerBlock >= mRaw->dim.area());
assert(blocksTotal <= std::numeric_limits<uint32_t>::max());
Expand Down
2 changes: 1 addition & 1 deletion src/librawspeed/decompressors/PanasonicV5Decompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ PanasonicV5Decompressor::PanasonicV5Decompressor(RawImage img,
invariant(numPackets > 0);

// And how many blocks that would be? Last block may not be full, pad it.
numBlocks = roundUpDivision(numPackets, PacketsPerBlock);
numBlocks = roundUpDivisionSafe(numPackets, PacketsPerBlock);
invariant(numBlocks > 0);

// Does the input contain enough blocks?
Expand Down
14 changes: 7 additions & 7 deletions src/librawspeed/decompressors/VC5Decompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ VC5Decompressor::BandData VC5Decompressor::Wavelet::reconstructPass(
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#ifdef HAVE_OPENMP
#pragma omp taskloop default(none) firstprivate(dst, process) \
num_tasks(roundUpDivision(rawspeed_get_number_of_processor_cores(), \
numChannels))
num_tasks(roundUpDivisionSafe(rawspeed_get_number_of_processor_cores(), \
numChannels))
#endif
for (int row = 0; row < dst.height() / 2; ++row) {
#pragma GCC diagnostic pop
Expand Down Expand Up @@ -267,9 +267,8 @@ VC5Decompressor::BandData VC5Decompressor::Wavelet::combineLowHighPass(
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#ifdef HAVE_OPENMP
#pragma omp taskloop if (finalWavelet) default(none) \
firstprivate(dst, process) \
num_tasks(roundUpDivision(rawspeed_get_number_of_processor_cores(), 2)) \
mergeable
firstprivate(dst, process) num_tasks(roundUpDivisionSafe( \
rawspeed_get_number_of_processor_cores(), 2)) mergeable
#endif
for (int row = 0; row < dst.height(); ++row) {
#pragma GCC diagnostic pop
Expand Down Expand Up @@ -411,7 +410,8 @@ VC5Decompressor::VC5Decompressor(ByteStream bs, const RawImage& img)
for (Wavelet& wavelet : channel.wavelets) {
// Pad dimensions as necessary and divide them by two for the next wavelet
for (auto* dimension : {&waveletWidth, &waveletHeight})
*dimension = implicit_cast<uint16_t>(roundUpDivision(*dimension, 2));
*dimension =
implicit_cast<uint16_t>(roundUpDivisionSafe(*dimension, 2));
wavelet.width = waveletWidth;
wavelet.height = waveletHeight;

Expand Down Expand Up @@ -658,7 +658,7 @@ VC5Decompressor::Wavelet::LowPassBand::LowPassBand(Wavelet& wavelet_,
const auto bitsTotal = waveletArea * lowpassPrecision;
constexpr int bytesPerChunk = 8; // FIXME: or is it 4?
constexpr int bitsPerChunk = 8 * bytesPerChunk;
const auto chunksTotal = roundUpDivision(bitsTotal, bitsPerChunk);
const auto chunksTotal = roundUpDivisionSafe(bitsTotal, bitsPerChunk);
const auto bytesTotal = bytesPerChunk * chunksTotal;
// And clamp the size / verify sufficient input while we are at it.
// NOTE: this might fail (and should throw, not assert).
Expand Down
2 changes: 1 addition & 1 deletion test/librawspeed/adt/CoalescingOutputIteratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ template <typename CoalescedType, typename PartType>
auto coalesceElts(Array1DRef<const PartType> input) {
std::vector<CoalescedType> outputStorage;
{
outputStorage.reserve(implicit_cast<size_t>(roundUpDivision(
outputStorage.reserve(implicit_cast<size_t>(roundUpDivisionSafe(
sizeof(PartType) * input.size(), sizeof(CoalescedType))));
auto subIter = std::back_inserter(outputStorage);
auto iter = CoalescingOutputIterator<decltype(subIter), PartType>(subIter);
Expand Down
Loading

0 comments on commit 3bddb8d

Please sign in to comment.